From e74dd446fb206c8abd4fae1cdaa53fd068411944 Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Thu, 31 Aug 2023 05:39:23 -0700 Subject: [PATCH] station: fall back to reassociation under certain FT failures The auth/action status is now tracked in ft.c. If an AP rejects the FT attempt with "Invalid PMKID" we can now assume this AP is either mis-configured for FT or is lagging behind getting the proper keys from neighboring APs (e.g. was just rebooted). If we see this condition IWD can now fall back to reassociation in an attempt to still roam to the best candidate. The fallback decision is still rank based: if a BSS fails FT it is marked as such, its ranking is reset removing the FT factor and it is inserted back into the queue. The motivation behind this isn't necessarily to always force a roam, but instead to handle two cases where IWD can either make a bad roam decision or get 'stuck' and never roam: 1. If there is one good roam candidate and other bad ones. For example say BSS A is experiencing this FT key pull issue: Current BSS: -85dbm BSS A: -55dbm BSS B: -80dbm The current logic would fail A, and roam to B. In this case reassociation would have likely succeeded so it makes more sense to reassociate to A as a fallback. 2. If there is only one candidate, but its failing FT. IWD will never try anything other than FT and repeatedly fail. Both of the above have been seen on real network deployments and result in either poor performance (1) or eventually lead to a full disconnect due to never roaming (2). --- src/station.c | 79 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/src/station.c b/src/station.c index 2473de2a..9e4ee69a 100644 --- a/src/station.c +++ b/src/station.c @@ -147,15 +147,18 @@ struct roam_bss { uint8_t addr[6]; uint16_t rank; int32_t signal_strength; + bool ft_failed: 1; }; -static struct roam_bss *roam_bss_from_scan_bss(const struct scan_bss *bss) +static struct roam_bss *roam_bss_from_scan_bss(const struct scan_bss *bss, + uint16_t rank) { struct roam_bss *rbss = l_new(struct roam_bss, 1); memcpy(rbss->addr, bss->addr, 6); - rbss->rank = bss->rank; + rbss->rank = rank; rbss->signal_strength = bss->signal_strength; + rbss->ft_failed = false; return rbss; } @@ -2169,10 +2172,14 @@ static int station_transition_reassociate(struct station *station, if (ret < 0) return ret; + l_debug(""); + station->connected_bss = bss; station->preparing_roam = false; station_enter_state(station, STATION_STATE_ROAMING); + station_debug_event(station, "reassoc-roam"); + return 0; } @@ -2264,34 +2271,60 @@ static void station_transition_start(struct station *station); static bool station_ft_work_ready(struct wiphy_radio_work_item *item) { struct station *station = l_container_of(item, struct station, ft_work); - struct roam_bss *rbss = l_queue_pop_head(station->roam_bss_list); - struct scan_bss *bss = network_bss_find_by_addr( - station->connected_network, rbss->addr); + _auto_(l_free) struct roam_bss *rbss = l_queue_pop_head( + station->roam_bss_list); + struct scan_bss *bss; int ret; - l_free(rbss); - /* Very unlikely, but the BSS could have gone away */ + bss = network_bss_find_by_addr(station->connected_network, rbss->addr); if (!bss) goto try_next; ret = ft_associate(netdev_get_ifindex(station->netdev), bss->addr); - if (ret == -ENOENT) { + switch (ret) { + case MMPDU_STATUS_CODE_INVALID_PMKID: + /* + * Re-insert removing FT from the ranking (scan_bss does not + * take into account FT, so we can use that rank directly). + * If the BSS is still the best reassociation will be used, + * otherwise we'll try more FT candidates that are better ranked + */ + rbss->rank = bss->rank; + rbss->ft_failed = true; + + l_debug("Re-inserting BSS "MAC" using reassociation, rank: %u", + MAC_STR(rbss->addr), rbss->rank); + + l_queue_insert(station->roam_bss_list, rbss, + roam_bss_rank_compare, NULL); + + station_debug_event(station, "ft-fallback-to-reassoc"); + + station_transition_start(station); + l_steal_ptr(rbss); + break; + case -ENOENT: station_debug_event(station, "ft-roam-failed"); try_next: station_transition_start(station); - return true; - } else if (ret < 0) - goto assoc_failed; + break; + case 0: + station->connected_bss = bss; + station->preparing_roam = false; + station_enter_state(station, STATION_STATE_FT_ROAMING); - station->connected_bss = bss; - station->preparing_roam = false; - station_enter_state(station, STATION_STATE_FT_ROAMING); + station_debug_event(station, "ft-roam"); - return true; + break; + default: + if (ret > 0) + goto try_next; + + station_roam_failed(station); + break; + } -assoc_failed: - station_roam_failed(station); return true; } @@ -2349,7 +2382,8 @@ done: } static bool station_try_next_transition(struct station *station, - struct scan_bss *bss) + struct scan_bss *bss, + bool no_ft) { struct handshake_state *hs = netdev_get_handshake(station->netdev); struct network *connected = station->connected_network; @@ -2364,7 +2398,7 @@ static bool station_try_next_transition(struct station *station, station->ap_directed_roaming = false; /* Can we use Fast Transition? */ - if (station_can_fast_transition(hs, bss)) + if (station_can_fast_transition(hs, bss) && !no_ft) return station_fast_transition(station, bss); /* Non-FT transition */ @@ -2425,7 +2459,8 @@ static void station_transition_start(struct station *station) struct scan_bss *bss = network_bss_find_by_addr( station->connected_network, rbss->addr); - roaming = station_try_next_transition(station, bss); + roaming = station_try_next_transition(station, bss, + rbss->ft_failed); if (roaming) break; @@ -2581,7 +2616,7 @@ static bool station_roam_scan_notify(int err, struct l_queue *bss_list, */ station_update_roam_bss(station, bss); - rbss = roam_bss_from_scan_bss(bss); + rbss = roam_bss_from_scan_bss(bss, rank); l_queue_insert(station->roam_bss_list, rbss, roam_bss_rank_compare, NULL); @@ -4603,7 +4638,7 @@ static bool station_force_roam_scan_notify(int err, struct l_queue *bss_list, /* The various roam routines expect this to be set from scanning */ station->preparing_roam = true; l_queue_push_tail(station->roam_bss_list, - roam_bss_from_scan_bss(target)); + roam_bss_from_scan_bss(target, target->rank)); station_update_roam_bss(station, target);