From 320041eaf2c8d8228f0763f2e259308858060e15 Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Mon, 30 Oct 2023 06:48:37 -0700 Subject: [PATCH] station: rate limit packet loss roam scans The packet loss handler puts a higher priority on roaming compared to the low signal roam path. This is generally beneficial since this event usually indicates some problem with the BSS and generally is an indicator that a disconnect will follow sometime soon. But by immediately issuing a scan we run the risk of causing many successive scans if more packet loss events arrive following the roam scans (and if no candidates are found). Logs provided further. To help with this handle the first event with priority and immediately issue a roam scan. If another event comes in within a certain timeframe (2 seconds) don't immediately scan, but instead rearm the roam timer instead of issuing a scan. This also handles the case of a low signal roam scan followed by a packet loss event. Delaying the roam will at least provide some time for packets to get out in between roam scans. Logs were snipped to be less verbose, but this cycled happened 5 times prior. In total 7 scans were issued in 5 seconds which may very well have been the reason for the local disconnect: Oct 27 16:23:46 src/station.c:station_roam_failed() 9 Oct 27 16:23:46 src/wiphy.c:wiphy_radio_work_done() Work item 29 done Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64) Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10 Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9 Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 30 Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 30 Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33) Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification New Scan Results(34) Oct 27 16:23:47 src/netdev.c:netdev_link_notify() event 16 on ifindex 9 ... scan results ... Oct 27 16:23:47 src/station.c:station_roam_failed() 9 Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_done() Work item 30 done Oct 27 16:23:47 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64) Oct 27 16:23:47 src/station.c:station_packets_lost() Packets lost event: 10 Oct 27 16:23:47 src/station.c:station_roam_scan() ifindex: 9 Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 31 Oct 27 16:23:47 src/wiphy.c:wiphy_radio_work_next() Starting work item 31 Oct 27 16:23:47 src/station.c:station_start_roam() Using cached neighbor report for roam Oct 27 16:23:47 src/scan.c:scan_notify() Scan notification Trigger Scan(33) Oct 27 16:23:47 src/scan.c:scan_request_triggered() Active scan triggered for wdev a Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification New Scan Results(34) Oct 27 16:23:48 src/netdev.c:netdev_link_notify() event 16 on ifindex 9 ... scan results ... Oct 27 16:23:48 src/station.c:station_roam_failed() 9 Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_done() Work item 31 done Oct 27 16:23:48 src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64) Oct 27 16:23:48 src/station.c:station_packets_lost() Packets lost event: 10 Oct 27 16:23:48 src/station.c:station_roam_scan() ifindex: 9 Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_insert() Inserting work item 32 Oct 27 16:23:48 src/wiphy.c:wiphy_radio_work_next() Starting work item 32 Oct 27 16:23:48 src/station.c:station_start_roam() Using cached neighbor report for roam Oct 27 16:23:48 src/scan.c:scan_notify() Scan notification Trigger Scan(33) Oct 27 16:23:48 src/scan.c:scan_request_triggered() Active scan triggered for wdev a Oct 27 16:23:49 src/netdev.c:netdev_link_notify() event 16 on ifindex 9 Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Del Station(20) Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Deauthenticate(39) Oct 27 16:23:49 src/netdev.c:netdev_deauthenticate_event() Oct 27 16:23:49 src/netdev.c:netdev_mlme_notify() MLME notification Disconnect(48) Oct 27 16:23:49 src/netdev.c:netdev_disconnect_event() Oct 27 16:23:49 Received Deauthentication event, reason: 4, from_ap: false --- src/station.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/station.c b/src/station.c index 9b224e5a..d4eb0cd8 100644 --- a/src/station.c +++ b/src/station.c @@ -123,6 +123,8 @@ struct station { struct wiphy_radio_work_item ft_work; + uint64_t last_roam_scan; + bool preparing_roam : 1; bool roam_scan_full : 1; bool signal_low : 1; @@ -1708,6 +1710,7 @@ static void station_roam_state_clear(struct station *station) station->signal_low = false; station->roam_min_time.tv_sec = 0; station->netconfig_after_roam = false; + station->last_roam_scan = 0; if (station->roam_scan_id) scan_cancel(netdev_get_wdev_id(station->netdev), @@ -2711,6 +2714,8 @@ static int station_roam_scan(struct station *station, if (L_WARN_ON(scan_freq_set_isempty(params.freqs))) return -ENOTSUP; + station->last_roam_scan = l_time_now(); + station->roam_scan_id = scan_active_full(netdev_get_wdev_id(station->netdev), ¶ms, station_roam_scan_triggered, @@ -3330,10 +3335,13 @@ static void station_disconnect_event(struct station *station, void *event_data) l_warn("Unexpected disconnect event"); } -#define STATION_PKT_LOSS_THRESHOLD 10 +#define STATION_PKT_LOSS_THRESHOLD 10 +#define LOSS_ROAM_RATE_LIMIT 2 static void station_packets_lost(struct station *station, uint32_t num_pkts) { + uint64_t elapsed; + l_debug("Packets lost event: %u", num_pkts); if (num_pkts < STATION_PKT_LOSS_THRESHOLD) @@ -3344,6 +3352,24 @@ static void station_packets_lost(struct station *station, uint32_t num_pkts) station_debug_event(station, "packet-loss-roam"); + elapsed = l_time_diff(station->last_roam_scan, l_time_now()); + + /* + * If we just issued a roam scan, delay the roam to avoid constant + * scanning. + */ + if (LOSS_ROAM_RATE_LIMIT > l_time_to_secs(elapsed)) { + l_debug("Too many roam attempts in %u second timeframe, " + "delaying roam", LOSS_ROAM_RATE_LIMIT); + + if (station->roam_trigger_timeout) + return; + + station_roam_timeout_rearm(station, LOSS_ROAM_RATE_LIMIT); + + return; + } + station_start_roam(station); }