From a0911ca77812776ef39d47bb0cfe0d5fee3de577 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Wed, 28 Apr 2021 12:55:56 -0500 Subject: [PATCH] station: Make sure roam_scan_id is always canceled Under very rare circumstances the roaming scan triggered might not be canceled properly. This is because we issue the roam scan recursively from within a scan callback and re-use the id of the scan for the subsequent request. The destroy callback is invoked right after the callback and resets the id. This leads to the scan not being canceled properly in roam_state_clear(). src/netdev.c:netdev_mlme_notify() MLME notification Notify CQM(64) src/station.c:station_roam_trigger_cb() 37 src/station.c:station_roam_scan() ifindex: 37 src/station.c:station_roam_trigger_cb() Using cached neighbor report for roam ... src/scan.c:get_scan_done() get_scan_done src/station.c:station_roam_failed() 37 src/station.c:station_roam_scan() ifindex: 37 src/scan.c:scan_request_triggered() Active scan triggered for wdev 22 ^CTerminate src/netdev.c:netdev_free() Freeing netdev wlan0[37] src/device.c:device_free() src/station.c:station_free() ... Removing scan context for wdev 22 src/scan.c:scan_context_free() sc: 0x4a362a0 src/wiphy.c:wiphy_radio_work_done() Work item 14 done ==19542== Invalid write of size 4 ==19542== at 0x411500: station_roam_scan_destroy (station.c:2010) ==19542== by 0x420B5B: scan_request_free (scan.c:156) ==19542== by 0x410BAC: destroy_work (wiphy.c:294) ==19542== by 0x410BAC: wiphy_radio_work_done (wiphy.c:1613) ==19542== by 0x46C66E: l_queue_clear (queue.c:107) ==19542== by 0x46C6B8: l_queue_destroy (queue.c:82) ==19542== by 0x420BAE: scan_context_free (scan.c:205) ==19542== by 0x424135: scan_wdev_remove (scan.c:2272) ==19542== by 0x408754: netdev_free (netdev.c:847) ==19542== by 0x40E18C: netdev_shutdown (netdev.c:5773) ==19542== by 0x404756: iwd_shutdown (main.c:78) ==19542== by 0x404756: iwd_shutdown (main.c:65) ==19542== by 0x470E21: handle_callback (signal.c:78) ==19542== by 0x470E21: signalfd_read_cb (signal.c:104) ==19542== by 0x47166B: io_callback (io.c:120) ==19542== Address 0x4d81f98 is 200 bytes inside a block of size 288 free'd ==19542== at 0x48399CB: free (vg_replace_malloc.c:538) ==19542== by 0x47F3E5: interface_instance_free (dbus-service.c:510) ==19542== by 0x481DEA: _dbus_object_tree_remove_interface (dbus-service.c:1694) ==19542== by 0x481F1C: _dbus_object_tree_object_destroy (dbus-service.c:795) ==19542== by 0x40894F: netdev_free (netdev.c:844) ==19542== by 0x40E18C: netdev_shutdown (netdev.c:5773) ==19542== by 0x404756: iwd_shutdown (main.c:78) ==19542== by 0x404756: iwd_shutdown (main.c:65) ==19542== by 0x470E21: handle_callback (signal.c:78) ==19542== by 0x470E21: signalfd_read_cb (signal.c:104) ==19542== by 0x47166B: io_callback (io.c:120) ==19542== by 0x47088C: l_main_iterate (main.c:478) ==19542== by 0x47095B: l_main_run (main.c:525) ==19542== by 0x47095B: l_main_run (main.c:507) ==19542== by 0x470B6B: l_main_run_with_signal (main.c:647) ==19542== Block was alloc'd at ==19542== at 0x483879F: malloc (vg_replace_malloc.c:307) ==19542== by 0x46AB2D: l_malloc (util.c:62) ==19542== by 0x416599: station_create (station.c:3448) ==19542== by 0x406D55: netdev_newlink_notify (netdev.c:5324) ==19542== by 0x46D4BC: l_hashmap_foreach (hashmap.c:612) ==19542== by 0x472F46: process_broadcast (netlink.c:158) ==19542== by 0x472F46: can_read_data (netlink.c:279) ==19542== by 0x47166B: io_callback (io.c:120) ==19542== by 0x47088C: l_main_iterate (main.c:478) ==19542== by 0x47095B: l_main_run (main.c:525) ==19542== by 0x47095B: l_main_run (main.c:507) ==19542== by 0x470B6B: l_main_run_with_signal (main.c:647) ==19542== by 0x403EDB: main (main.c:490) ==19542== --- src/station.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/station.c b/src/station.c index 9c2b4e64..617a9fc0 100644 --- a/src/station.c +++ b/src/station.c @@ -1605,9 +1605,18 @@ static void station_roam_failed(struct station *station) * If we tried a limited scan, failed and the signal is still low, * repeat with a full scan right away */ - if (station->signal_low && !station->roam_scan_full && - !station_roam_scan(station, NULL)) - return; + if (station->signal_low && !station->roam_scan_full) { + /* + * Since we're re-using roam_scan_id, explicitly cancel + * the scan here, so that the destroy callback is not called + * after the return of this function + */ + scan_cancel(netdev_get_wdev_id(station->netdev), + station->roam_scan_id); + + if (!station_roam_scan(station, NULL)) + return; + } delayed_retry: /*