From 35225d81bd1605ddca32c75e35960498f91a1e7e Mon Sep 17 00:00:00 2001 From: Kasper Lund Date: Fri, 22 Sep 2023 11:10:37 +0200 Subject: [PATCH] Fix deadlock in SystemEvent handling (#1826) --- src/event_sources/system_esp32.cc | 21 ++++++- src/resources/wifi_esp32.cc | 97 ++----------------------------- src/scheduler.cc | 29 --------- system/containers.toit | 7 +-- 4 files changed, 26 insertions(+), 128 deletions(-) diff --git a/src/event_sources/system_esp32.cc b/src/event_sources/system_esp32.cc index d60835f2d..3add64eea 100644 --- a/src/event_sources/system_esp32.cc +++ b/src/event_sources/system_esp32.cc @@ -63,12 +63,29 @@ void SystemEventSource::run(const std::function& func) { void SystemEventSource::on_register_resource(Locker& locker, Resource* resource) { SystemResource* system_resource = static_cast(resource); - FATAL_IF_NOT_ESP_OK(esp_event_handler_register(system_resource->event_base(), system_resource->event_id(), on_event, this)); + esp_event_base_t base = system_resource->event_base(); + int32_t id = system_resource->event_id(); + { // The call to register the event handler must be done + // without holding the lock, because registering might + // be forced to wait until any ongoing event handling + // is done. If the event handling itself is blocked on + // the mutex in SystemEventSource::on_event, then we + // would get stuck here if we do not release the lock. + Unlocker unlock(locker); + FATAL_IF_NOT_ESP_OK(esp_event_handler_register(base, id, on_event, this)); + } } void SystemEventSource::on_unregister_resource(Locker& locker, Resource* resource) { SystemResource* system_resource = static_cast(resource); - FATAL_IF_NOT_ESP_OK(esp_event_handler_unregister(system_resource->event_base(), system_resource->event_id(), on_event)); + esp_event_base_t base = system_resource->event_base(); + int32_t id = system_resource->event_id(); + { // The call to unregister the event handler must be done + // without holding the lock. See comment for the equivalent + // situation in SystemEventSource::on_register_resource. + Unlocker unlock(locker); + FATAL_IF_NOT_ESP_OK(esp_event_handler_unregister(base, id, on_event)); + } } void SystemEventSource::on_event(esp_event_base_t base, int32_t id, void* event_data) { diff --git a/src/resources/wifi_esp32.cc b/src/resources/wifi_esp32.cc index f5a6f99cf..56cf2b90d 100644 --- a/src/resources/wifi_esp32.cc +++ b/src/resources/wifi_esp32.cc @@ -43,24 +43,6 @@ enum { WIFI_SCAN_DONE = 1 << 5, }; -class LogMeTender { - public: - LogMeTender(const char* message) : message_(message) { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] %s - enter\n", message); -#endif - } - -#if CONFIG_WPA_DEBUG_PRINT - ~LogMeTender() { - printf("[wifi] %s - leave\n", message_); - } -#endif - - private: - const char* message_; -}; - const int kInvalidWifi = -1; // Only allow one instance of WiFi running. @@ -172,23 +154,9 @@ class WifiResourceGroup : public ResourceGroup { } ~WifiResourceGroup() { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] ~WifiResourceGroup()\n"); -#endif - -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_deinit()\n"); -#endif - esp_err_t err = esp_wifi_deinit(); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_deinit() -> done: %d\n", err); -#endif - FATAL_IF_NOT_ESP_OK(err); + FATAL_IF_NOT_ESP_OK(esp_wifi_deinit()); esp_netif_destroy_default_wifi(netif_); wifi_pool.put(id_); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] ~WifiResourceGroup() -> done\n"); -#endif } uint32_t on_event(Resource* resource, word data, uint32_t state) override; @@ -231,45 +199,20 @@ class WifiEvents : public SystemResource { , state_(STOPPED) {} ~WifiEvents() { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] ~WifiEvents()\n"); -#endif State state = this->state(); if (state >= CONNECTED) { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_disconnect\n"); -#endif - esp_err_t err = esp_wifi_disconnect(); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_disconnect() -> done: %d\n", err); -#endif - FATAL_IF_NOT_ESP_OK(err); + FATAL_IF_NOT_ESP_OK(esp_wifi_disconnect()); } if (state >= STARTED) { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_stop()\n"); -#endif - esp_err_t err = esp_wifi_stop(); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_stop() -> done: %d\n", err); -#endif - FATAL_IF_NOT_ESP_OK(err); + FATAL_IF_NOT_ESP_OK(esp_wifi_stop()); } -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] ~WifiEvents() -> done\n"); -#endif } uint8 disconnect_reason() const { return disconnect_reason_; } void set_disconnect_reason(uint8 reason) { disconnect_reason_ = reason; } State state() const { return state_; } - void set_state(State state) { -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] state updated: %d->%d\n", state_, state); -#endif - state_ = state; - } + void set_state(State state) { state_ = state; } private: friend class WifiResourceGroup; @@ -285,7 +228,6 @@ class WifiIpEvents : public SystemResource { }; uint32 WifiResourceGroup::on_event_wifi(Resource* resource, word data, uint32 state) { - LogMeTender lmt("on_event_wifi"); SystemEvent* system_event = reinterpret_cast(data); WifiEvents* events = static_cast(resource); @@ -325,14 +267,7 @@ uint32 WifiResourceGroup::on_event_wifi(Resource* resource, word data, uint32 st bool reconnecting = false; if (reconnect && reconnects_remaining_ > 0) { reconnects_remaining_--; - esp_err_t cr = ESP_OK; - { LogMeTender lmt("esp_wifi_connect:STA_DISCONNECTED"); - cr = esp_wifi_connect(); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_connect -> %d\n", cr); -#endif - } - reconnecting = cr == ESP_OK; + reconnecting = esp_wifi_connect() == ESP_OK; } // If we're attempting to reconnect, we do not @@ -351,14 +286,7 @@ uint32 WifiResourceGroup::on_event_wifi(Resource* resource, word data, uint32 st // because something is seriously wrong. We let the // higher level code know that we're disconnected and // clean up from there. - esp_err_t cr = ESP_OK; - { LogMeTender lmt("esp_wifi_connect:STA_START"); - cr = esp_wifi_connect(); -#if CONFIG_WPA_DEBUG_PRINT - printf("[wifi] esp_wifi_connect -> %d\n", cr); -#endif - } - if (cr != ESP_OK) { + if (esp_wifi_connect() != ESP_OK) { reconnects_remaining_ = 0; state |= WIFI_DISCONNECTED; } @@ -413,7 +341,6 @@ uint32 WifiResourceGroup::on_event_wifi(Resource* resource, word data, uint32 st } uint32 WifiResourceGroup::on_event_ip(Resource* resource, word data, uint32 state) { - LogMeTender lmt("on_event_ip"); SystemEvent* system_event = reinterpret_cast(data); switch (system_event->id) { @@ -474,7 +401,6 @@ uint32_t WifiResourceGroup::on_event(Resource* resource, word data, uint32_t sta MODULE_IMPLEMENTATION(wifi, MODULE_WIFI) PRIMITIVE(init) { - LogMeTender lmt("init"); ARGS(bool, ap); HeapTagScope scope(ITERATE_CUSTOM_TAGS + WIFI_MALLOC_TAG); @@ -572,7 +498,6 @@ PRIMITIVE(init) { } PRIMITIVE(close) { - LogMeTender lmt("close"); ARGS(WifiResourceGroup, group); group->tear_down(); @@ -581,7 +506,6 @@ PRIMITIVE(close) { } PRIMITIVE(connect) { - LogMeTender lmt("connect"); ARGS(WifiResourceGroup, group, cstring, ssid, cstring, password); HeapTagScope scope(ITERATE_CUSTOM_TAGS + WIFI_MALLOC_TAG); @@ -608,7 +532,6 @@ PRIMITIVE(connect) { } PRIMITIVE(establish) { - LogMeTender lmt("establish"); ARGS(WifiResourceGroup, group, cstring, ssid, cstring, password, bool, broadcast, int, channel); HeapTagScope scope(ITERATE_CUSTOM_TAGS + WIFI_MALLOC_TAG); @@ -635,7 +558,6 @@ PRIMITIVE(establish) { } PRIMITIVE(setup_ip) { - LogMeTender lmt("setup_ip"); ARGS(WifiResourceGroup, group); HeapTagScope scope(ITERATE_CUSTOM_TAGS + WIFI_MALLOC_TAG); @@ -651,7 +573,6 @@ PRIMITIVE(setup_ip) { } PRIMITIVE(disconnect) { - LogMeTender lmt("disconnect"); ARGS(WifiResourceGroup, group, WifiEvents, wifi); group->unregister_resource(wifi); @@ -660,7 +581,6 @@ PRIMITIVE(disconnect) { } PRIMITIVE(disconnect_reason) { - LogMeTender lmt("disconnect_reason"); ARGS(WifiEvents, wifi); switch (wifi->disconnect_reason()) { case WIFI_REASON_ASSOC_EXPIRE: @@ -684,7 +604,6 @@ PRIMITIVE(disconnect_reason) { } PRIMITIVE(get_ip) { - LogMeTender lmt("get_ip"); ARGS(WifiResourceGroup, group, int, index); if (index < 0 || index >= WifiResourceGroup::NUMBER_OF_ADDRESSES) { FAIL(INVALID_ARGUMENT); @@ -702,7 +621,6 @@ PRIMITIVE(get_ip) { } PRIMITIVE(init_scan) { - LogMeTender lmt("init_scan"); ARGS(WifiResourceGroup, group) ByteArray* proxy = process->object_heap()->allocate_proxy(); @@ -724,7 +642,6 @@ PRIMITIVE(init_scan) { } PRIMITIVE(start_scan) { - LogMeTender lmt("start_scan"); ARGS(WifiResourceGroup, group, int, channel, bool, passive, int, period_ms); esp_err_t ret = group->start_scan(passive, channel, period_ms); @@ -736,7 +653,6 @@ PRIMITIVE(start_scan) { } PRIMITIVE(read_scan) { - LogMeTender lmt("read_scan"); ARGS(WifiResourceGroup, group); uint16_t count; @@ -781,7 +697,6 @@ PRIMITIVE(read_scan) { } PRIMITIVE(ap_info) { - LogMeTender lmt("ap_info"); ARGS(WifiResourceGroup, group); wifi_ap_record_t ap_record; diff --git a/src/scheduler.cc b/src/scheduler.cc index 3a82b1a13..9bd73cb94 100644 --- a/src/scheduler.cc +++ b/src/scheduler.cc @@ -172,10 +172,6 @@ Scheduler::ExitState Scheduler::launch_program(Locker& locker, Process* process) } } -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] exit - reason=%d\n", exit_state_.reason); -#endif - if (!has_exit_reason()) { exit_state_.reason = EXIT_DONE; } @@ -186,10 +182,6 @@ Scheduler::ExitState Scheduler::launch_program(Locker& locker, Process* process) delete thread; } -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] exit - threads deleted\n"); -#endif - for (int i = 0; i < NUMBER_OF_READY_QUEUES; i++) { ProcessListFromScheduler& ready_queue = ready_queue_[i]; while (ready_queue.remove_first()) { @@ -208,10 +200,6 @@ Scheduler::ExitState Scheduler::launch_program(Locker& locker, Process* process) delete group; } -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] exit - processes deleted\n"); -#endif - return exit_state_; } @@ -417,11 +405,6 @@ void Scheduler::run(SchedulerThread* scheduler_thread) { run_process(locker, process, scheduler_thread); } -#ifdef CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] stopping scheduler thread (threads=%d->%d)\n", - num_threads_, num_threads_ - 1); -#endif - // Notify potential other thread, that no more processes are left. OS::signal(has_processes_); @@ -739,10 +722,6 @@ void Scheduler::run_process(Locker& locker, Process* process, SchedulerThread* s } case Interpreter::Result::DEEP_SLEEP: { -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] process requested deep sleep (processes=%d, threads=%d)\n", - num_processes_, num_threads_); -#endif ExitState exit(EXIT_DEEP_SLEEP, result.value()); terminate_execution(locker, exit); break; @@ -892,10 +871,6 @@ void Scheduler::terminate_execution(Locker& locker, ExitState exit) { exit_state_ = exit; } -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] terminating - reason=%d\n", exit_state_.reason); -#endif - for (SchedulerThread* thread : threads_) { Process* process = thread->interpreter()->process(); if (process != null) { @@ -903,10 +878,6 @@ void Scheduler::terminate_execution(Locker& locker, ExitState exit) { } } -#if CONFIG_WPA_DEBUG_PRINT - printf("[scheduler] terminating - processes signalled\n"); -#endif - OS::signal(has_processes_); } diff --git a/system/containers.toit b/system/containers.toit index 038d425ea..dba158261 100644 --- a/system/containers.toit +++ b/system/containers.toit @@ -322,12 +322,7 @@ class ContainerManager extends ContainerServiceProvider implements SystemMessage // TODO(kasper): Not so happy with this name. wait-until-done -> int: if containers-by-id_.is-empty: return 0 - if platform != PLATFORM-FREERTOS: return done_.get - while true: - interval := Duration --s=1 - catch --unwind=(: it != DEADLINE-EXCEEDED-ERROR): - with_timeout interval: return done_.get - print_ "[toit] DEBUG: system process is running ($Time.monotonic-us)" + return done_.get on-container-load_ container/Container -> none: containers/Map ::= containers-by-image_.get container.image.id --init=: {:}