From 9388fd0ff87a58b3694fdc6044e36661b083591b Mon Sep 17 00:00:00 2001 From: Kamil Piszczek Date: Mon, 29 Apr 2024 15:01:06 +0200 Subject: [PATCH] [nrf fromlist] ipc: ipc_service: icbmsg backend: workaround endpoint binding deadlock This change works around the issue with the semaphore timeout during the Bluetooth HCI driver initialization when the bt_enable function is called in the context of the System Workqueue thread. This issue only affects platforms that use the IPC service and its ICBMsg backend (e.g. the nRF54H20 DK target). The bt_enable function, when called in the System Workqueue context, results in a deadlock, as the waiting semaphore of the Bluetooth HCI driver times out: bt_hci_driver: Endpoint binding failed with -11 During the Bluetooth HCI driver open operation in the context of the bt_enable function, the driver code waits using the semaphore for the endpoint binding process of the IPC service module to finalize. The issue occurs when this wait takes place in the System Workqueue context. The ICBMsg backend from the IPC service schedules a system work item during the endpoint registration, in which it finalizes the binding operation - also in the System Workqueue context. As the Bluetooth HCI driver with its wait operation keeps the System Workqueue context busy, the endpoint binding cannot be completed by the ICBMsg backend before the HCI driver semaphore timeout. Upstream PR: https://github.com/zephyrproject-rtos/zephyr/pull/72377 Signed-off-by: Kamil Piszczek --- subsys/ipc/ipc_service/backends/ipc_icbmsg.c | 22 +++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/subsys/ipc/ipc_service/backends/ipc_icbmsg.c b/subsys/ipc/ipc_service/backends/ipc_icbmsg.c index c7cb5f8ea34..c7127888b18 100644 --- a/subsys/ipc/ipc_service/backends/ipc_icbmsg.c +++ b/subsys/ipc/ipc_service/backends/ipc_icbmsg.c @@ -110,6 +110,12 @@ LOG_MODULE_REGISTER(ipc_icbmsg, /** Registered endpoints count mask in flags. 
*/ #define FLAG_EPT_COUNT_MASK 0xFFFF +/** Workqueue stack size for bounding processing (this configuration is not optimized). */ +#define EP_BOUND_WORK_Q_STACK_SIZE (512U) + +/** Workqueue priority for bounding processing. */ +#define EP_BOUND_WORK_Q_PRIORITY (CONFIG_SYSTEM_WORKQUEUE_PRIORITY) + enum msg_type { MSG_DATA = 0, /* Data message. */ MSG_RELEASE_DATA, /* Release data buffer message. */ @@ -194,6 +200,9 @@ struct control_message { BUILD_ASSERT(NUM_EPT <= EPT_ADDR_INVALID, "Too many endpoints"); +/* Work queue for bounding processing. */ +static struct k_work_q ep_bound_work_q; + /** * Calculate pointer to block from its index and channel configuration (RX or TX). * No validation is performed. @@ -672,7 +681,7 @@ static int send_bound_message(struct backend_data *dev_data, struct ept_data *ep */ static void schedule_ept_bound_process(struct backend_data *dev_data) { - k_work_submit(&dev_data->ep_bound_work); + k_work_submit_to_queue(&ep_bound_work_q, &dev_data->ep_bound_work); } /** @@ -1117,6 +1126,17 @@ static int backend_init(const struct device *instance) { const struct icbmsg_config *conf = instance->config; struct backend_data *dev_data = instance->data; + static K_THREAD_STACK_DEFINE(ep_bound_work_q_stack, EP_BOUND_WORK_Q_STACK_SIZE); + static bool is_work_q_started; + + if (!is_work_q_started) { + k_work_queue_init(&ep_bound_work_q); + k_work_queue_start(&ep_bound_work_q, ep_bound_work_q_stack, + K_THREAD_STACK_SIZEOF(ep_bound_work_q_stack), + EP_BOUND_WORK_Q_PRIORITY, NULL); + + is_work_q_started = true; + } dev_data->conf = conf; dev_data->is_initiator = (conf->rx.blocks_ptr < conf->tx.blocks_ptr);