openvinotoolkit · wangleis · Jan 15, 2025 · Oct 15, 2024 · Oct 28, 2024 · Oct 29, 2024
diff --git a/src/plugins/auto/src/auto_schedule.cpp b/src/plugins/auto/src/auto_schedule.cpp
@@ -212,10 +212,12 @@ void AutoSchedule::init() {
             // initialize containers before run async task
             m_idle_worker_requests[device.device_name];
             m_worker_requests[device.device_name];
+            m_worker_requests_conds[device.device_name];
             m_infer_pipeline_tasks_device_specific[device.device_name] = nullptr;
         }
         m_idle_worker_requests["CPU_HELP"];
         m_worker_requests["CPU_HELP"];
+        m_worker_requests_conds["CPU_HELP"];
         m_infer_pipeline_tasks_device_specific["CPU_HELP"] = nullptr;
         m_executor->run(m_compile_context[CPU].m_task);
         m_executor->run(m_compile_context[ACTUALDEVICE].m_task);
@@ -488,7 +490,11 @@ bool AutoSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline
         if (!preferred_device.empty() && (device.device_name != preferred_device)) {
             continue;
         }
-        if (run_pipeline_task(pipeline_task, m_idle_worker_requests[device.device_name], preferred_device)) {
+        if (run_pipeline_task(pipeline_task,
+                              m_idle_worker_requests[device.device_name],
+                              preferred_device,
+                              m_worker_requests_conds[device.device_name],
+                              m_worker_infer_mutex)) {
             return true;
         }
     }

diff --git a/src/plugins/auto/src/cumulative_schedule.cpp b/src/plugins/auto/src/cumulative_schedule.cpp
@@ -148,6 +148,7 @@ void CumuSchedule::init() {
         // initialize containers before run async task, if not initialized, it will hang during infer
         m_idle_worker_requests[device.device_name];
         m_worker_requests[device.device_name];
+        m_worker_requests_conds[device.device_name];
         m_infer_pipeline_tasks_device_specific[device.device_name] = nullptr;
     }
     // load devices other than CPU first
@@ -247,7 +248,11 @@ bool CumuSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline
         }
         auto selected_device_name =
             preferred_device.empty() ? schedule_to_next_device(devices, current_device_index) : preferred_device;
-        if (run_pipeline_task(pipeline_task, m_idle_worker_requests[selected_device_name], preferred_device)) {
+        if (run_pipeline_task(pipeline_task,
+                              m_idle_worker_requests[selected_device_name],
+                              preferred_device,
+                              m_worker_requests_conds[selected_device_name],
+                              m_worker_infer_mutex)) {
             return true;
         } else {
             current_device_index++;

@@ -52,11 +52,21 @@ void Schedule::run(ov::threading::Task pipeline_task) {
 }
 
 bool Schedule::run_pipeline_task(ov::threading::Task& pipeline_task,
-    NotBusyPriorityWorkerRequests& idle_workerrequests,
-    const DeviceName& preferred_device) {
+                                 NotBusyPriorityWorkerRequests& idle_workerrequests,
+                                 const DeviceName& preferred_device,
+                                 std::condition_variable& idle_workerrequests_cv,
+                                 std::mutex& worker_infer_mutex) {
     WorkerInferRequest* worker_request_ptr = nullptr;
     std::pair<int, WorkerInferRequest*> worker;
-    if (idle_workerrequests.try_pop(worker)) {
+    {
+        std::unique_lock<std::mutex> lck(worker_infer_mutex);
+        if (!idle_workerrequests.try_pop(worker)) {
+            idle_workerrequests_cv.wait(lck, [&idle_workerrequests, &worker] {
+                return idle_workerrequests.try_pop(worker);
+            });
+        }
+    }
+    if (worker.second) {
         worker_request_ptr = worker.second;
         IdleGuard<NotBusyPriorityWorkerRequests> idle_guard{worker_request_ptr, idle_workerrequests};
         m_this_worker_infer_request = worker_request_ptr;
@@ -85,10 +95,13 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
         OPENVINO_THROW("Every device used with AUTO should support query optimal_number_of_infer_requests property from compiled model ",
                     iie.what());
     }
-    const auto num_requests = (m_context->m_device_priorities.end() == it_numrequests ||
-                              it_numrequests->num_requests_per_devices == -1) ? optimal_num : it_numrequests->num_requests_per_devices;
+    auto num_requests =
+        (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1)
+            ? optimal_num
+            : it_numrequests->num_requests_per_devices;
     auto& worker_requests = m_worker_requests[device];
     auto& idle_worker_requests = m_idle_worker_requests[device];
+    auto& worker_requests_cv = m_worker_requests_conds[device];
     worker_requests.resize(num_requests);
     m_infer_pipeline_tasks_device_specific[device] = std::unique_ptr<TaskQueue>(new TaskQueue);
     auto* idle_workerrequests_ptr = &(idle_worker_requests);
@@ -98,9 +111,11 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
         worker_request.m_inferrequest = {compiled_model->create_infer_request(), compiled_model._so};
         auto* worker_request_ptr = &worker_request;
         worker_request_ptr->m_index = num++;
-        OPENVINO_ASSERT(idle_worker_requests.try_push(std::make_pair(worker_request_ptr->m_index, worker_request_ptr)) == true);
+        OPENVINO_ASSERT(
+            idle_worker_requests.try_push(std::make_pair(worker_request_ptr->m_index, worker_request_ptr)) == true);
         worker_request.m_inferrequest->set_callback(
-            [worker_request_ptr, this, device, idle_workerrequests_ptr](std::exception_ptr exception_ptr) mutable {
+            [worker_request_ptr, this, device, idle_workerrequests_ptr, &worker_requests_cv](
+                std::exception_ptr exception_ptr) mutable {
                 IdleGuard<NotBusyPriorityWorkerRequests> idleGuard{worker_request_ptr, *idle_workerrequests_ptr};
                 worker_request_ptr->m_exception_ptr = std::move(exception_ptr);
                 {
@@ -128,17 +143,20 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
                     } else {
                         stop_retry_and_continue();
                     }
-                    // try to return the request to the idle list (fails if the overall object destruction has began)
-                    if (idleGuard.release()->try_push(std::make_pair(worker_request_ptr->m_index, worker_request_ptr))) {
-                        // let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
-                        // if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
+                    std::unique_lock<std::mutex> lck(m_worker_infer_mutex);
+                    if (idleGuard.release()->try_push(
+                            std::make_pair(worker_request_ptr->m_index, worker_request_ptr))) {
+                        // Try to pop a task, as we know there is at least one idle request; schedule it if that
+                        // succeeded. If there are no device-agnostic tasks, try to pop a device-specific task and
+                        // schedule it if that succeeded.
                         ov::threading::Task t;
                         do {
                             m_infer_pipeline_tasks.try_pop(t);
                         } while (t && schedule_to_worker_infer_request(std::move(t)));
                         do {
                             m_infer_pipeline_tasks_device_specific[device]->try_pop(t);
                         } while (t && schedule_to_worker_infer_request(std::move(t), device));
+                        worker_requests_cv.notify_all();
                     }
                 }
             });

diff --git a/src/plugins/auto/src/schedule.hpp b/src/plugins/auto/src/schedule.hpp
@@ -29,8 +29,11 @@ class Schedule : public std::enable_shared_from_this<Schedule>, public ov::threa
 
 protected:
     virtual void init() = 0;
-    static bool run_pipeline_task(ov::threading::Task& pipeline_task, NotBusyPriorityWorkerRequests& idle_worker_request,
-                                  const DeviceName& preferred_device);
+    static bool run_pipeline_task(ov::threading::Task& pipeline_task,
+                                  NotBusyPriorityWorkerRequests& idle_worker_request,
+                                  const DeviceName& preferred_device,
+                                  std::condition_variable& idle_worker_request_cv,
+                                  std::mutex& mutex);
     virtual void generate_workers(const std::string& device, const SoCompiledModel& compiled_model);
     virtual void try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) = 0;
     virtual bool schedule_to_worker_infer_request(ov::threading::Task, DeviceName preferred_device = "") = 0;
@@ -40,6 +43,7 @@ class Schedule : public std::enable_shared_from_this<Schedule>, public ov::threa
     std::shared_ptr<ov::threading::IStreamsExecutor>                     m_executor;
     DeviceMap<NotBusyPriorityWorkerRequests>                             m_idle_worker_requests;
     DeviceMap<std::vector<WorkerInferRequest>>                           m_worker_requests;
+    DeviceMap<std::condition_variable>                                   m_worker_requests_conds;
     TaskQueue                                                            m_infer_pipeline_tasks;
     DeviceMap<std::unique_ptr<TaskQueue>>                                m_infer_pipeline_tasks_device_specific;
     SoCompiledModel                                                      m_passthrough_compiled_model;
@@ -50,6 +54,7 @@ class Schedule : public std::enable_shared_from_this<Schedule>, public ov::threa
     mutable std::atomic<std::size_t>                                     m_request_id = {0};
     std::mutex                                                           m_dev_infer_mutex;
     std::unordered_map<IASyncInferPtr, WorkerInferRequest*>              m_dev_infer;
+    std::mutex                                                           m_worker_infer_mutex;
 };
 
 }  // namespace auto_plugin