[AUTO] Fix the timing issue in AUTO inference (openvinotoolkit#27290)
### Details:
 - Added synchronization

### Tickets:
 - 153629

---------

Co-authored-by: Chen Peter <[email protected]>
2 people authored and MirceaDan99 committed Jan 22, 2025
1 parent f252985 commit c9933ff
Showing 4 changed files with 174 additions and 49 deletions.
7 changes: 5 additions & 2 deletions src/plugins/auto/src/schedule.cpp
@@ -85,8 +85,11 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
OPENVINO_THROW("Every device used with AUTO should support query optimal_number_of_infer_requests property from compiled model ",
iie.what());
}
-    const auto num_requests = (m_context->m_device_priorities.end() == it_numrequests ||
-        it_numrequests->num_requests_per_devices == -1) ? optimal_num : it_numrequests->num_requests_per_devices;
+    auto num_requests =
+        (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1)
+            ? optimal_num
+            : it_numrequests->num_requests_per_devices;
+    num_requests = (num_requests == 1) ? 2 : num_requests;
auto& worker_requests = m_worker_requests[device];
auto& idle_worker_requests = m_idle_worker_requests[device];
worker_requests.resize(num_requests);
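For readers skimming the hunk above, here is a minimal standalone sketch of the request-count logic it now implements. `pick_num_requests`, `user_override`, and the assertions are illustrative only and not part of the plugin; the `-1` sentinel and the floor of two requests mirror the diff.

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the selection in generate_workers(): take the device's optimal
// request count unless a per-device override was configured (-1 means "unset"),
// then never settle for a single worker request.
int32_t pick_num_requests(int32_t optimal_num, int32_t user_override) {
    int32_t num_requests = (user_override == -1) ? optimal_num : user_override;
    return (num_requests == 1) ? 2 : num_requests;  // the fix: floor at two
}

int main() {
    assert(pick_num_requests(1, -1) == 2);  // an optimal count of 1 is bumped to 2
    assert(pick_num_requests(4, -1) == 4);  // larger optimal counts are unchanged
    assert(pick_num_requests(4, 1) == 2);   // an explicit override of 1 is also bumped
    return 0;
}
```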
28 changes: 22 additions & 6 deletions src/plugins/auto/tests/unit/dynamic_output_test.cpp
@@ -7,7 +7,6 @@

#include "include/auto_unit_test.hpp"
#include "openvino/runtime/threading/immediate_executor.hpp"

using DynamicOutputConfigParams = std::tuple<ov::Any, // priority device list
ov::Any // expected device to run inference on
>;
@@ -21,14 +20,18 @@ class DynamicOutputInferenceTest : public tests::AutoTest, public ::testing::Tes
mockExecutor.reset();
mockExecutorActual.reset();
mockInferrequest.reset();
+    mockInferrequest_2.reset();
mockInferrequestActual.reset();
+    mockInferrequestActual_2.reset();
}

protected:
ov::Any priorityList;
ov::Any targetList;
std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequest;
+    std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequest_2;
std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequestActual;
+    std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequestActual_2;
std::shared_ptr<ov::threading::ImmediateExecutor> mockExecutor;
std::shared_ptr<ov::threading::ImmediateExecutor> mockExecutorActual;
};
@@ -53,10 +56,22 @@ void DynamicOutputInferenceTest::SetUp() {
mockExecutorActual = std::make_shared<ov::threading::ImmediateExecutor>();
mockInferrequest =
std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternal, mockExecutor, nullptr, false);
+    // will be at least 2 infer requests for mocked CPU/GPU
+    auto inferReqInternal_2 = std::make_shared<ov::mock_auto_plugin::MockISyncInferRequest>(mockIExeNet);
+    mockInferrequest_2 =
+        std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternal_2, mockExecutor, nullptr, false);
+
+    auto inferReqInternalActual_2 = std::make_shared<ov::mock_auto_plugin::MockISyncInferRequest>(mockIExeNetActual);
+
mockInferrequestActual = std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternalActual,
mockExecutorActual,
nullptr,
false);
+    mockInferrequestActual_2 = std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternalActual_2,
+                                                                                             mockExecutorActual,
+                                                                                             nullptr,
+                                                                                             false);
+
std::tie(priorityList, targetList) = GetParam();
auto targets = targetList.as<std::vector<std::string>>();
ON_CALL(*core, get_available_devices()).WillByDefault(Return(targets));
@@ -103,11 +118,12 @@ TEST_P(DynamicOutputInferenceTest, CanInferWithOutputChangedFromDynamicOnAutoToS
auto tensor = inferReqInternal->get_tensor(it);
tensor->set_shape(ov::Shape{2, 3});
}
-    ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest));
-    ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() {
-        std::this_thread::sleep_for(std::chrono::milliseconds(0));
-        return mockInferrequestActual;
-    }));
+    EXPECT_CALL(*mockIExeNet.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequest))
+        .WillOnce(Return(mockInferrequest_2));
+    EXPECT_CALL(*mockIExeNetActual.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequestActual))
+        .WillOnce(Return(mockInferrequestActual_2));
config.insert(ov::device::priorities(priorityList.as<std::string>()));
config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
std::shared_ptr<ov::ICompiledModel> exeNetwork;
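The test now expects exactly two `create_infer_request()` calls on each mocked compiled model, returning a distinct mock request per call. For anyone less familiar with GoogleMock, a self-contained sketch of that `WillOnce`-chaining pattern follows; the `Factory` interface here is hypothetical and only stands in for the mocked compiled model.

```cpp
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>

// Hypothetical interface, used only to illustrate WillOnce chaining.
struct Factory {
    virtual ~Factory() = default;
    virtual std::shared_ptr<int> create() = 0;
};

struct MockFactory : Factory {
    MOCK_METHOD(std::shared_ptr<int>, create, (), (override));
};

TEST(WillOnceChaining, EachCallReturnsTheNextCannedObject) {
    MockFactory factory;
    auto first = std::make_shared<int>(1);
    auto second = std::make_shared<int>(2);
    // Two WillOnce clauses imply an expected call count of exactly two,
    // consumed in order, the same shape as the EXPECT_CALLs above.
    EXPECT_CALL(factory, create())
        .WillOnce(::testing::Return(first))
        .WillOnce(::testing::Return(second));
    EXPECT_EQ(factory.create(), first);
    EXPECT_EQ(factory.create(), second);
}
```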
14 changes: 9 additions & 5 deletions src/plugins/auto/tests/unit/release_helper_test.cpp
@@ -157,7 +157,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
bool cpuSuccess;
bool accSuccess;
std::tie(cpuSuccess, accSuccess) = this->GetParam();
-    size_t decreaseCount = 0;
+    size_t decreaseExeNetworkCount = 0;
+    size_t decreaseInferReqCount = 0;
// test auto plugin
plugin->set_device_name("AUTO");
const std::string strDevices = ov::test::utils::DEVICE_GPU + std::string(",") + ov::test::utils::DEVICE_CPU;
@@ -188,8 +189,11 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
::testing::Matcher<const std::string&>(StrEq(ov::test::utils::DEVICE_CPU)),
_))
.WillByDefault(Return(mockExeNetwork));
-        if (accSuccess)
-            decreaseCount++;
+        if (accSuccess) {
+            decreaseExeNetworkCount++;
+            // will be at least 2 infer requests for mocked CPU/GPU
+            decreaseInferReqCount += 2;
+        }
} else {
ON_CALL(*core,
compile_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
@@ -224,8 +228,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
auto sharedcount = mockExeNetwork._ptr.use_count();
auto requestsharedcount = inferReqInternal.use_count();
std::this_thread::sleep_for(std::chrono::milliseconds(500));
-    EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseCount);
-    EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseCount);
+    EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseExeNetworkCount);
+    EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseInferReqCount);
if (cpuSuccess || accSuccess) {
if (accSuccess)
EXPECT_EQ(exeNetwork->get_property(ov::execution_devices.name()).as<std::string>(),
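The assertions above compare `std::shared_ptr::use_count()` snapshots taken before and after AUTO releases the helper device, with separate expected drops for the compiled model (one reference) and for the infer requests (two, since each mocked device now gets at least two requests). A tiny generic illustration of that counting, not tied to the plugin's types:

```cpp
#include <cassert>
#include <memory>
#include <vector>

int main() {
    auto request = std::make_shared<int>(42);
    // Two workers each hold a reference, standing in for the two infer
    // requests created per mocked device.
    std::vector<std::shared_ptr<int>> workers{request, request};
    const long before = request.use_count();   // 3: this scope plus two workers
    workers.clear();                            // "releasing" the device drops both
    assert(request.use_count() == before - 2);  // mirrors decreaseInferReqCount += 2
    return 0;
}
```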