[AUTO] Fix the timing issue in AUTO inference (openvinotoolkit#27290)
### Details:
 - Added synchronization

### Tickets:
 - 153629

---------

Co-authored-by: Chen Peter <[email protected]>
2 people authored and MirceaDan99 committed Jan 22, 2025
1 parent f252985 commit c9933ff
Showing 4 changed files with 174 additions and 49 deletions.
7 changes: 5 additions & 2 deletions src/plugins/auto/src/schedule.cpp
@@ -85,8 +85,11 @@ void Schedule::generate_workers(const std::string& device, const SoCompiledModel
OPENVINO_THROW("Every device used with AUTO should support query optimal_number_of_infer_requests property from compiled model ",
iie.what());
}
-    const auto num_requests = (m_context->m_device_priorities.end() == it_numrequests ||
-        it_numrequests->num_requests_per_devices == -1) ? optimal_num : it_numrequests->num_requests_per_devices;
+    auto num_requests =
+        (m_context->m_device_priorities.end() == it_numrequests || it_numrequests->num_requests_per_devices == -1)
+            ? optimal_num
+            : it_numrequests->num_requests_per_devices;
+    num_requests = (num_requests == 1) ? 2 : num_requests;
auto& worker_requests = m_worker_requests[device];
auto& idle_worker_requests = m_idle_worker_requests[device];
worker_requests.resize(num_requests);
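For readers skimming the hunk above, here is a minimal standalone sketch of the request-count logic it now implements. `pick_num_requests`, `user_override`, and the assertions are illustrative only and not part of the plugin; the `-1` sentinel and the floor of two requests mirror the diff.

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the selection in generate_workers(): take the device's optimal
// request count unless a per-device override was configured (-1 means "unset"),
// then never settle for a single worker request.
int32_t pick_num_requests(int32_t optimal_num, int32_t user_override) {
    int32_t num_requests = (user_override == -1) ? optimal_num : user_override;
    return (num_requests == 1) ? 2 : num_requests;  // the fix: floor at two
}

int main() {
    assert(pick_num_requests(1, -1) == 2);  // an optimal count of 1 is bumped to 2
    assert(pick_num_requests(4, -1) == 4);  // larger optimal counts are unchanged
    assert(pick_num_requests(4, 1) == 2);   // an explicit override of 1 is also bumped
    return 0;
}
```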
28 changes: 22 additions & 6 deletions src/plugins/auto/tests/unit/dynamic_output_test.cpp
@@ -7,7 +7,6 @@

#include "include/auto_unit_test.hpp"
#include "openvino/runtime/threading/immediate_executor.hpp"

using DynamicOutputConfigParams = std::tuple<ov::Any, // priority device list
ov::Any // expected device to run inference on
>;
@@ -21,14 +20,18 @@ class DynamicOutputInferenceTest : public tests::AutoTest, public ::testing::Tes
mockExecutor.reset();
mockExecutorActual.reset();
mockInferrequest.reset();
+    mockInferrequest_2.reset();
mockInferrequestActual.reset();
+    mockInferrequestActual_2.reset();
}

protected:
ov::Any priorityList;
ov::Any targetList;
std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequest;
+    std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequest_2;
std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequestActual;
+    std::shared_ptr<ov::mock_auto_plugin::MockAsyncInferRequest> mockInferrequestActual_2;
std::shared_ptr<ov::threading::ImmediateExecutor> mockExecutor;
std::shared_ptr<ov::threading::ImmediateExecutor> mockExecutorActual;
};
@@ -53,10 +56,22 @@ void DynamicOutputInferenceTest::SetUp() {
mockExecutorActual = std::make_shared<ov::threading::ImmediateExecutor>();
mockInferrequest =
std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternal, mockExecutor, nullptr, false);
+    // will be at least 2 infer requests for mocked CPU/GPU
+    auto inferReqInternal_2 = std::make_shared<ov::mock_auto_plugin::MockISyncInferRequest>(mockIExeNet);
+    mockInferrequest_2 =
+        std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternal_2, mockExecutor, nullptr, false);
+
+    auto inferReqInternalActual_2 = std::make_shared<ov::mock_auto_plugin::MockISyncInferRequest>(mockIExeNetActual);
+
mockInferrequestActual = std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternalActual,
mockExecutorActual,
nullptr,
false);
+    mockInferrequestActual_2 = std::make_shared<ov::mock_auto_plugin::MockAsyncInferRequest>(inferReqInternalActual_2,
+                                                                                             mockExecutorActual,
+                                                                                             nullptr,
+                                                                                             false);
+
std::tie(priorityList, targetList) = GetParam();
auto targets = targetList.as<std::vector<std::string>>();
ON_CALL(*core, get_available_devices()).WillByDefault(Return(targets));
@@ -103,11 +118,12 @@ TEST_P(DynamicOutputInferenceTest, CanInferWithOutputChangedFromDynamicOnAutoToS
auto tensor = inferReqInternal->get_tensor(it);
tensor->set_shape(ov::Shape{2, 3});
}
-    ON_CALL(*mockIExeNet.get(), create_infer_request()).WillByDefault(Return(mockInferrequest));
-    ON_CALL(*mockIExeNetActual.get(), create_infer_request()).WillByDefault(InvokeWithoutArgs([this]() {
-        std::this_thread::sleep_for(std::chrono::milliseconds(0));
-        return mockInferrequestActual;
-    }));
+    EXPECT_CALL(*mockIExeNet.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequest))
+        .WillOnce(Return(mockInferrequest_2));
+    EXPECT_CALL(*mockIExeNetActual.get(), create_infer_request())
+        .WillOnce(Return(mockInferrequestActual))
+        .WillOnce(Return(mockInferrequestActual_2));
config.insert(ov::device::priorities(priorityList.as<std::string>()));
config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
std::shared_ptr<ov::ICompiledModel> exeNetwork;
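The test now expects exactly two `create_infer_request()` calls on each mocked compiled model, returning a distinct mock request per call. For anyone less familiar with GoogleMock, a self-contained sketch of that `WillOnce`-chaining pattern follows; the `Factory` interface here is hypothetical and only stands in for the mocked compiled model.

```cpp
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <memory>

// Hypothetical interface, used only to illustrate WillOnce chaining.
struct Factory {
    virtual ~Factory() = default;
    virtual std::shared_ptr<int> create() = 0;
};

struct MockFactory : Factory {
    MOCK_METHOD(std::shared_ptr<int>, create, (), (override));
};

TEST(WillOnceChaining, EachCallReturnsTheNextCannedObject) {
    MockFactory factory;
    auto first = std::make_shared<int>(1);
    auto second = std::make_shared<int>(2);
    // Two WillOnce clauses imply an expected call count of exactly two,
    // consumed in order, the same shape as the EXPECT_CALLs above.
    EXPECT_CALL(factory, create())
        .WillOnce(::testing::Return(first))
        .WillOnce(::testing::Return(second));
    EXPECT_EQ(factory.create(), first);
    EXPECT_EQ(factory.create(), second);
}
```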
14 changes: 9 additions & 5 deletions src/plugins/auto/tests/unit/release_helper_test.cpp
@@ -157,7 +157,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
bool cpuSuccess;
bool accSuccess;
std::tie(cpuSuccess, accSuccess) = this->GetParam();
-    size_t decreaseCount = 0;
+    size_t decreaseExeNetworkCount = 0;
+    size_t decreaseInferReqCount = 0;
// test auto plugin
plugin->set_device_name("AUTO");
const std::string strDevices = ov::test::utils::DEVICE_GPU + std::string(",") + ov::test::utils::DEVICE_CPU;
@@ -188,8 +189,11 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
::testing::Matcher<const std::string&>(StrEq(ov::test::utils::DEVICE_CPU)),
_))
.WillByDefault(Return(mockExeNetwork));
-        if (accSuccess)
-            decreaseCount++;
+        if (accSuccess) {
+            decreaseExeNetworkCount++;
+            // will be at least 2 infer requests for mocked CPU/GPU
+            decreaseInferReqCount += 2;
+        }
} else {
ON_CALL(*core,
compile_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
@@ -224,8 +228,8 @@ TEST_P(AutoReleaseHelperTest, releaseResource) {
auto sharedcount = mockExeNetwork._ptr.use_count();
auto requestsharedcount = inferReqInternal.use_count();
std::this_thread::sleep_for(std::chrono::milliseconds(500));
-    EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseCount);
-    EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseCount);
+    EXPECT_EQ(mockExeNetwork._ptr.use_count(), sharedcount - decreaseExeNetworkCount);
+    EXPECT_EQ(inferReqInternal.use_count(), requestsharedcount - decreaseInferReqCount);
if (cpuSuccess || accSuccess) {
if (accSuccess)
EXPECT_EQ(exeNetwork->get_property(ov::execution_devices.name()).as<std::string>(),
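The assertions above compare `std::shared_ptr::use_count()` snapshots taken before and after AUTO releases the helper device, with separate expected drops for the compiled model (one reference) and for the infer requests (two, since each mocked device now gets at least two requests). A tiny generic illustration of that counting, not tied to the plugin's types:

```cpp
#include <cassert>
#include <memory>
#include <vector>

int main() {
    auto request = std::make_shared<int>(42);
    // Two workers each hold a reference, standing in for the two infer
    // requests created per mocked device.
    std::vector<std::shared_ptr<int>> workers{request, request};
    const long before = request.use_count();   // 3: this scope plus two workers
    workers.clear();                            // "releasing" the device drops both
    assert(request.use_count() == before - 2);  // mirrors decreaseInferReqCount += 2
    return 0;
}
```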