Skip to content

Commit

Permalink
io_setup may return EAGAIN - it should be retried (for a reasonable t…
Browse files Browse the repository at this point in the history
…ime interval) (#1834)

* io_setup may return EAGAIN - it should be retried (for a reasonable time interval)

* increasing retry count for io_setup EAGAIN

* io_setup EAGAIN retries - more logs

* io_setup EAGAIN retries - even more logs
  • Loading branch information
qkrorlqr authored Aug 24, 2024
1 parent 036e38b commit b4c56fc
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
31 changes: 26 additions & 5 deletions cloud/storage/core/libs/aio/service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cloud/storage/core/libs/common/file_io_service.h>
#include <cloud/storage/core/libs/common/thread.h>

#include <util/stream/file.h>
#include <util/string/builder.h>
#include <util/system/file.h>
#include <util/system/thread.h>
Expand Down Expand Up @@ -45,12 +46,32 @@ class TAsyncIOContext
io_context* Context = nullptr;

public:
// NOTE(review): this span is a diff whose +/- markers were lost. Judging by the
// "26 additions & 5 deletions" summary, the size_t signature and the single
// io_setup + Y_ABORT_UNLESS sequence below are the pre-commit (removed) lines;
// the int signature and the retry loop are the post-commit (added) lines.

// Pre-commit signature (removed side of the diff).
explicit TAsyncIOContext(size_t nr)
// Post-commit signature (added side of the diff): nr is now an int.
explicit TAsyncIOContext(int nr)
{
// Pre-commit body (removed side): one io_setup call, checked immediately.
int ret = io_setup(nr, &Context);
Y_ABORT_UNLESS(ret == 0,
"unable to initialize context: %s",
LastSystemErrorText(-ret));
// Post-commit body (added side): call io_setup repeatedly while it returns
// -EAGAIN, at most maxIterations times, sleeping waitTime between attempts
// (1000 * 100 ms, i.e. roughly 100 seconds of waiting in the worst case).
int code = 0;
int iterations = 0;
const int maxIterations = 1000;
const auto waitTime = TDuration::MilliSeconds(100);
while (iterations < maxIterations) {
++iterations;
code = io_setup(nr, &Context);
if (code == -EAGAIN) {
// Diagnostics only: print the first line of the aio-nr and aio-max-nr
// files under /proc/sys/fs to Cerr on every retry.
const auto aioNr =
TIFStream("/proc/sys/fs/aio-nr").ReadLine();
const auto aioMaxNr =
TIFStream("/proc/sys/fs/aio-max-nr").ReadLine();
Cerr << "retrying EAGAIN from io_setup, aio-nr/max: "
<< aioNr << "/" << aioMaxNr << Endl;
// NOTE(review): this sleep also runs after the final failed attempt,
// right before the loop condition ends the loop.
Sleep(waitTime);
} else {
// Success or any error other than EAGAIN: stop retrying.
break;
}
}

// Checked once after the loop: any non-zero code (including an EAGAIN that
// persisted through every attempt) fails the check; the message carries the
// error text and the number of attempts made.
Y_ABORT_UNLESS(code == 0,
"unable to initialize context: %s, iterations: %d",
LastSystemErrorText(-code),
iterations);
}

~TAsyncIOContext()
Expand Down
27 changes: 27 additions & 0 deletions cloud/storage/core/libs/aio/service_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
#include <util/generic/array_ref.h>
#include <util/generic/scope.h>
#include <util/generic/size_literals.h>
#include <util/stream/file.h>
#include <util/system/file.h>
#include <util/thread/factory.h>

namespace NCloud {

Expand Down Expand Up @@ -81,6 +83,31 @@ Y_UNIT_TEST_SUITE(TAioTest)
UNIT_ASSERT_VALUES_EQUAL('X', val);
}
}

// Checks that creating an AIO service keeps retrying instead of failing when
// the requested event count does not fit under /proc/sys/fs/aio-max-nr:
// service1 takes half of the limit, a background thread then asks for more
// than the remainder, and the request is expected to complete once service1
// is destroyed.
Y_UNIT_TEST(ShouldRetryIoSetupErrors)
{
    const auto eventCountLimit =
        FromString<size_t>(TIFStream("/proc/sys/fs/aio-max-nr").ReadLine());
    const auto service1EventCount = eventCountLimit / 2;
    auto service1 = CreateAIOService(service1EventCount);
    auto promise1 = NThreading::NewPromise<void>();
    auto promise2 = NThreading::NewPromise<void>();

    // The thread handle returned by Run is discarded, so nothing here
    // guarantees that the thread has finished when this function exits
    // (e.g. when the timed GetValue below throws, or right after promise2 is
    // signalled). Capture everything by value so the thread never touches
    // this stack frame; the promise copies are expected to share state with
    // the originals, which is what the futures below wait on.
    SystemThreadFactory()->Run(
        [promise1, promise2, eventCountLimit, service1EventCount] () mutable {
            promise1.SetValue();

            const auto service2EventCount =
                eventCountLimit - service1EventCount + 1;
            // should cause EAGAIN from io_setup until service1 is destroyed
            auto service2 = CreateAIOService(service2EventCount);
            Y_UNUSED(service2);
            promise2.SetValue();
        });

    // Wait until the thread has started, then give it time to reach the
    // over-limit request before releasing service1's share of the limit.
    promise1.GetFuture().GetValueSync();
    Sleep(TDuration::Seconds(1));
    service1.reset();
    // service2 creation is expected to finish shortly after service1 is gone.
    promise2.GetFuture().GetValue(TDuration::Seconds(5));
}
}

} // namespace NCloud

0 comments on commit b4c56fc

Please sign in to comment.