From 09d32f1f2b37e13f0d56e025ccecddf1fa9db76d Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Fri, 28 Jun 2024 01:47:55 -0700 Subject: [PATCH] Fix build and C++ tests for FreeBSD (#10480) --- .github/workflows/freebsd.yml | 33 +++++++++++++++++++++++++ include/xgboost/collective/poll_utils.h | 4 ++- src/c_api/coll_c_api.cc | 21 +++++++++++++--- src/collective/socket.cc | 4 ++- tests/cpp/collective/test_worker.h | 2 +- tests/cpp/common/test_random.cc | 10 ++++++-- tests/cpp/test_cache.cc | 6 ++++- tests/cpp/test_learner.cc | 10 ++++++-- 8 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/freebsd.yml diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml new file mode 100644 index 000000000000..c56395964699 --- /dev/null +++ b/.github/workflows/freebsd.yml @@ -0,0 +1,33 @@ +name: FreeBSD + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + name: A job to run test in FreeBSD + steps: + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Test in FreeBSD + id: test + uses: vmactions/freebsd-vm@v1 + with: + usesh: true + prepare: | + pkg install -y cmake git ninja googletest + + run: | + mkdir build + cd build + cmake .. -GNinja -DGOOGLE_TEST=ON + ninja -v + ./testxgboost diff --git a/include/xgboost/collective/poll_utils.h b/include/xgboost/collective/poll_utils.h index 514e0a5c6633..a4d2fbacda27 100644 --- a/include/xgboost/collective/poll_utils.h +++ b/include/xgboost/collective/poll_utils.h @@ -77,6 +77,8 @@ namespace utils { template int PollImpl(PollFD* pfd, int nfds, std::chrono::seconds timeout) noexcept(true) { + // For Windows and Linux, negative timeout means infinite timeout. For freebsd, + // INFTIM(-1) should be used instead. #if defined(_WIN32) #if IS_MINGW() @@ -87,7 +89,7 @@ int PollImpl(PollFD* pfd, int nfds, std::chrono::seconds timeout) noexcept(true) #endif // IS_MINGW() #else - return poll(pfd, nfds, std::chrono::milliseconds(timeout).count()); + return poll(pfd, nfds, timeout.count() < 0 ? -1 : std::chrono::milliseconds(timeout).count()); #endif // IS_MINGW() } diff --git a/src/c_api/coll_c_api.cc b/src/c_api/coll_c_api.cc index 1da22610367b..2cace7930e19 100644 --- a/src/c_api/coll_c_api.cc +++ b/src/c_api/coll_c_api.cc @@ -75,8 +75,11 @@ using CollAPIThreadLocalStore = dmlc::ThreadLocalStore; void WaitImpl(TrackerHandleT *ptr, std::chrono::seconds timeout) { constexpr std::int64_t kDft{collective::DefaultTimeoutSec()}; - std::chrono::seconds wait_for{collective::HasTimeout(timeout) ? std::min(kDft, timeout.count()) - : kDft}; + std::int64_t timeout_clipped = kDft; + if (collective::HasTimeout(timeout)) { + timeout_clipped = std::min(kDft, static_cast(timeout.count())); + } + std::chrono::seconds wait_for{timeout_clipped}; common::Timer timer; timer.Start(); @@ -171,7 +174,19 @@ XGB_DLL int XGTrackerFree(TrackerHandle handle) { common::Timer timer; timer.Start(); // Make sure no one else is waiting on the tracker. - while (!ptr->first.unique()) { + + // Quote from https://en.cppreference.com/w/cpp/memory/shared_ptr/use_count#Notes: + // + // In multithreaded environment, `use_count() == 1` does not imply that the object is + // safe to modify because accesses to the managed object by former shared owners may not + // have completed, and because new shared owners may be introduced concurrently. + // + // - We don't have the first case since we never access the raw pointer. + // + // - We don't hve the second case for most of the scenarios since tracker is an unique + // object, if the free function is called before another function calls, it's likely + // to be a bug in the user code. The use_count should only decrease in this function. + while (ptr->first.use_count() != 1) { auto ela = timer.Duration().count(); if (collective::HasTimeout(ptr->first->Timeout()) && ela > ptr->first->Timeout().count()) { LOG(WARNING) << "Time out " << ptr->first->Timeout().count() diff --git a/src/collective/socket.cc b/src/collective/socket.cc index 38e727caf836..21f4abfcca7b 100644 --- a/src/collective/socket.cc +++ b/src/collective/socket.cc @@ -22,10 +22,12 @@ namespace xgboost::collective { SockAddress MakeSockAddress(StringView host, in_port_t port) { struct addrinfo hints; std::memset(&hints, 0, sizeof(hints)); - hints.ai_protocol = SOCK_STREAM; + hints.ai_socktype = SOCK_STREAM; struct addrinfo *res = nullptr; int sig = getaddrinfo(host.c_str(), nullptr, &hints, &res); if (sig != 0) { + LOG(FATAL) << "Failed to get addr info for: " << host + << ", error: " << gai_strerror(sig); return {}; } if (res->ai_family == static_cast(SockDomain::kV4)) { diff --git a/tests/cpp/collective/test_worker.h b/tests/cpp/collective/test_worker.h index 230c3796d860..66c6ce9bf24e 100644 --- a/tests/cpp/collective/test_worker.h +++ b/tests/cpp/collective/test_worker.h @@ -105,7 +105,7 @@ inline Json MakeTrackerConfig(std::string host, std::int32_t n_workers, config["port"] = Integer{0}; config["n_workers"] = Integer{n_workers}; config["sortby"] = Integer{static_cast(Tracker::SortBy::kHost)}; - config["timeout"] = timeout.count(); + config["timeout"] = static_cast(timeout.count()); return config; } diff --git a/tests/cpp/common/test_random.cc b/tests/cpp/common/test_random.cc index a517764754c5..45c20e4030f7 100644 --- a/tests/cpp/common/test_random.cc +++ b/tests/cpp/common/test_random.cc @@ -68,14 +68,20 @@ TEST(ColumnSampler, GPUTest) { // Test if different threads using the same seed produce the same result TEST(ColumnSampler, ThreadSynchronisation) { Context ctx; - const int64_t num_threads = 100; + // NOLINTBEGIN(clang-analyzer-deadcode.DeadStores) +#if defined(__linux__) + std::int64_t const n_threads = std::thread::hardware_concurrency() * 128; +#else + std::int64_t const n_threads = std::thread::hardware_concurrency(); +#endif + // NOLINTEND(clang-analyzer-deadcode.DeadStores) int n = 128; size_t iterations = 10; size_t levels = 5; std::vector reference_result; std::vector feature_weights; bool success = true; // Cannot use google test asserts in multithreaded region -#pragma omp parallel num_threads(num_threads) +#pragma omp parallel num_threads(n_threads) { for (auto j = 0ull; j < iterations; j++) { ColumnSampler cs(j); diff --git a/tests/cpp/test_cache.cc b/tests/cpp/test_cache.cc index 351730181f9a..cd4b28b0536b 100644 --- a/tests/cpp/test_cache.cc +++ b/tests/cpp/test_cache.cc @@ -59,7 +59,11 @@ TEST(DMatrixCache, MultiThread) { std::size_t constexpr kRows = 2, kCols = 1, kCacheSize = 3; auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); - auto n = std::thread::hardware_concurrency() * 128u; +#if defined(__linux__) + auto const n = std::thread::hardware_concurrency() * 128; +#else + auto const n = std::thread::hardware_concurrency(); +#endif CHECK_NE(n, 0); std::vector> results(n); diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 976ae2147a06..c25f684a42d1 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -267,8 +267,14 @@ TEST(Learner, MultiThreadedPredict) { learner->Configure(); std::vector threads; - for (uint32_t thread_id = 0; - thread_id < 2 * std::thread::hardware_concurrency(); ++thread_id) { + +#if defined(__linux__) + auto n_threads = std::thread::hardware_concurrency() * 4u; +#else + auto n_threads = std::thread::hardware_concurrency(); +#endif + + for (decltype(n_threads) thread_id = 0; thread_id < n_threads; ++thread_id) { threads.emplace_back([learner, p_data] { size_t constexpr kIters = 10; auto &entry = learner->GetThreadLocal().prediction_entry;