Skip to content

Commit

Permalink
[Breaking] Remove ResumableFileSlot and rely on high ulimits
Browse files Browse the repository at this point in the history
ResumableFileSlot became to difficult to manage, instead of
managing resources this way, we use much higher
DEFAULT_OPEN_FILE_PERMITS, set ulimit (when unix) and warn
user if the limits are likely too low.

Breaking: Removed idle_file_descriptor_timeout_millis from config.

closes: #1288, #513, #1298, #527
  • Loading branch information
allada committed Feb 9, 2025
1 parent 7afe286 commit a47c20e
Show file tree
Hide file tree
Showing 22 changed files with 555 additions and 964 deletions.
12 changes: 11 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@
"--locked"
"--features nix"
"--branch"
"--profile smol"
"--ignore-filename-regex '.*(genproto|vendor-cargo-deps|crates).*'"
];
cargoLlvmCovExtraArgs = "--html --output-dir $out";
Expand Down
15 changes: 0 additions & 15 deletions nativelink-config/src/cas_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,21 +716,6 @@ pub struct GlobalConfig {
#[serde(deserialize_with = "convert_numeric_with_shellexpand")]
pub max_open_files: usize,

/// If a file descriptor is idle for this many milliseconds, it will be closed.
/// In the event a client or store takes a long time to send or receive data
/// the file descriptor will be closed, and since `max_open_files` blocks new
/// `open_file` requests until a slot opens up, it will allow new requests to be
/// processed. If a read or write is attempted on a closed file descriptor, the
/// file will be reopened and the operation will continue.
///
/// On services where worker(s) and scheduler(s) live in the same process, this
/// also prevents deadlocks if a file->file copy is happening, but cannot open
/// a new file descriptor because the limit has been reached.
///
/// Default: 1000 (1 second)
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
pub idle_file_descriptor_timeout_millis: u64,

/// This flag can be used to prevent metrics from being collected at runtime.
/// Metrics are still able to be collected, but this flag prevents metrics that
/// are collected at runtime (performance metrics) from being tallied. The
Expand Down
4 changes: 1 addition & 3 deletions nativelink-macro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@ pub fn nativelink_test(attr: TokenStream, item: TokenStream) -> TokenStream {
async fn #fn_name(#fn_inputs) #fn_output {
// Error means already initialized, which is ok.
let _ = nativelink_util::init_tracing();
// If already set it's ok.
let _ = nativelink_util::fs::set_idle_file_descriptor_timeout(std::time::Duration::from_millis(100));

#[warn(clippy::disallowed_methods)]
::std::sync::Arc::new(::nativelink_util::origin_context::OriginContext::new()).wrap_async(
::nativelink_util::__tracing::trace_span!("test"), async move {
::nativelink_util::__tracing::error_span!(stringify!(#fn_name)), async move {
#fn_block
}
)
Expand Down
3 changes: 0 additions & 3 deletions nativelink-store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,6 @@ aws-sdk-s3 = { version = "=1.68.0", features = [
"rt-tokio",
], default-features = false }
aws-smithy-runtime-api = "1.7.3"
serial_test = { version = "3.2.0", features = [
"async",
], default-features = false }
serde_json = "1.0.135"
fred = { version = "10.0.3", default-features = false, features = ["mocks"] }
tracing-subscriber = { version = "0.3.19", default-features = false }
10 changes: 6 additions & 4 deletions nativelink-store/src/fast_slow_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::borrow::BorrowMut;
use std::cmp::{max, min};
use std::ffi::OsString;
use std::ops::Range;
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
Expand Down Expand Up @@ -224,9 +225,10 @@ impl StoreDriver for FastSlowStore {
async fn update_with_whole_file(
self: Pin<&Self>,
key: StoreKey<'_>,
mut file: fs::ResumeableFileSlot,
path: OsString,
mut file: fs::FileSlot,
upload_size: UploadSizeInfo,
) -> Result<Option<fs::ResumeableFileSlot>, Error> {
) -> Result<Option<fs::FileSlot>, Error> {
if self
.fast_store
.optimized_for(StoreOptimizations::FileUpdates)
Expand All @@ -246,7 +248,7 @@ impl StoreDriver for FastSlowStore {
}
return self
.fast_store
.update_with_whole_file(key, file, upload_size)
.update_with_whole_file(key, path, file, upload_size)
.await;
}

Expand All @@ -269,7 +271,7 @@ impl StoreDriver for FastSlowStore {
}
return self
.slow_store
.update_with_whole_file(key, file, upload_size)
.update_with_whole_file(key, path, file, upload_size)
.await;
}

Expand Down
Loading

0 comments on commit a47c20e

Please sign in to comment.