Skip to content

Commit 32162bb

Browse files
MOZGIIIHoverbear
andauthored
enhancement(kubernetes_logs source): Expose the performance related parameters (#4751)
* Expose the performance related parameters to allow configuration in the sensitive scenarios Signed-off-by: MOZGIII <[email protected]> * Fix the description at glob_minimum_cooldown_ms at src/sources/kubernetes_logs/mod.rs Signed-off-by: MOZGIII <[email protected]> * Set default_glob_minimum_cooldown_ms to 60 seconds Signed-off-by: MOZGIII <[email protected]> * Remove the duplication of the description from the glob_minimum_cooldown comment Signed-off-by: MOZGIII <[email protected]> * Fix the comment at max_read_bytes at FileServer construction Signed-off-by: MOZGIII <[email protected]> * Fix a typo at src/sources/kubernetes_logs/mod.rs Co-authored-by: Ana Hobden <[email protected]> Co-authored-by: Ana Hobden <[email protected]>
1 parent 36e0b4f commit 32162bb

File tree

1 file changed

+43
-7
lines changed
  • src/sources/kubernetes_logs

1 file changed

+43
-7
lines changed

src/sources/kubernetes_logs/mod.rs

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use file_source::{FileServer, FileServerShutdown, Fingerprinter};
2424
use futures::{future::FutureExt, sink::Sink, stream::StreamExt};
2525
use k8s_openapi::api::core::v1::Pod;
2626
use serde::{Deserialize, Serialize};
27+
use std::convert::TryInto;
2728
use std::path::PathBuf;
2829
use std::time::Duration;
2930

@@ -73,6 +74,22 @@ pub struct Config {
7374

7475
/// A list of glob patterns to exclude from reading the files.
7576
exclude_paths_glob_patterns: Vec<PathBuf>,
77+
78+
/// Max amount of bytes to read from a single file before switching over
79+
/// to the next file.
80+
/// This allows distributing the reads more or less evenly across
81+
/// the files.
82+
#[serde(default = "default_max_read_bytes")]
83+
max_read_bytes: usize,
84+
85+
/// This value specifies not exactly the globbing, but interval
86+
/// between polls of the files to watch from the `paths_provider`.
87+
/// This is quite efficient, yet might still create some load on the
88+
/// file system; in addition, it is currently coupled with checksum dumping
89+
/// in the underlying file server, so setting it too low may introduce
90+
/// a significant overhead.
91+
#[serde(default = "default_glob_minimum_cooldown_ms")]
92+
glob_minimum_cooldown_ms: usize,
7693
}
7794

7895
inventory::submit! {
@@ -138,6 +155,8 @@ struct Source {
138155
field_selector: String,
139156
label_selector: String,
140157
exclude_paths: Vec<glob::Pattern>,
158+
max_read_bytes: usize,
159+
glob_minimum_cooldown: Duration,
141160
}
142161

143162
impl Source {
@@ -166,6 +185,11 @@ impl Source {
166185
})
167186
.collect::<crate::Result<Vec<_>>>()?;
168187

188+
let glob_minimum_cooldown =
189+
Duration::from_millis(config.glob_minimum_cooldown_ms.try_into().expect(
190+
"unable to convert glob_minimum_cooldown_ms from usize to u64 without data loss",
191+
));
192+
169193
Ok(Self {
170194
client,
171195
data_dir,
@@ -174,6 +198,8 @@ impl Source {
174198
field_selector,
175199
label_selector,
176200
exclude_paths,
201+
max_read_bytes: config.max_read_bytes,
202+
glob_minimum_cooldown,
177203
})
178204
}
179205

@@ -190,6 +216,8 @@ impl Source {
190216
field_selector,
191217
label_selector,
192218
exclude_paths,
219+
max_read_bytes,
220+
glob_minimum_cooldown,
193221
} = self;
194222

195223
let watcher = k8s::api_watcher::ApiWatcher::new(client, Pod::watch_pod_for_all_namespaces);
@@ -213,7 +241,7 @@ impl Source {
213241
let paths_provider = K8sPathsProvider::new(state_reader.clone(), exclude_paths);
214242
let annotator = PodMetadataAnnotator::new(state_reader, fields_spec);
215243

216-
// TODO: maybe some of the parameters have to be configurable.
244+
// TODO: maybe more of the parameters have to be configurable.
217245

218246
// The 16KB is the maximum size of the payload at single line for both
219247
// docker and CRI log formats.
@@ -224,8 +252,11 @@ impl Source {
224252
let file_server = FileServer {
225253
// Use our special paths provider.
226254
paths_provider,
227-
// This is the default value for the read buffer size.
228-
max_read_bytes: 2048,
255+
// Max amount of bytes to read from a single file before switching
256+
// over to the next file.
257+
// This allows distributing the reads more or less evenly across
258+
// the files.
259+
max_read_bytes,
229260
// We want to use the checkpointing mechanism, and resume from where we
230261
// left off.
231262
start_at_beginning: false,
@@ -242,10 +273,7 @@ impl Source {
242273
data_dir,
243274
// This value specifies not exactly the globbing, but interval
244275
// between polls of the files to watch from the `paths_provider`.
245-
// This is quite efficient, yet might still create some load of the
246-
// file system, so this call is 10 times larger than the default for
247-
// the files.
248-
glob_minimum_cooldown: Duration::from_secs(10),
276+
glob_minimum_cooldown,
249277
// The shape of the log files is well-known in the Kubernetes
250278
// environment, so we pick a specially crafted fingerprinter
251279
// for the log files.
@@ -368,6 +396,14 @@ fn default_self_node_name_env_template() -> String {
368396
format!("${{{}}}", SELF_NODE_NAME_ENV_KEY.to_owned())
369397
}
370398

399+
/// Serde default for `Config::max_read_bytes`: 2048 bytes.
fn default_max_read_bytes() -> usize {
400+
2048
401+
}
402+
403+
/// Serde default for `Config::glob_minimum_cooldown_ms`: 60000 milliseconds
/// (60 seconds).
fn default_glob_minimum_cooldown_ms() -> usize {
404+
60000
405+
}
406+
371407
/// This function constructs the effective field selector to use, based on
372408
/// the specified configuration.
373409
fn prepare_field_selector(config: &Config) -> crate::Result<String> {

0 commit comments

Comments
 (0)