From 32518ef3b4508e4be20524eaca7ed731cb431f1a Mon Sep 17 00:00:00 2001 From: Dekel Pilli Date: Thu, 12 Sep 2024 18:17:26 +1000 Subject: [PATCH 1/5] feat(sample transform): add sample_rate_key config option --- src/transforms/sample/config.rs | 11 +++++++- src/transforms/sample/transform.rs | 42 +++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/transforms/sample/config.rs b/src/transforms/sample/config.rs index d05338493ef80..29fd24813918b 100644 --- a/src/transforms/sample/config.rs +++ b/src/transforms/sample/config.rs @@ -1,6 +1,6 @@ use vector_lib::config::{LegacyKey, LogNamespace}; use vector_lib::configurable::configurable_component; -use vrl::owned_value_path; +use vector_lib::lookup::{lookup_v2::OptionalValuePath, owned_value_path, path}; use vrl::value::Kind; use crate::{ @@ -43,6 +43,9 @@ pub struct SampleConfig { /// sampled together, but that overall `1/N` transactions are sampled. #[configurable(metadata(docs::examples = "message"))] pub key_field: Option, + #[serde(default = "default_sample_rate_key")] + #[configurable(metadata(docs::examples = "sample_rate"))] + pub sample_rate_key: OptionalValuePath, /// A logical condition used to exclude events from sampling. 
pub exclude: Option, @@ -54,6 +57,7 @@ impl GenerateConfig for SampleConfig { rate: 10, key_field: None, exclude: None::, + sample_rate_key: default_sample_rate_key(), }) .unwrap() } @@ -105,6 +109,10 @@ impl TransformConfig for SampleConfig { } } +pub fn default_sample_rate_key() -> OptionalValuePath { + OptionalValuePath::from(owned_value_path!("sample_rate")) +} + #[cfg(test)] mod tests { use super::*; @@ -127,6 +135,7 @@ mod tests { rate: 1, key_field: None, exclude: None, + sample_rate_key: default_sample_rate_key(), }; let (tx, rx) = mpsc::channel(1); let (topology, mut out) = create_topology(ReceiverStream::new(rx), config).await; diff --git a/src/transforms/sample/transform.rs b/src/transforms/sample/transform.rs index d9c7434d15566..1a6983d90ced2 100644 --- a/src/transforms/sample/transform.rs +++ b/src/transforms/sample/transform.rs @@ -1,11 +1,12 @@ use vector_lib::config::LegacyKey; use vrl::event_path; +use vector_lib::lookup::lookup_v2::OptionalValuePath; use crate::{ conditions::Condition, event::Event, internal_events::SampleEventDiscarded, - transforms::{FunctionTransform, OutputBuffer}, + transforms::{FunctionTransform, OutputBuffer, sample::config::default_sample_rate_key}, }; #[derive(Clone)] @@ -15,6 +16,7 @@ pub struct Sample { key_field: Option, exclude: Option, count: u64, + sample_rate_key: OptionalValuePath, } impl Sample { @@ -26,6 +28,7 @@ impl Sample { rate: u64, key_field: Option, exclude: Option, + sample_rate_key: OptionalValuePath, ) -> Self { Self { name, @@ -33,6 +36,7 @@ impl Sample { key_field, exclude, count: 0, + sample_rate_key } } } @@ -83,13 +87,15 @@ impl FunctionTransform for Sample { event.namespace().insert_source_metadata( self.name.as_str(), event, - Some(LegacyKey::Overwrite(vrl::path!("sample_rate"))), - vrl::path!("sample_rate"), + self.sample_rate_key.path.map(|path| LegacyKey::Overwrite(path)), + self.sample_rate_key.path, self.rate.to_string(), ); } Event::Trace(ref mut event) => { - 
event.insert(event_path!("sample_rate"), self.rate.to_string()); + if let Some(path) = self.sample_rate_key.path { + event.insert(path, self.rate.to_string()); + } } Event::Metric(_) => panic!("component can never receive metric events"), }; @@ -138,6 +144,7 @@ mod tests { log_schema().message_key().unwrap().to_string().as_str(), "na", )), + default_sample_rate_key(), ); let total_passed = events .into_iter() @@ -160,6 +167,7 @@ mod tests { log_schema().message_key().unwrap().to_string().as_str(), "na", )), + default_sample_rate_key(), ); let total_passed = events .into_iter() @@ -185,6 +193,7 @@ mod tests { log_schema().message_key().unwrap().to_string().as_str(), "na", )), + default_sample_rate_key(), ); let first_run = events @@ -220,6 +229,7 @@ mod tests { log_schema().message_key().unwrap().to_string().as_str(), "important", )), + default_sample_rate_key(), ); let iterations = 0..1000; let total_passed = iterations @@ -243,6 +253,7 @@ mod tests { 0, key_field.clone(), Some(condition_contains("other_field", "foo")), + default_sample_rate_key(), ); let iterations = 0..1000; let total_passed = iterations @@ -265,6 +276,7 @@ mod tests { 10, key_field.clone(), Some(condition_contains(&message_key, "na")), + default_sample_rate_key(), ); let passing = events .into_iter() @@ -279,13 +291,30 @@ mod tests { 25, key_field.clone(), Some(condition_contains(&message_key, "na")), + OptionalValuePath::from(owned_value_path!("custom_sample_rate")), ); let passing = events .into_iter() .filter(|s| !s.as_log()[&message_key].to_string_lossy().contains("na")) .find_map(|event| transform_one(&mut sampler, event)) .unwrap(); - assert_eq!(passing.as_log()["sample_rate"], "25".into()); + assert_eq!(passing.as_log()["custom_sample_rate"], "25".into()); + assert!(passing.as_log().get("sample_rate").is_none()); + + let events = random_events(10000); + let mut sampler = Sample::new( + "sample".to_string(), + 50, + key_field.clone(), + Some(condition_contains(&message_key, "na")), + 
OptionalValuePath::from(""), + ); + let passing = events + .into_iter() + .filter(|s| !s.as_log()[&message_key].to_string_lossy().contains("na")) + .find_map(|event| transform_one(&mut sampler, event)) + .unwrap(); + assert!(passing.as_log().get("sample_rate").is_none()); // If the event passed the regex check, don't include the sampling rate let mut sampler = Sample::new( @@ -293,6 +322,7 @@ mod tests { 25, key_field.clone(), Some(condition_contains(&message_key, "na")), + default_sample_rate_key(), ); let event = Event::Log(LogEvent::from("nananana")); let passing = transform_one(&mut sampler, event).unwrap(); @@ -304,7 +334,7 @@ mod tests { fn handles_trace_event() { let event: TraceEvent = LogEvent::from("trace").into(); let trace = Event::Trace(event); - let mut sampler = Sample::new("sample".to_string(), 2, None, None); + let mut sampler = Sample::new("sample".to_string(), 2, None, None, default_sample_rate_key()); let iterations = 0..2; let total_passed = iterations .filter_map(|_| transform_one(&mut sampler, trace.clone())) From b6a4a5edf16d31ead2bcab29330cfc1a818f1ced Mon Sep 17 00:00:00 2001 From: Dekel Pilli Date: Sun, 27 Oct 2024 10:29:10 +1100 Subject: [PATCH 2/5] docs: Add changelog entry for sample_rate_key change --- changelog.d/configurable-sample-rate-key.enhancement.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog.d/configurable-sample-rate-key.enhancement.md diff --git a/changelog.d/configurable-sample-rate-key.enhancement.md b/changelog.d/configurable-sample-rate-key.enhancement.md new file mode 100644 index 0000000000000..a89e9a8ea4012 --- /dev/null +++ b/changelog.d/configurable-sample-rate-key.enhancement.md @@ -0,0 +1,3 @@ +The `sample` transform now has a `sample_rate_key` configuration option, which defaults to `sample_rate`, that allows configuring which key is used to attach the sample rate to sampled events. If set to an empty string, the sample rate will not be attached to sampled events. 
+ +authors: dekelpilli From 8210cbbd32267104a8e7189842d170ea9df34765 Mon Sep 17 00:00:00 2001 From: Dekel Pilli Date: Tue, 29 Oct 2024 07:59:08 +1100 Subject: [PATCH 3/5] Add sample_rate_key description --- src/transforms/sample/config.rs | 4 +++- src/transforms/sample/transform.rs | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/transforms/sample/config.rs b/src/transforms/sample/config.rs index b8d56fa159c4f..c996f5f1dfcb0 100644 --- a/src/transforms/sample/config.rs +++ b/src/transforms/sample/config.rs @@ -44,8 +44,10 @@ pub struct SampleConfig { /// sampled together, but that overall `1/N` transactions are sampled. #[configurable(metadata(docs::examples = "message"))] pub key_field: Option, + + /// The event key in which the sample rate is stored. If set to an empty string, the sample rate will not be added to the event. + #[configurable(metadata(docs::examples = "sample_rate"))] #[serde(default = "default_sample_rate_key")] - #[configurable(metadata(docs::examples = "sample_rate"), description = "")] pub sample_rate_key: OptionalValuePath, /// The value to group events into separate buckets to be sampled independently. 
diff --git a/src/transforms/sample/transform.rs b/src/transforms/sample/transform.rs index 1e805bcf2e297..f6a98362f9d02 100644 --- a/src/transforms/sample/transform.rs +++ b/src/transforms/sample/transform.rs @@ -9,8 +9,8 @@ use crate::{ template::Template, transforms::{FunctionTransform, OutputBuffer}, }; -use vector_lib::lookup::OwnedTargetPath; use vector_lib::lookup::lookup_v2::OptionalValuePath; +use vector_lib::lookup::OwnedTargetPath; #[derive(Clone)] pub struct Sample { @@ -129,7 +129,7 @@ impl FunctionTransform for Sample { ); } Event::Trace(ref mut event) => { - event.insert(&OwnedTargetPath::event(path.clone()), self.rate.to_string()); + event.insert(&OwnedTargetPath::event(path.clone()), self.rate.to_string()); } Event::Metric(_) => panic!("component can never receive metric events"), }; From cb7998cca21ae8739d7da7ca5bf202b3e6ccbd1e Mon Sep 17 00:00:00 2001 From: Dekel Pilli Date: Tue, 29 Oct 2024 10:03:03 +1100 Subject: [PATCH 4/5] docs: add cue docs --- .../cue/reference/components/transforms/base/sample.cue | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/website/cue/reference/components/transforms/base/sample.cue b/website/cue/reference/components/transforms/base/sample.cue index d6b573e409334..705b9766b7870 100644 --- a/website/cue/reference/components/transforms/base/sample.cue +++ b/website/cue/reference/components/transforms/base/sample.cue @@ -48,4 +48,12 @@ base: components: transforms: sample: configuration: { 1500, ] } + sample_rate_key: { + description: "The event key in which the sample rate is stored. If set to an empty string, the sample rate will not be added to the event." 
+ required: false + type: string: { + default: "sample_rate" + examples: ["sample_rate"] + } + } } From 9e51a64dca34cef02ea18e661d54e78a664f4633 Mon Sep 17 00:00:00 2001 From: Dekel Pilli Date: Thu, 31 Oct 2024 13:59:46 +1100 Subject: [PATCH 5/5] docs: correct sample docs whitespace --- website/cue/reference/components/transforms/base/sample.cue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/cue/reference/components/transforms/base/sample.cue b/website/cue/reference/components/transforms/base/sample.cue index 705b9766b7870..fd698ecf7ba7b 100644 --- a/website/cue/reference/components/transforms/base/sample.cue +++ b/website/cue/reference/components/transforms/base/sample.cue @@ -48,7 +48,7 @@ base: components: transforms: sample: configuration: { 1500, ] } - sample_rate_key: { + sample_rate_key: { description: "The event key in which the sample rate is stored. If set to an empty string, the sample rate will not be added to the event." required: false type: string: {