Skip to content

Commit

Permalink
chore: add passive sampling option to tracing
Browse files Browse the repository at this point in the history
When we have many services in the same flow, we want to defer the sampling
logic to the parent service (which will defer to the root service in the end).

The PassiveSampler is really good for that, as it will only sample traces that
have one external reference. This assumes that the parent service will send the
trace info to stitch, if we should keep it, and won't, if we should discard it.
This is what some of our services do already today, and it would really help us
onboard to foundations!

This also fixes the previous rate limiting test, that would always return an
empty traces list, as they were sent to a non-existent scope.
  • Loading branch information
lbarthon authored and inikulin committed Aug 27, 2024
1 parent afd9094 commit 9f9b4b7
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ foundations = { version = "3", path = "./foundations" }
foundations-macros = { version = "3", path = "./foundations-macros" }
bindgen = { version = "0.68.1", default-features = false }
cc = "1.0"
cf-rustracing = "1.0"
cf-rustracing = "1.0.1"
cf-rustracing-jaeger = "1.1"
clap = "4.4"
darling = "0.20.10"
Expand Down
78 changes: 63 additions & 15 deletions foundations/src/telemetry/settings/tracing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,8 @@ pub struct TracingSettings {
/// The output for the collected traces.
pub output: TracesOutput,

/// Sampling ratio.
///
/// This can be any fractional value between `0.0` and `1.0`.
/// Where `1.0` means "sample everything", and `0.0` means "don't sample anything".
#[serde(default = "TracingSettings::default_sampling_ratio")]
pub sampling_ratio: f64,

/// Settings for rate limiting emission of traces
pub rate_limit: RateLimitingSettings,
/// The strategy used to sample traces.
pub sampling_strategy: SamplingStrategy,
}

#[cfg(not(feature = "settings"))]
Expand All @@ -41,8 +34,7 @@ impl Default for TracingSettings {
Self {
enabled: TracingSettings::default_enabled(),
output: Default::default(),
sampling_ratio: TracingSettings::default_sampling_ratio(),
rate_limit: Default::default(),
sampling_strategy: Default::default(),
}
}
}
Expand All @@ -51,10 +43,6 @@ impl TracingSettings {
fn default_enabled() -> bool {
true
}

fn default_sampling_ratio() -> f64 {
1.0
}
}

/// The output for the collected traces.
Expand Down Expand Up @@ -125,3 +113,63 @@ impl JaegerThriftUdpOutputSettings {
server_addr
}
}

/// Settings used when active sampling is enabled.
#[cfg_attr(feature = "settings", settings(crate_path = "crate"))]
#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))]
pub struct ActiveSamplingSettings {
/// Sampling ratio.
///
/// This can be any fractional value between `0.0` and `1.0`.
/// Where `1.0` means "sample everything", and `0.0` means "don't sample anything".
#[serde(default = "ActiveSamplingSettings::default_sampling_ratio")]
pub sampling_ratio: f64,

/// Settings for rate limiting emission of traces
pub rate_limit: RateLimitingSettings,
}

impl ActiveSamplingSettings {
fn default_sampling_ratio() -> f64 {
1.0
}
}

#[cfg(not(feature = "settings"))]
impl Default for ActiveSamplingSettings {
fn default() -> Self {
Self {
sampling_ratio: ActiveSamplingSettings::default_sampling_ratio(),
rate_limit: Default::default(),
}
}
}

/// The sampling strategy used for tracing purposes.
#[cfg_attr(
feature = "settings",
settings(crate_path = "crate", impl_default = false)
)]
#[cfg_attr(not(feature = "settings"), derive(Clone, Debug, serde::Deserialize))]
pub enum SamplingStrategy {
/// This only samples traces which have one or more references.
///
/// Passive sampling is meant to be used when we want to defer the sampling logic to the parent
/// service. The traces will only be sent if the parent service sent trace stitching data.
/// Backed by [cf_rustracing::sampler::PassiveSampler]
Passive,

/// Active sampling.
///
/// This will sample a percentage of traces, specified in [sampling ratio], and also supports
/// rate limiting traces - see [RateLimitingSettings].
///
/// [sampling ratio]: crate::telemetry::settings::ActiveSamplingSettings::sampling_ratio
Active(ActiveSamplingSettings),
}

impl Default for SamplingStrategy {
fn default() -> Self {
Self::Active(Default::default())
}
}
16 changes: 11 additions & 5 deletions foundations/src/telemetry/tracing/init.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::internal::{SharedSpan, Tracer};
use super::output_jaeger_thrift_udp;
use crate::telemetry::scope::ScopeStack;
use crate::telemetry::settings::{TracesOutput, TracingSettings};
use crate::telemetry::settings::{SamplingStrategy, TracesOutput, TracingSettings};
use crate::{BootstrapResult, ServiceInfo};
use cf_rustracing_jaeger::span::SpanReceiver;
use futures_util::future::BoxFuture;
Expand All @@ -10,6 +10,7 @@ use once_cell::sync::{Lazy, OnceCell};
#[cfg(feature = "telemetry-otlp-grpc")]
use super::output_otlp_grpc;

use cf_rustracing::sampler::{PassiveSampler, Sampler};
#[cfg(feature = "testing")]
use std::borrow::Cow;

Expand All @@ -18,7 +19,7 @@ use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler;
static HARNESS: OnceCell<TracingHarness> = OnceCell::new();

static NOOP_HARNESS: Lazy<TracingHarness> = Lazy::new(|| {
let (noop_tracer, _) = Tracer::new(Default::default());
let (noop_tracer, _) = Tracer::new(RateLimitingProbabilisticSampler::default().boxed());

TracingHarness {
tracer: noop_tracer,
Expand Down Expand Up @@ -60,9 +61,14 @@ impl TracingHarness {
pub(crate) fn create_tracer_and_span_rx(
settings: &TracingSettings,
) -> BootstrapResult<(Tracer, SpanReceiver)> {
Ok(Tracer::new(RateLimitingProbabilisticSampler::new(
settings,
)?))
let sampler = match &settings.sampling_strategy {
SamplingStrategy::Passive => PassiveSampler.boxed(),
SamplingStrategy::Active(settings) => {
RateLimitingProbabilisticSampler::new(settings)?.boxed()
}
};

Ok(Tracer::new(sampler))
}

// NOTE: does nothing if tracing has already been initialized in this process.
Expand Down
4 changes: 2 additions & 2 deletions foundations/src/telemetry/tracing/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ use super::init::TracingHarness;
use super::StartTraceOptions;
use rand::{self, Rng};

use crate::telemetry::tracing::rate_limit::RateLimitingProbabilisticSampler;
use cf_rustracing::sampler::BoxSampler;
use cf_rustracing::tag::Tag;
use cf_rustracing_jaeger::span::{Span, SpanContext, SpanContextState};
use std::borrow::Cow;
use std::error::Error;
use std::sync::Arc;

pub(crate) type Tracer = cf_rustracing::Tracer<RateLimitingProbabilisticSampler, SpanContextState>;
pub(crate) type Tracer = cf_rustracing::Tracer<BoxSampler<SpanContextState>, SpanContextState>;

#[derive(Debug, Clone)]
pub(crate) struct SharedSpan {
Expand Down
2 changes: 1 addition & 1 deletion foundations/src/telemetry/tracing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ pub struct StartTraceOptions {
///
/// Can be used to enforce trace sampling by providing `Some(1.0)` value.
///
/// [sampling ratio]: crate::telemetry::settings::TracingSettings::sampling_ratio
/// [sampling ratio]: crate::telemetry::settings::ActiveSamplingSettings::sampling_ratio
/// [tracing initializaion]: crate::telemetry::init
pub override_sampling_ratio: Option<f64>,
}
Expand Down
4 changes: 2 additions & 2 deletions foundations/src/telemetry/tracing/rate_limit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::telemetry::settings::TracingSettings;
use crate::telemetry::settings::ActiveSamplingSettings;
use cf_rustracing::sampler::Sampler;
use cf_rustracing::span::CandidateSpan;
use cf_rustracing::{sampler::ProbabilisticSampler, Result};
Expand Down Expand Up @@ -26,7 +26,7 @@ impl Default for RateLimitingProbabilisticSampler {
impl RateLimitingProbabilisticSampler {
/// If `sampling_rate` is not in the range `0.0...1.0`,
/// it will return an error with the kind `ErrorKind::InvalidInput`.
pub(crate) fn new(settings: &TracingSettings) -> Result<Self> {
pub(crate) fn new(settings: &ActiveSamplingSettings) -> Result<Self> {
let rate_limiter = if settings.rate_limit.enabled {
settings
.rate_limit
Expand Down
39 changes: 33 additions & 6 deletions foundations/tests/tracing.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use foundations::telemetry::settings::{RateLimitingSettings, TracingSettings};
use foundations::telemetry::settings::{
ActiveSamplingSettings, RateLimitingSettings, SamplingStrategy, TracingSettings,
};
use foundations::telemetry::tracing;
use foundations::telemetry::TestTelemetryContext;
use foundations_macros::with_test_telemetry;
Expand All @@ -18,16 +20,41 @@ fn test_rate_limiter(mut ctx: TestTelemetryContext) {
assert_eq!(ctx.traces(Default::default()).len(), 10);

ctx.set_tracing_settings(TracingSettings {
rate_limit: RateLimitingSettings {
enabled: true,
max_events_per_second: 5,
},
sampling_strategy: SamplingStrategy::Active(ActiveSamplingSettings {
rate_limit: RateLimitingSettings {
enabled: true,
max_events_per_second: 5,
},
..Default::default()
}),
..Default::default()
});

let _scope = ctx.scope();
for i in 10..20 {
make_test_trace(i)
}

assert!(ctx.traces(Default::default()).len() < 20);
assert_eq!(ctx.traces(Default::default()).len(), 5);
}

#[with_test_telemetry(test)]
fn test_passive_sampler(mut ctx: TestTelemetryContext) {
for i in 0..10 {
make_test_trace(i)
}

assert_eq!(ctx.traces(Default::default()).len(), 10);

ctx.set_tracing_settings(TracingSettings {
sampling_strategy: SamplingStrategy::Passive,
..Default::default()
});

let _scope = ctx.scope();
for i in 10..20 {
make_test_trace(i)
}

assert_eq!(ctx.traces(Default::default()).len(), 0);
}

0 comments on commit 9f9b4b7

Please sign in to comment.