diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index e33cf760433..ea56ba2a593 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -152,6 +152,7 @@ static VALUE _native_initialize( VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, + VALUE dynamic_sampling_rate_overhead_target_percentage, VALUE allocation_sample_every ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); @@ -226,7 +227,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 8); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1); @@ -295,6 +296,7 @@ static VALUE _native_initialize( VALUE allocation_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, + VALUE dynamic_sampling_rate_overhead_target_percentage, VALUE allocation_sample_every ) { ENFORCE_BOOLEAN(gc_profiling_enabled); @@ -302,6 +304,7 @@ static VALUE _native_initialize( ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); + ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); @@ -310,6 +313,7 @@ static VALUE _native_initialize( state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); + dynamic_sampling_rate_set_overhead_target_percentage(&state->dynamic_sampling_rate, NUM2DBL(dynamic_sampling_rate_overhead_target_percentage)); state->allocation_sample_every = NUM2INT(allocation_sample_every); if (state->allocation_sample_every < 0) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c b/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c index 76616273e52..02cc6ee0b1b 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c +++ b/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.c @@ -19,7 +19,7 @@ // // Instead of sampling at a fixed sample rate, the actual sampling rate should be decided by also observing the impact // that running the profiler is having. This protects against issues such as the profiler being deployed in very busy -//machines or containers with unrealistic CPU restrictions. +// machines or containers with unrealistic CPU restrictions. // // ### Implementation // @@ -35,13 +35,13 @@ // sample. If it's not, it will skip sampling. // // Finally, as an additional optimization, there's a `dynamic_sampling_rate_get_sleep()` which, given the current -// wall-time, will return the time remaining (*there's an exception, check below) until the next sample. +// wall-time, will return the time remaining (*there's an exception, check function) until the next sample. // // --- // This is the wall-time overhead we're targeting. E.g. we target to spend no more than 2%, or 1.2 seconds per minute, -// taking profiling samples. -#define WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // % +// taking profiling samples by default. +#define DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE 2.0 // % // See `dynamic_sampling_rate_get_sleep()` for details #define MAX_SLEEP_TIME_NS MILLIS_AS_NS(100) // See `dynamic_sampling_rate_after_sample()` for details @@ -49,6 +49,11 @@ void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state) { atomic_init(&state->next_sample_after_monotonic_wall_time_ns, 0); + dynamic_sampling_rate_set_overhead_target_percentage(state, DEFAULT_WALL_TIME_OVERHEAD_TARGET_PERCENTAGE); +} + +void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage) { + state->overhead_target_percentage = overhead_target_percentage; } void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state) { @@ -76,7 +81,7 @@ bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, lon } void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long wall_time_ns_after_sample, uint64_t sampling_time_ns) { - double overhead_target = (double) WALL_TIME_OVERHEAD_TARGET_PERCENTAGE; + double overhead_target = state->overhead_target_percentage; // The idea here is that we're targeting a maximum % of wall-time spent sampling. // So for instance, if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend @@ -93,48 +98,51 @@ void dynamic_sampling_rate_after_sample(dynamic_sampling_rate_state *state, long // --- // Below here is boilerplate to expose the above code to Ruby so that we can test it with RSpec as usual. -VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns); -VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample); -VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns); +VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns); +VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample); +VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns); void collectors_dynamic_sampling_rate_init(VALUE profiling_module) { VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors"); VALUE dynamic_sampling_rate_module = rb_define_module_under(collectors_module, "DynamicSamplingRate"); VALUE testing_module = rb_define_module_under(dynamic_sampling_rate_module, "Testing"); - rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 2); - rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 2); - rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 2); + rb_define_singleton_method(testing_module, "_native_get_sleep", _native_get_sleep, 3); + rb_define_singleton_method(testing_module, "_native_should_sample", _native_should_sample, 3); + rb_define_singleton_method(testing_module, "_native_after_sample", _native_after_sample, 3); } -VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) { +VALUE _native_get_sleep(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE current_monotonic_wall_time_ns) { ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM); ENFORCE_TYPE(current_monotonic_wall_time_ns, T_FIXNUM); dynamic_sampling_rate_state state; dynamic_sampling_rate_init(&state); + dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage)); atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns)); return ULL2NUM(dynamic_sampling_rate_get_sleep(&state, NUM2LONG(current_monotonic_wall_time_ns))); } -VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) { +VALUE _native_should_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE simulated_next_sample_after_monotonic_wall_time_ns, VALUE wall_time_ns_before_sample) { ENFORCE_TYPE(simulated_next_sample_after_monotonic_wall_time_ns, T_FIXNUM); ENFORCE_TYPE(wall_time_ns_before_sample, T_FIXNUM); dynamic_sampling_rate_state state; dynamic_sampling_rate_init(&state); + dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage)); atomic_store(&state.next_sample_after_monotonic_wall_time_ns, NUM2LONG(simulated_next_sample_after_monotonic_wall_time_ns)); return dynamic_sampling_rate_should_sample(&state, NUM2LONG(wall_time_ns_before_sample)) ? Qtrue : Qfalse; } -VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) { +VALUE _native_after_sample(DDTRACE_UNUSED VALUE self, VALUE overhead_target_percentage, VALUE wall_time_ns_after_sample, VALUE sampling_time_ns) { ENFORCE_TYPE(wall_time_ns_after_sample, T_FIXNUM); ENFORCE_TYPE(sampling_time_ns, T_FIXNUM); dynamic_sampling_rate_state state; dynamic_sampling_rate_init(&state); + dynamic_sampling_rate_set_overhead_target_percentage(&state, NUM2DBL(overhead_target_percentage)); dynamic_sampling_rate_after_sample(&state, NUM2LONG(wall_time_ns_after_sample), NUM2ULL(sampling_time_ns)); diff --git a/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h b/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h index 0196d1f12b5..6076fdbfe5b 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h +++ b/ext/ddtrace_profiling_native_extension/collectors_dynamic_sampling_rate.h @@ -4,10 +4,14 @@ #include typedef struct { + // This is the wall-time overhead we're targeting. E.g. by default, we target to spend no more than 2%, or 1.2 seconds + // per minute, taking profiling samples. + double overhead_target_percentage; atomic_long next_sample_after_monotonic_wall_time_ns; } dynamic_sampling_rate_state; void dynamic_sampling_rate_init(dynamic_sampling_rate_state *state); +void dynamic_sampling_rate_set_overhead_target_percentage(dynamic_sampling_rate_state *state, double overhead_target_percentage); void dynamic_sampling_rate_reset(dynamic_sampling_rate_state *state); uint64_t dynamic_sampling_rate_get_sleep(dynamic_sampling_rate_state *state, long current_monotonic_wall_time_ns); bool dynamic_sampling_rate_should_sample(dynamic_sampling_rate_state *state, long wall_time_ns_before_sample); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 0b8ac838742..558e45cabef 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -391,6 +391,34 @@ def initialize(*_) end end end + + # Configures how much wall-time overhead the profiler targets. The profiler will dynamically adjust the + # interval between samples it takes so as to try and maintain the property that it spends no longer than + # this amount of wall-clock time profiling. For example, with the default value of 2%, the profiler will + # try and cause no more than 1.2 seconds per minute of overhead. Decreasing this value will reduce the + # accuracy of the data collected. Increasing will impact the application. + # + # We do not recommend tweaking this value. + # + # This value should be a percentage i.e. a number between 0 and 100, not 0 and 1. + # + # @default `DD_PROFILING_OVERHEAD_TARGET_PERCENTAGE` as a float, otherwise 2.0 + option :overhead_target_percentage do |o| + o.type :float + o.env 'DD_PROFILING_OVERHEAD_TARGET_PERCENTAGE' + o.default 2.0 + end + + # Controls how often the profiler reports data, in seconds. Cannot be lower than 60 seconds. + # + # We do not recommend tweaking this value. + # + # @default `DD_PROFILING_UPLOAD_PERIOD` environment variable, otherwise 60 + option :upload_period_seconds do |o| + o.type :int + o.env 'DD_PROFILING_UPLOAD_PERIOD' + o.default 60 + end end # @public_api diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 1b2315034c9..d1a06a13964 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -18,6 +18,7 @@ def initialize( allocation_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, + dynamic_sampling_rate_overhead_target_percentage:, idle_sampling_helper: IdleSamplingHelper.new, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! @@ -45,6 +46,7 @@ def initialize( allocation_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, + dynamic_sampling_rate_overhead_target_percentage, allocation_sample_every, ) @worker_thread = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 6b03ea564c1..5c50d602c94 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -7,7 +7,7 @@ module Component # Passing in a `nil` tracer is supported and will disable the following profiling features: # * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span # * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call) - def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) + def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength require_relative '../profiling/diagnostics/environment_logger' Profiling::Diagnostics::EnvironmentLogger.collect_and_log! @@ -41,6 +41,8 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage) + upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max recorder = Datadog::Profiling::StackRecorder.new( cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently @@ -58,6 +60,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, + dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage, allocation_sample_every: 0, ) @@ -68,7 +71,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) exporter = build_profiler_exporter(settings, recorder, internal_metadata: internal_metadata) transport = build_profiler_transport(settings, agent_settings) - scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport) + scheduler = Profiling::Scheduler.new(exporter: exporter, transport: transport, interval: upload_period_seconds) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end @@ -245,6 +248,19 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) true end end + + private_class_method def self.valid_overhead_target(overhead_target_percentage) + if overhead_target_percentage > 0 && overhead_target_percentage <= 20 + overhead_target_percentage + else + Datadog.logger.error( + 'Ignoring invalid value for profiling overhead_target_percentage setting: ' \ + "#{overhead_target_percentage.inspect}. Falling back to default value." + ) + + 2.0 + end + end end end end diff --git a/lib/datadog/profiling/scheduler.rb b/lib/datadog/profiling/scheduler.rb index 4cf51b011e4..ddb21f071be 100644 --- a/lib/datadog/profiling/scheduler.rb +++ b/lib/datadog/profiling/scheduler.rb @@ -5,12 +5,11 @@ module Datadog module Profiling - # Periodically (every DEFAULT_INTERVAL_SECONDS) takes a profile from the `Exporter` and reports it using the + # Periodically (every interval, 60 seconds by default) takes a profile from the `Exporter` and reports it using the # configured transport. Runs on its own background thread. class Scheduler < Core::Worker include Core::Workers::Polling - DEFAULT_INTERVAL_SECONDS = 60 MINIMUM_INTERVAL_SECONDS = 0 # We sleep for at most this duration seconds before reporting data to avoid multi-process applications all @@ -28,8 +27,7 @@ class Scheduler < Core::Worker def initialize( exporter:, transport:, - fork_policy: Core::Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default - interval: DEFAULT_INTERVAL_SECONDS, + interval:, fork_policy: Core::Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default, # seconds enabled: true ) @exporter = exporter @@ -115,8 +113,8 @@ def flush_events # # During PR review (https://github.com/DataDog/dd-trace-rb/pull/1807) we discussed the possible alternative of # just sleeping before starting the scheduler loop. We ended up not going with that option to avoid the first - # profile containing up to DEFAULT_INTERVAL_SECONDS + DEFAULT_FLUSH_JITTER_MAXIMUM_SECONDS instead of the - # usual DEFAULT_INTERVAL_SECONDS size. + # profile containing up to interval + DEFAULT_FLUSH_JITTER_MAXIMUM_SECONDS instead of the + # usual interval seconds. if run_loop? jitter_seconds = rand * DEFAULT_FLUSH_JITTER_MAXIMUM_SECONDS # floating point number between (0.0...maximum) sleep(jitter_seconds) diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs index d4876a7bd63..f819fcdbdff 100644 --- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs +++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs @@ -12,6 +12,7 @@ module Datadog allocation_counting_enabled: bool, no_signals_workaround_enabled: bool, thread_context_collector: Datadog::Profiling::Collectors::ThreadContext, + dynamic_sampling_rate_overhead_target_percentage: Float, ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper, ?dynamic_sampling_rate_enabled: bool, ?allocation_sample_every: Integer, @@ -25,6 +26,7 @@ module Datadog bool allocation_counting_enabled, bool no_signals_workaround_enabled, bool dynamic_sampling_rate_enabled, + Float dynamic_sampling_rate_overhead_target_percentage, ::Integer allocation_sample_every, ) -> true diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 75f739c8784..675143e397c 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -24,6 +24,8 @@ module Datadog def self.incompatible_libmysqlclient_version?: (untyped settings) -> bool def self.incompatible_passenger_version?: () -> bool + def self.flush_interval: (untyped settings) -> ::Numeric + def self.valid_overhead_target: (::Float overhead_target_percentage) -> ::Float end end end diff --git a/sig/datadog/profiling/ext.rbs b/sig/datadog/profiling/ext.rbs index 106a866e4f8..f68c2a377bb 100644 --- a/sig/datadog/profiling/ext.rbs +++ b/sig/datadog/profiling/ext.rbs @@ -6,6 +6,8 @@ module Datadog ENV_MAX_FRAMES: "DD_PROFILING_MAX_FRAMES" ENV_AGENTLESS: "DD_PROFILING_AGENTLESS" ENV_ENDPOINT_COLLECTION_ENABLED: "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED" + ENV_DYNAMIC_SAMPLING_RATE_OVERHEAD_TARGET_PERCENTAGE: "DD_PROFILING_DYNAMIC_SAMPLING_RATE_OVERHEAD_TARGET_PERCENTAGE" + DEFAULT_DYNAMIC_SAMPLING_RATE_OVERHEAD_TARGET_PERCENTAGE: Float module Transport module HTTP diff --git a/sig/datadog/profiling/scheduler.rbs b/sig/datadog/profiling/scheduler.rbs index dbf64446ee8..fabd51ecabb 100644 --- a/sig/datadog/profiling/scheduler.rbs +++ b/sig/datadog/profiling/scheduler.rbs @@ -7,7 +7,7 @@ module Datadog exporter: Datadog::Profiling::Exporter, transport: Datadog::Profiling::HttpTransport, ?fork_policy: untyped, - ?interval: ::Integer, + ?interval: ::Numeric, ?enabled: bool, ) -> void diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index 8e676e504d6..a1e180c83c0 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -637,6 +637,72 @@ .to(true) end end + + describe '#overhead_target_percentage' do + subject(:timeout_seconds) { settings.profiling.advanced.overhead_target_percentage } + + context 'when DD_PROFILING_OVERHEAD_TARGET_PERCENTAGE' do + around do |example| + ClimateControl.modify('DD_PROFILING_OVERHEAD_TARGET_PERCENTAGE' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to eq(2.0) } + end + + context 'is defined' do + let(:environment) { '1.23' } + + it { is_expected.to eq(1.23) } + end + end + end + + describe '#overhead_target_percentage=' do + it 'updates the #overhead_target_percentage setting' do + expect { settings.profiling.advanced.overhead_target_percentage = 4.56 } + .to change { settings.profiling.advanced.overhead_target_percentage } + .from(2.0) + .to(4.56) + end + end + + describe '#upload_period_seconds' do + subject(:max_frames) { settings.profiling.advanced.upload_period_seconds } + + context 'when DD_PROFILING_UPLOAD_PERIOD' do + around do |example| + ClimateControl.modify('DD_PROFILING_UPLOAD_PERIOD' => environment) do + example.run + end + end + + context 'is not defined' do + let(:environment) { nil } + + it { is_expected.to eq(60) } + end + + context 'is defined' do + let(:environment) { '123' } + + it { is_expected.to eq(123) } + end + end + end + + describe '#upload_period_seconds=' do + it 'updates the #upload_period_seconds setting' do + expect { settings.profiling.advanced.upload_period_seconds = 90 } + .to change { settings.profiling.advanced.upload_period_seconds } + .from(60) + .to(90) + end + end end describe '#upload' do diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index ccc830cf475..43c63adfc24 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -14,17 +14,18 @@ let(:no_signals_workaround_enabled) { false } let(:timeline_enabled) { false } let(:options) { {} } - - subject(:cpu_and_wall_time_worker) do - described_class.new( + let(:worker_settings) do + { gc_profiling_enabled: gc_profiling_enabled, allocation_counting_enabled: allocation_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: build_thread_context_collector(recorder), - **options - ) + dynamic_sampling_rate_overhead_target_percentage: 2.0, + } end + subject(:cpu_and_wall_time_worker) { described_class.new(**worker_settings, **options) } + describe '.new' do it 'creates the garbage collection tracepoint in the disabled state' do expect(described_class::Testing._native_gc_tracepoint(cpu_and_wall_time_worker)).to_not be_enabled @@ -878,12 +879,7 @@ def samples_from_pprof_without_gc_and_overhead(pprof_data) end def build_another_instance - described_class.new( - gc_profiling_enabled: gc_profiling_enabled, - allocation_counting_enabled: allocation_counting_enabled, - no_signals_workaround_enabled: no_signals_workaround_enabled, - thread_context_collector: build_thread_context_collector(build_stack_recorder) - ) + described_class.new(**worker_settings) end def build_thread_context_collector(recorder) diff --git a/spec/datadog/profiling/collectors/dynamic_sampling_rate_spec.rb b/spec/datadog/profiling/collectors/dynamic_sampling_rate_spec.rb index 02ac2404f48..8faf38c8db7 100644 --- a/spec/datadog/profiling/collectors/dynamic_sampling_rate_spec.rb +++ b/spec/datadog/profiling/collectors/dynamic_sampling_rate_spec.rb @@ -4,18 +4,25 @@ RSpec.describe Datadog::Profiling::Collectors::DynamicSamplingRate do before { skip_if_profiling_not_supported(self) } + let(:max_overhead_target) { 2.0 } + describe 'dynamic_sampling_rate_after_sample' do let(:current_monotonic_wall_time_ns) { 123 } it 'sets the next_sample_after_monotonic_wall_time_ns based on the current timestamp and max overhead target' do - max_overhead_target = 2.0 # WALL_TIME_OVERHEAD_TARGET_PERCENTAGE sampling_time_ns = 456 # The idea here is -- if sampling_time_ns is 2% of the time we spend working, how much is the 98% we should spend # sleeping? expected_time_to_sleep = sampling_time_ns * ((100 - max_overhead_target) / max_overhead_target) - expect(described_class::Testing._native_after_sample(current_monotonic_wall_time_ns, sampling_time_ns)) + expect( + described_class::Testing._native_after_sample( + max_overhead_target, + current_monotonic_wall_time_ns, + sampling_time_ns + ) + ) .to be(current_monotonic_wall_time_ns + expected_time_to_sleep.to_i) end @@ -24,7 +31,13 @@ max_time_until_next_sample_ns = 10_000_000_000 # MAX_TIME_UNTIL_NEXT_SAMPLE_NS sampling_time_ns = 60_000_000_000 - expect(described_class::Testing._native_after_sample(current_monotonic_wall_time_ns, sampling_time_ns)) + expect( + described_class::Testing._native_after_sample( + max_overhead_target, + current_monotonic_wall_time_ns, + sampling_time_ns + ) + ) .to be(current_monotonic_wall_time_ns + max_time_until_next_sample_ns) end end @@ -34,7 +47,11 @@ let(:next_sample_after_monotonic_wall_time_ns) { 10 } subject(:dynamic_sampling_rate_should_sample) do - described_class::Testing._native_should_sample(next_sample_after_monotonic_wall_time_ns, wall_time_ns_before_sample) + described_class::Testing._native_should_sample( + max_overhead_target, + next_sample_after_monotonic_wall_time_ns, + wall_time_ns_before_sample + ) end context 'when wall_time_ns_before_sample is before next_sample_after_monotonic_wall_time_ns' do @@ -52,7 +69,11 @@ let(:next_sample_after_monotonic_wall_time_ns) { 1_000_000_000 } subject(:dynamic_sampling_rate_get_sleep) do - described_class::Testing._native_get_sleep(next_sample_after_monotonic_wall_time_ns, current_monotonic_wall_time_ns) + described_class::Testing._native_get_sleep( + max_overhead_target, + next_sample_after_monotonic_wall_time_ns, + current_monotonic_wall_time_ns + ) end context 'when current_monotonic_wall_time_ns is before next_sample_after_monotonic_wall_time_ns' do diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 7c64aa27f21..7d7a28d4931 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -73,12 +73,17 @@ it 'initializes a CpuAndWallTimeWorker collector' do expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result) + expect(settings.profiling.advanced).to receive(:overhead_target_percentage) + .and_return(:overhead_target_percentage_config) + expect(described_class).to receive(:valid_overhead_target) + .with(:overhead_target_percentage_config).and_return(:overhead_target_percentage_config) expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with( gc_profiling_enabled: anything, allocation_counting_enabled: anything, no_signals_workaround_enabled: :no_signals_result, thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext), + dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config, allocation_sample_every: 0, ) @@ -236,6 +241,26 @@ build_profiler_component end + context 'when upload_period_seconds is below 60 seconds' do + before { settings.profiling.advanced.upload_period_seconds = 59 } + + it 'ignores this setting and creates a scheduler with an interval of 60 seconds' do + expect(Datadog::Profiling::Scheduler).to receive(:new).with(a_hash_including(interval: 60)) + + build_profiler_component + end + end + + context 'when upload_period_seconds is over 60 seconds' do + before { settings.profiling.advanced.upload_period_seconds = 61 } + + it 'creates a scheduler with the given interval' do + expect(Datadog::Profiling::Scheduler).to receive(:new).with(a_hash_including(interval: 61)) + + build_profiler_component + end + end + it 'initializes the exporter with a code provenance collector' do expect(Datadog::Profiling::Exporter).to receive(:new) do |code_provenance_collector:, **_| expect(code_provenance_collector).to be_a_kind_of(Datadog::Profiling::Collectors::CodeProvenance) @@ -280,6 +305,36 @@ end end + describe '.valid_overhead_target' do + subject(:valid_overhead_target) { described_class.send(:valid_overhead_target, overhead_target_percentage) } + + [0, 20.1].each do |invalid_value| + let(:overhead_target_percentage) { invalid_value } + + context "when overhead_target_percentage is invalid value (#{invalid_value})" do + it 'logs an error' do + expect(Datadog.logger).to receive(:error).with( + /Ignoring invalid value for profiling overhead_target_percentage/ + ) + + valid_overhead_target + end + + it 'falls back to the default value' do + expect(valid_overhead_target).to eq 2.0 + end + end + end + + context 'when overhead_target_percentage is valid' do + let(:overhead_target_percentage) { 1.5 } + + it 'returns the value' do + expect(valid_overhead_target).to eq 1.5 + end + end + end + describe '.no_signals_workaround_enabled?' do subject(:no_signals_workaround_enabled?) { described_class.send(:no_signals_workaround_enabled?, settings) } diff --git a/spec/datadog/profiling/scheduler_spec.rb b/spec/datadog/profiling/scheduler_spec.rb index 1c8b51b3ec8..86aacb26246 100644 --- a/spec/datadog/profiling/scheduler_spec.rb +++ b/spec/datadog/profiling/scheduler_spec.rb @@ -8,9 +8,10 @@ let(:described_class) { Datadog::Profiling::Scheduler } let(:exporter) { instance_double(Datadog::Profiling::Exporter) } let(:transport) { instance_double(Datadog::Profiling::HttpTransport) } + let(:interval) { 60 } # seconds let(:options) { {} } - subject(:scheduler) { described_class.new(exporter: exporter, transport: transport, **options) } + subject(:scheduler) { described_class.new(exporter: exporter, transport: transport, interval: interval, **options) } describe '.new' do describe 'default settings' do @@ -147,7 +148,7 @@ it 'changes its wait interval after flushing' do expect(scheduler).to receive(:loop_wait_time=) do |value| - expected_interval = described_class.const_get(:DEFAULT_INTERVAL_SECONDS) - flush_time + expected_interval = interval - flush_time expect(value).to be <= expected_interval end