From 9c4624444791a3bd3e3909653e135280bdeac53e Mon Sep 17 00:00:00 2001 From: orioldsm <150839949+orioldsm@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:32:06 -0500 Subject: [PATCH] Track scheduled vs enqueued metrics (#34) --- README.md | 24 ++++--- lib/sidekiq/instrument/middleware/client.rb | 18 +++-- lib/sidekiq/instrument/version.rb | 2 +- .../client_middleware_spec.rb | 67 +++++++++++-------- 4 files changed, 67 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index f1bee8a..8364e00 100644 --- a/README.md +++ b/README.md @@ -75,16 +75,18 @@ Sidekiq::Instrument::WorkerMetrics.namespace = ## StatsD Keys For each job, the following metrics will be reported: -1. **shared.sidekiq._queue_._job_.enqueue**: counter incremented each time a +1. **shared.sidekiq._queue_._job_.schedule**: counter incremented each time a + job is scheduled to be pushed onto the queue later. +2. **shared.sidekiq._queue_._job_.enqueue**: counter incremented each time a job is pushed onto the queue. -2. **shared.sidekiq._queue_._job_.dequeue**: counter incremented just before +3. **shared.sidekiq._queue_._job_.dequeue**: counter incremented just before worker begins performing a job. -3. **shared.sidekiq._queue_._job_.runtime**: timer of the total time spent +4. **shared.sidekiq._queue_._job_.runtime**: timer of the total time spent in `perform`, in milliseconds. -4. **shared.sidekiq._queue_._job_.error**: counter incremented each time a +5. **shared.sidekiq._queue_._job_.error**: counter incremented each time a job fails. -For job retry attempts, the above 4 metrics will still be reported but the enqueue/dequeue metrics +For job retry attempts, metrics 2-5 will still be reported but the enqueue/dequeue metrics will have a `.retry` appended: 1. **shared.sidekiq._queue_._job_.enqueue.retry** @@ -103,13 +105,15 @@ For each worker, the following metrics and tags will be reported: ## DogStatsD Keys For each job, the following metrics and tags will be reported: -1. **sidekiq.enqueue (tags: {queue: _queue_, worker: _job_})**: counter incremented each time a +1. **sidekiq.schedule (tags: {queue: _queue_, worker: _job_})**: counter incremented each time a + job is scheduled to be pushed onto the queue later. +2. **sidekiq.enqueue (tags: {queue: _queue_, worker: _job_})**: counter incremented each time a job is pushed onto the queue. -2. **sidekiq.dequeue (tags: {queue: _queue_, worker: _job_})**: counter incremented just before +3. **sidekiq.dequeue (tags: {queue: _queue_, worker: _job_})**: counter incremented just before worker begins performing a job. -3. **sidekiq.runtime (tags: {queue: _queue_, worker: _job_})**: timer of the total time spent +4. **sidekiq.runtime (tags: {queue: _queue_, worker: _job_})**: timer of the total time spent in `perform`, in milliseconds. -4. **sidekiq.error (tags: {queue: _queue_, worker: _job_})**: counter incremented each time a +5. **sidekiq.error (tags: {queue: _queue_, worker: _job_, error: _errorclass_})**: counter incremented each time a job fails. For job retry attempts, the above 4 metrics will still be reported but the enqueue/dequeue metrics @@ -127,7 +131,7 @@ For each worker, the following metrics and tags will be reported: ## Worker -**WARNING: The metrics reported by this Worker are currently inaccurate.** +**WARNING: The Redis count metrics reported by this Worker are currently inaccurate.** There is a worker, `Sidekiq::Instrument::Worker`, that submits gauges for various interesting statistics; namely, the bulk of the information in `Sidekiq::Stats` diff --git a/lib/sidekiq/instrument/middleware/client.rb b/lib/sidekiq/instrument/middleware/client.rb index 6d9d8db..5dc0bb0 100644 --- a/lib/sidekiq/instrument/middleware/client.rb +++ b/lib/sidekiq/instrument/middleware/client.rb @@ -12,12 +12,18 @@ def call(worker_class, job, queue, _redis_pool) klass = Object.const_get(worker_class.to_s) class_instance = klass.new - # This is needed because the ClientMiddleware is called twice for scheduled jobs - # - Once when it gets scheduled - # - Once when it gets dequeued for processing - # We only want to increment the enqueue metric when the job is scheduled and - # Sidekiq::Context.current[:class] is only ever set when the job is scheduled - if Sidekiq::Context.current[:class].present? + # Depending on the type of perform called, this method can be hit either + # once or twice for the same Job ID. + # + # perform_async: + # - once when it is enqueued, with no job['at'] key + # perform_in: + # - once when it is scheduled, with job['at'] key + # - once when it is enqueued, without job['at'] key + if job['at'].present? + Statter.statsd.increment(metric_name(class_instance, 'schedule')) + Statter.dogstatsd&.increment('sidekiq.schedule', worker_dog_options(class_instance, job)) + else WorkerMetrics.trace_workers_increment_counter(klass.name.underscore) Statter.statsd.increment(metric_name(class_instance, 'enqueue')) Statter.dogstatsd&.increment('sidekiq.enqueue', worker_dog_options(class_instance, job)) diff --git a/lib/sidekiq/instrument/version.rb b/lib/sidekiq/instrument/version.rb index ed564bc..f3efa3b 100644 --- a/lib/sidekiq/instrument/version.rb +++ b/lib/sidekiq/instrument/version.rb @@ -1,5 +1,5 @@ module Sidekiq module Instrument - VERSION = '0.7.3' + VERSION = '0.7.4' end end diff --git a/spec/sidekiq-instrument/client_middleware_spec.rb b/spec/sidekiq-instrument/client_middleware_spec.rb index 8c2b0b3..beb38e6 100644 --- a/spec/sidekiq-instrument/client_middleware_spec.rb +++ b/spec/sidekiq-instrument/client_middleware_spec.rb @@ -28,11 +28,7 @@ end end - context 'with Sidekiq::Context.current[:class] (job being enqueued)' do - before do - Sidekiq::Context.current[:class] = 'MyWorker' - end - + context 'with nil job["at"] (perform_async)' do context 'without statsd_metric_name' do it 'increments the StatsD enqueue counter' do expect do @@ -93,36 +89,53 @@ end end - context 'without the Sidekiq::Context.current[:class] (job being dequeued)' do - before do - Sidekiq::Context.current[:class] = nil - end + context 'with job["at"] (perform_in)' do + context 'without statsd_metric_name' do + it 'increments the StatsD schedule counter' do + expect do + MyWorker.perform_in(1) + end.to trigger_statsd_increment('shared.sidekiq.default.MyWorker.schedule') + end - it 'does not increment the StatsD enqueue counter' do - expect do - MyWorker.perform_async - end.not_to trigger_statsd_increment('shared.sidekiq.default.MyWorker.enqueue') + it 'increments the DogStatsD schedule counter' do + expect( + Sidekiq::Instrument::Statter.dogstatsd + ).to receive(:increment).with('sidekiq.schedule', { tags: ['queue:default', 'worker:my_worker'] }).once + MyWorker.perform_in(1) + end + + context 'with additional tag(s)' do + it 'increments DogStatsD schedule counter with additional tag(s)' do + tag = 'test_worker' + + expect( + Sidekiq::Instrument::Statter.dogstatsd + ).to receive(:increment).with('sidekiq.schedule', { tags: ['queue:default', 'worker:my_worker', tag] }).once + MyWorker.set(tags: [tag]).perform_in(1) + end + end end - it 'does not increment the DogStatsD enqueue counter' do - expect( - Sidekiq::Instrument::Statter.dogstatsd - ).not_to receive(:increment).with('sidekiq.enqueue', { tags: ['queue:default', 'worker:my_worker'] }) - MyWorker.perform_async + context 'with statsd_metric_name' do + it 'increments the schedule counter' do + expect do + MyOtherWorker.perform_in(1) + end.to trigger_statsd_increment('my_other_worker.schedule') + end end - context 'with WorkerMetrics.enabled true' do + context 'without optional DogStatsD client' do before do - Redis.new.flushall - Redis.new.hset(worker_metric_name, 'my_other_worker', 0) + @tmp = Sidekiq::Instrument::Statter.dogstatsd + Sidekiq::Instrument::Statter.dogstatsd = nil end - it 'does not increment the in_queue counter' do - Sidekiq::Instrument::WorkerMetrics.enabled = true - MyOtherWorker.perform_async - expect(Redis.new.hget(worker_metric_name, 'my_other_worker')).to eq('0') - MyOtherWorker.perform_async - expect(Redis.new.hget(worker_metric_name, 'my_other_worker')).to eq('0') + after do + Sidekiq::Instrument::Statter.dogstatsd = @tmp + end + + it 'does not error' do + expect { MyWorker.perform_in(1) }.not_to raise_error end end end