Skip to content

Commit

Permalink
DEBUG-2334 Dynamic Instrumentation code tracker component (#3942)
Browse files Browse the repository at this point in the history
  • Loading branch information
p-datadog authored Oct 2, 2024
1 parent 45d8f51 commit 1f1afc1
Show file tree
Hide file tree
Showing 11 changed files with 379 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ end

group :check do
if RUBY_VERSION >= '3.0.0' && RUBY_PLATFORM != 'java'
gem 'rbs', '~> 3.5.0', require: false
gem 'rbs', '~> 3.6', require: false
gem 'steep', '~> 1.7.0', require: false
end
gem 'ruby_memcheck', '>= 3' if RUBY_VERSION >= '3.4.0' && RUBY_PLATFORM != 'java'
Expand Down
168 changes: 168 additions & 0 deletions lib/datadog/di/code_tracker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# frozen_string_literal: true

module Datadog
module DI
# Tracks loaded Ruby code by source file and maintains a map from
# source file to the loaded code (instruction sequences).
# Also arranges for code in the loaded files to be instrumented by
# line probes that have already been received by the library.
#
# The loaded code is used to target line trace points when installing
# line probes which dramatically improves efficiency of line trace points.
#
# Note that, since most files will only be loaded one time (via the
# "require" mechanism), the code tracker needs to be global and not be
# recreated when the DI component is created.
#
# @api private
class CodeTracker
def initialize
@registry = {}
@trace_point_lock = Mutex.new
@registry_lock = Mutex.new
@compiled_trace_point = nil
end

# Starts tracking loaded code.
#
# This method should generally be called early in application boot
# process, because any code loaded before code tracking is enabled
# will not be instrumentable via line probes.
#
# Normally tracking should remain active for the lifetime of the
# process and would not be ever stopped.
def start
trace_point_lock.synchronize do
# If this code tracker is already running, we can do nothing or
# restart it (by disabling the trace point and recreating it).
# It is likely that some applications will attempt to activate
# DI more than once where the intention is to just activate DI;
# do not break such applications by clearing out the registry.
# For now, until there is a use case for recreating the trace point,
# do nothing if the code tracker has already started.
return if @compiled_trace_point

# Note: .trace enables the trace point.
@compiled_trace_point = TracePoint.trace(:script_compiled) do |tp|
# Useful attributes of the trace point object here:
# .instruction_sequence
# .instruction_sequence.path (either absolute file path for
# loaded or required code, or for eval'd code, if filename
# is specified as argument to eval, then this is the provided
# filename, otherwise this is a synthesized
# "(eval at <definition-file>:<line>)" string)
# .instruction_sequence.absolute_path (absolute file path when
# load or require are used to load code, nil for eval'd code
# regardless of whether filename was specified as an argument
# to eval on ruby 3.1+, same as path for eval'd code on ruby 3.0
# and lower)
# .method_id
# .path (refers to the code location that called the require/eval/etc.,
# not where the loaded code is; use .path on the instruction sequence
# to obtain the location of the compiled code)
# .eval_script
#
# For now just map the path to the instruction sequence.
path = tp.instruction_sequence.absolute_path
# Do not store mapping for eval'd code, since there is no way
# to target such code from dynamic instrumentation UI.
# eval'd code always sets tp.eval_script.
# When tp.eval_script is nil, code is either 'load'ed or 'require'd.
# steep, of course, complains about indexing with +path+
# without checking that it is not nil, so here, maybe there is
# some situation where path would in fact be nil and
# steep would end up saving the day.
if path && !tp.eval_script
registry_lock.synchronize do
registry[path] = tp.instruction_sequence
end
end
end
end
end

# Returns whether this code tracker has been activated and is
# tracking.
def active?
trace_point_lock.synchronize do
!!@compiled_trace_point
end
end

# Returns an array of RubVM::InstructionSequence (i.e. the compiled code)
# for the provided path.
#
# The argument can be a full path to a Ruby source code file or a
# suffix (basename + one or more directories preceding the basename).
# The idea with suffix matches is that file paths are likely to
# be different between development and production environments and
# the source control system uses relative paths and doesn't have
# absolute paths at all.
#
# Suffix matches are not guaranteed to be correct, meaning there may
# be multiple files with the same basename and they may all match a
# given suffix. In such cases, this method will return all matching
# paths (and all of these paths will be attempted to be instrumented
# by upstream code).
#
# If the suffix matches one of the paths completely (which requires it
# to be an absolute path), only the exactly matching path is returned.
# Otherwise all known paths that end in the suffix are returned.
# If no paths match, an empty array is returned.
def iseqs_for_path(suffix)
registry_lock.synchronize do
exact = registry[suffix]
return [exact] if exact

inexact = []
registry.each do |path, iseq|
# Exact match is not possible here, meaning any matching path
# has to be longer than the suffix. Require full component matches,
# meaning either the first character of the suffix is a slash
# or the previous character in the path is a slash.
# For now only check for forward slashes for Unix-like OSes;
# backslash is a legitimate character of a file name in Unix
# therefore simply permitting forward or back slash is not
# sufficient, we need to perform an OS check to know which
# path separator to use.
if path.length > suffix.length && path.end_with?(suffix)
previous_char = path[path.length - suffix.length - 1]
inexact << iseq if previous_char == "/" || suffix[0] == "/"
end
end
inexact
end
end

# Stops tracking code that is being loaded.
#
# This method should ordinarily never be called - if a file is loaded
# when code tracking is not active, this file will not be instrumentable
# by line probes.
#
# This method is intended for test suite use only, where multiple
# code tracker instances are created, to fully clean up the old instances.
def stop
# Permit multiple stop calls.
trace_point_lock.synchronize do
@compiled_trace_point&.disable
# Clear the instance variable so that the trace point may be
# reinstated in the future.
@compiled_trace_point = nil
end
registry_lock.synchronize do
registry.clear
end
end

private

# Mapping from paths of loaded files to RubyVM::InstructionSequence
# objects representing compiled code of those files.
attr_reader :registry

attr_reader :trace_point_lock
attr_reader :registry_lock
end
end
end
23 changes: 23 additions & 0 deletions sig/datadog/di/code_tracker.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module Datadog
module DI
class CodeTracker
@registry: Hash[String,RubyVM::InstructionSequence]

@lock: Thread::Mutex

@compiled_trace_point: TracePoint?

def initialize: () -> void

def start: () -> void
def active?: () -> bool
def iseqs_for_path: (String suffix) -> (::Array[RubyVM::InstructionSequence])
def stop: () -> void

private
attr_reader registry: Hash[String,RubyVM::InstructionSequence]
attr_reader trace_point_lock: Thread::Mutex
attr_reader registry_lock: Thread::Mutex
end
end
end
2 changes: 2 additions & 0 deletions spec/datadog/di/code_tracker_load_class.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class CodeTrackerLoadClass
end
2 changes: 2 additions & 0 deletions spec/datadog/di/code_tracker_require_class.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class CodeTrackerRequireClass
end
168 changes: 168 additions & 0 deletions spec/datadog/di/code_tracker_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
require "datadog/di/spec_helper"
require "datadog/di/code_tracker"

RSpec.describe Datadog::DI::CodeTracker do
di_test

let(:tracker) do
described_class.new
end

describe "#start" do
after do
tracker.stop
end

it "tracks loaded file" do
# The expectations appear to be lazy-loaded, therefore
# we need to invoke the same expectation before starting
# code tracking as we'll be using later in the test.
expect(tracker.send(:registry)).to be_empty
tracker.start
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
load File.join(File.dirname(__FILE__), "code_tracker_load_class.rb")
expect(tracker.send(:registry).length).to eq(1)

path = tracker.send(:registry).to_a.dig(0, 0)
# The path in the registry should be absolute.
expect(path[0]).to eq "/"
# The full path is dependent on the environment/system
# running the tests, but we can assert on the basename
# which will be the same.
expect(File.basename(path)).to eq("code_tracker_load_class.rb")
# And, we should in fact have a full path.
expect(path).to start_with("/")
end

it "tracks required file" do
# The expectations appear to be lazy-loaded, therefore
# we need to invoke the same expectation before starting
# code tracking as we'll be using later in the test.
expect(tracker.send(:registry)).to be_empty
tracker.start
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
require_relative "code_tracker_require_class"
expect(tracker.send(:registry).length).to eq(1)

path = tracker.send(:registry).to_a.dig(0, 0)
# The path in the registry should be absolute.
expect(path[0]).to eq "/"
# The full path is dependent on the environment/system
# running the tests, but we can assert on the basename
# which will be the same.
expect(File.basename(path)).to eq("code_tracker_require_class.rb")
# And, we should in fact have a full path.
expect(path).to start_with("/")
end

context "eval without location" do
it "does not track eval'd code" do
# The expectations appear to be lazy-loaded, therefore
# we need to invoke the same expectation before starting
# code tracking as we'll be using later in the test.
expect(tracker.send(:registry)).to be_empty
tracker.start
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
eval "1 + 2" # standard:disable Style/EvalWithLocation
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
end
end

context "eval with location" do
it "does not track eval'd code" do
# The expectations appear to be lazy-loaded, therefore
# we need to invoke the same expectation before starting
# code tracking as we'll be using later in the test.
expect(tracker.send(:registry)).to be_empty
tracker.start
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
eval "1 + 2", nil, __FILE__, __LINE__
# Should still be empty here.
expect(tracker.send(:registry)).to be_empty
end
end
end

describe "#active?" do
context "when started" do
before do
tracker.start
end

after do
tracker.stop
end

it "is true" do
expect(tracker.active?).to be true
end
end

context "when stopped" do
before do
tracker.start
tracker.stop
end

it "is false" do
expect(tracker.active?).to be false
end
end
end

describe "#iseqs_for_path" do
around do |example|
tracker.start

load File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb")
load File.join(File.dirname(__FILE__), "code_tracker_test_class_2.rb")
load File.join(File.dirname(__FILE__), "code_tracker_test_class_3.rb")
load File.join(File.dirname(__FILE__), "code_tracker_test_classes", "code_tracker_test_class_1.rb")
expect(tracker.send(:registry).each.to_a.length).to eq(4)

# To be able to assert on the registry, replace values (iseqs)
# with the keys.
(registry = tracker.send(:registry)).each do |k, v|
registry[k] = k
end

example.run

tracker.stop
end

context "exact match for full path" do
let(:path) do
File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb")
end

it "returns the exact match only" do
expect(tracker.iseqs_for_path(path)).to eq([path])
end
end

context "basename match" do
let(:expected) do
[
File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb"),
File.join(File.dirname(__FILE__), "code_tracker_test_classes", "code_tracker_test_class_1.rb"),
]
end

it "returns the exact match only" do
expect(tracker.iseqs_for_path("code_tracker_test_class_1.rb")).to eq(expected)
end
end

context "match not on path component boundary" do
it "returns no matches" do
expect(tracker.iseqs_for_path("1.rb")).to eq([])
end
end
end
end
2 changes: 2 additions & 0 deletions spec/datadog/di/code_tracker_test_class_1.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class CodeTrackerTestClass1
end
2 changes: 2 additions & 0 deletions spec/datadog/di/code_tracker_test_class_2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class CodeTrackerTestClass2
end
2 changes: 2 additions & 0 deletions spec/datadog/di/code_tracker_test_class_3.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class CodeTrackerTestClass3
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Different name to not conflict with the upper-level class definition.
# Note that file basenames need to be identical for some of the test cases.
class SubdirCodeTrackerTestClass1
end
Loading

0 comments on commit 1f1afc1

Please sign in to comment.