diff --git a/Gemfile b/Gemfile index de40709fa8f..3e123675675 100644 --- a/Gemfile +++ b/Gemfile @@ -82,7 +82,7 @@ end group :check do if RUBY_VERSION >= '3.0.0' && RUBY_PLATFORM != 'java' - gem 'rbs', '~> 3.5.0', require: false + gem 'rbs', '~> 3.6', require: false gem 'steep', '~> 1.7.0', require: false end gem 'ruby_memcheck', '>= 3' if RUBY_VERSION >= '3.4.0' && RUBY_PLATFORM != 'java' diff --git a/lib/datadog/di/code_tracker.rb b/lib/datadog/di/code_tracker.rb new file mode 100644 index 00000000000..dbfe67a51b7 --- /dev/null +++ b/lib/datadog/di/code_tracker.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +module Datadog + module DI + # Tracks loaded Ruby code by source file and maintains a map from + # source file to the loaded code (instruction sequences). + # Also arranges for code in the loaded files to be instrumented by + # line probes that have already been received by the library. + # + # The loaded code is used to target line trace points when installing + # line probes which dramatically improves efficiency of line trace points. + # + # Note that, since most files will only be loaded one time (via the + # "require" mechanism), the code tracker needs to be global and not be + # recreated when the DI component is created. + # + # @api private + class CodeTracker + def initialize + @registry = {} + @trace_point_lock = Mutex.new + @registry_lock = Mutex.new + @compiled_trace_point = nil + end + + # Starts tracking loaded code. + # + # This method should generally be called early in application boot + # process, because any code loaded before code tracking is enabled + # will not be instrumentable via line probes. + # + # Normally tracking should remain active for the lifetime of the + # process and would not be ever stopped. + def start + trace_point_lock.synchronize do + # If this code tracker is already running, we can do nothing or + # restart it (by disabling the trace point and recreating it). + # It is likely that some applications will attempt to activate + # DI more than once where the intention is to just activate DI; + # do not break such applications by clearing out the registry. + # For now, until there is a use case for recreating the trace point, + # do nothing if the code tracker has already started. + return if @compiled_trace_point + + # Note: .trace enables the trace point. + @compiled_trace_point = TracePoint.trace(:script_compiled) do |tp| + # Useful attributes of the trace point object here: + # .instruction_sequence + # .instruction_sequence.path (either absolute file path for + # loaded or required code, or for eval'd code, if filename + # is specified as argument to eval, then this is the provided + # filename, otherwise this is a synthesized + # "(eval at :)" string) + # .instruction_sequence.absolute_path (absolute file path when + # load or require are used to load code, nil for eval'd code + # regardless of whether filename was specified as an argument + # to eval on ruby 3.1+, same as path for eval'd code on ruby 3.0 + # and lower) + # .method_id + # .path (refers to the code location that called the require/eval/etc., + # not where the loaded code is; use .path on the instruction sequence + # to obtain the location of the compiled code) + # .eval_script + # + # For now just map the path to the instruction sequence. + path = tp.instruction_sequence.absolute_path + # Do not store mapping for eval'd code, since there is no way + # to target such code from dynamic instrumentation UI. + # eval'd code always sets tp.eval_script. + # When tp.eval_script is nil, code is either 'load'ed or 'require'd. + # steep, of course, complains about indexing with +path+ + # without checking that it is not nil, so here, maybe there is + # some situation where path would in fact be nil and + # steep would end up saving the day. + if path && !tp.eval_script + registry_lock.synchronize do + registry[path] = tp.instruction_sequence + end + end + end + end + end + + # Returns whether this code tracker has been activated and is + # tracking. + def active? + trace_point_lock.synchronize do + !!@compiled_trace_point + end + end + + # Returns an array of RubVM::InstructionSequence (i.e. the compiled code) + # for the provided path. + # + # The argument can be a full path to a Ruby source code file or a + # suffix (basename + one or more directories preceding the basename). + # The idea with suffix matches is that file paths are likely to + # be different between development and production environments and + # the source control system uses relative paths and doesn't have + # absolute paths at all. + # + # Suffix matches are not guaranteed to be correct, meaning there may + # be multiple files with the same basename and they may all match a + # given suffix. In such cases, this method will return all matching + # paths (and all of these paths will be attempted to be instrumented + # by upstream code). + # + # If the suffix matches one of the paths completely (which requires it + # to be an absolute path), only the exactly matching path is returned. + # Otherwise all known paths that end in the suffix are returned. + # If no paths match, an empty array is returned. + def iseqs_for_path(suffix) + registry_lock.synchronize do + exact = registry[suffix] + return [exact] if exact + + inexact = [] + registry.each do |path, iseq| + # Exact match is not possible here, meaning any matching path + # has to be longer than the suffix. Require full component matches, + # meaning either the first character of the suffix is a slash + # or the previous character in the path is a slash. + # For now only check for forward slashes for Unix-like OSes; + # backslash is a legitimate character of a file name in Unix + # therefore simply permitting forward or back slash is not + # sufficient, we need to perform an OS check to know which + # path separator to use. + if path.length > suffix.length && path.end_with?(suffix) + previous_char = path[path.length - suffix.length - 1] + inexact << iseq if previous_char == "/" || suffix[0] == "/" + end + end + inexact + end + end + + # Stops tracking code that is being loaded. + # + # This method should ordinarily never be called - if a file is loaded + # when code tracking is not active, this file will not be instrumentable + # by line probes. + # + # This method is intended for test suite use only, where multiple + # code tracker instances are created, to fully clean up the old instances. + def stop + # Permit multiple stop calls. + trace_point_lock.synchronize do + @compiled_trace_point&.disable + # Clear the instance variable so that the trace point may be + # reinstated in the future. + @compiled_trace_point = nil + end + registry_lock.synchronize do + registry.clear + end + end + + private + + # Mapping from paths of loaded files to RubyVM::InstructionSequence + # objects representing compiled code of those files. + attr_reader :registry + + attr_reader :trace_point_lock + attr_reader :registry_lock + end + end +end diff --git a/sig/datadog/di/code_tracker.rbs b/sig/datadog/di/code_tracker.rbs new file mode 100644 index 00000000000..57b9534667b --- /dev/null +++ b/sig/datadog/di/code_tracker.rbs @@ -0,0 +1,23 @@ +module Datadog + module DI + class CodeTracker + @registry: Hash[String,RubyVM::InstructionSequence] + + @lock: Thread::Mutex + + @compiled_trace_point: TracePoint? + + def initialize: () -> void + + def start: () -> void + def active?: () -> bool + def iseqs_for_path: (String suffix) -> (::Array[RubyVM::InstructionSequence]) + def stop: () -> void + + private + attr_reader registry: Hash[String,RubyVM::InstructionSequence] + attr_reader trace_point_lock: Thread::Mutex + attr_reader registry_lock: Thread::Mutex + end + end +end diff --git a/spec/datadog/di/code_tracker_load_class.rb b/spec/datadog/di/code_tracker_load_class.rb new file mode 100644 index 00000000000..ebc4f6f4c20 --- /dev/null +++ b/spec/datadog/di/code_tracker_load_class.rb @@ -0,0 +1,2 @@ +class CodeTrackerLoadClass +end diff --git a/spec/datadog/di/code_tracker_require_class.rb b/spec/datadog/di/code_tracker_require_class.rb new file mode 100644 index 00000000000..915b8b174f6 --- /dev/null +++ b/spec/datadog/di/code_tracker_require_class.rb @@ -0,0 +1,2 @@ +class CodeTrackerRequireClass +end diff --git a/spec/datadog/di/code_tracker_spec.rb b/spec/datadog/di/code_tracker_spec.rb new file mode 100644 index 00000000000..2ccca9e2244 --- /dev/null +++ b/spec/datadog/di/code_tracker_spec.rb @@ -0,0 +1,168 @@ +require "datadog/di/spec_helper" +require "datadog/di/code_tracker" + +RSpec.describe Datadog::DI::CodeTracker do + di_test + + let(:tracker) do + described_class.new + end + + describe "#start" do + after do + tracker.stop + end + + it "tracks loaded file" do + # The expectations appear to be lazy-loaded, therefore + # we need to invoke the same expectation before starting + # code tracking as we'll be using later in the test. + expect(tracker.send(:registry)).to be_empty + tracker.start + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + load File.join(File.dirname(__FILE__), "code_tracker_load_class.rb") + expect(tracker.send(:registry).length).to eq(1) + + path = tracker.send(:registry).to_a.dig(0, 0) + # The path in the registry should be absolute. + expect(path[0]).to eq "/" + # The full path is dependent on the environment/system + # running the tests, but we can assert on the basename + # which will be the same. + expect(File.basename(path)).to eq("code_tracker_load_class.rb") + # And, we should in fact have a full path. + expect(path).to start_with("/") + end + + it "tracks required file" do + # The expectations appear to be lazy-loaded, therefore + # we need to invoke the same expectation before starting + # code tracking as we'll be using later in the test. + expect(tracker.send(:registry)).to be_empty + tracker.start + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + require_relative "code_tracker_require_class" + expect(tracker.send(:registry).length).to eq(1) + + path = tracker.send(:registry).to_a.dig(0, 0) + # The path in the registry should be absolute. + expect(path[0]).to eq "/" + # The full path is dependent on the environment/system + # running the tests, but we can assert on the basename + # which will be the same. + expect(File.basename(path)).to eq("code_tracker_require_class.rb") + # And, we should in fact have a full path. + expect(path).to start_with("/") + end + + context "eval without location" do + it "does not track eval'd code" do + # The expectations appear to be lazy-loaded, therefore + # we need to invoke the same expectation before starting + # code tracking as we'll be using later in the test. + expect(tracker.send(:registry)).to be_empty + tracker.start + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + eval "1 + 2" # standard:disable Style/EvalWithLocation + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + end + end + + context "eval with location" do + it "does not track eval'd code" do + # The expectations appear to be lazy-loaded, therefore + # we need to invoke the same expectation before starting + # code tracking as we'll be using later in the test. + expect(tracker.send(:registry)).to be_empty + tracker.start + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + eval "1 + 2", nil, __FILE__, __LINE__ + # Should still be empty here. + expect(tracker.send(:registry)).to be_empty + end + end + end + + describe "#active?" do + context "when started" do + before do + tracker.start + end + + after do + tracker.stop + end + + it "is true" do + expect(tracker.active?).to be true + end + end + + context "when stopped" do + before do + tracker.start + tracker.stop + end + + it "is false" do + expect(tracker.active?).to be false + end + end + end + + describe "#iseqs_for_path" do + around do |example| + tracker.start + + load File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb") + load File.join(File.dirname(__FILE__), "code_tracker_test_class_2.rb") + load File.join(File.dirname(__FILE__), "code_tracker_test_class_3.rb") + load File.join(File.dirname(__FILE__), "code_tracker_test_classes", "code_tracker_test_class_1.rb") + expect(tracker.send(:registry).each.to_a.length).to eq(4) + + # To be able to assert on the registry, replace values (iseqs) + # with the keys. + (registry = tracker.send(:registry)).each do |k, v| + registry[k] = k + end + + example.run + + tracker.stop + end + + context "exact match for full path" do + let(:path) do + File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb") + end + + it "returns the exact match only" do + expect(tracker.iseqs_for_path(path)).to eq([path]) + end + end + + context "basename match" do + let(:expected) do + [ + File.join(File.dirname(__FILE__), "code_tracker_test_class_1.rb"), + File.join(File.dirname(__FILE__), "code_tracker_test_classes", "code_tracker_test_class_1.rb"), + ] + end + + it "returns the exact match only" do + expect(tracker.iseqs_for_path("code_tracker_test_class_1.rb")).to eq(expected) + end + end + + context "match not on path component boundary" do + it "returns no matches" do + expect(tracker.iseqs_for_path("1.rb")).to eq([]) + end + end + end +end diff --git a/spec/datadog/di/code_tracker_test_class_1.rb b/spec/datadog/di/code_tracker_test_class_1.rb new file mode 100644 index 00000000000..60f4b30f80c --- /dev/null +++ b/spec/datadog/di/code_tracker_test_class_1.rb @@ -0,0 +1,2 @@ +class CodeTrackerTestClass1 +end diff --git a/spec/datadog/di/code_tracker_test_class_2.rb b/spec/datadog/di/code_tracker_test_class_2.rb new file mode 100644 index 00000000000..c99b1367fd5 --- /dev/null +++ b/spec/datadog/di/code_tracker_test_class_2.rb @@ -0,0 +1,2 @@ +class CodeTrackerTestClass2 +end diff --git a/spec/datadog/di/code_tracker_test_class_3.rb b/spec/datadog/di/code_tracker_test_class_3.rb new file mode 100644 index 00000000000..eaa42bbe766 --- /dev/null +++ b/spec/datadog/di/code_tracker_test_class_3.rb @@ -0,0 +1,2 @@ +class CodeTrackerTestClass3 +end diff --git a/spec/datadog/di/code_tracker_test_classes/code_tracker_test_class_1.rb b/spec/datadog/di/code_tracker_test_classes/code_tracker_test_class_1.rb new file mode 100644 index 00000000000..27075513397 --- /dev/null +++ b/spec/datadog/di/code_tracker_test_classes/code_tracker_test_class_1.rb @@ -0,0 +1,4 @@ +# Different name to not conflict with the upper-level class definition. +# Note that file basenames need to be identical for some of the test cases. +class SubdirCodeTrackerTestClass1 +end diff --git a/spec/datadog/di/spec_helper.rb b/spec/datadog/di/spec_helper.rb index b8817efbb02..5fe045e4cea 100644 --- a/spec/datadog/di/spec_helper.rb +++ b/spec/datadog/di/spec_helper.rb @@ -6,6 +6,11 @@ def di_test skip "Dynamic instrumentation is not supported on JRuby" end end + if RUBY_VERSION < "2.6" + before(:all) do + skip "Dynamic instrumentation requires Ruby 2.6 or higher" + end + end end end end