Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-9469] Skip heap samples with age 0 #3573

Merged
merged 5 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions benchmarks/profiler_memory_sample_serialize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def setup
@heap_size_enabled = ENV['HEAP_SIZE'] == 'true'
@heap_sample_every = (ENV['HEAP_SAMPLE_EVERY'] || '1').to_i
@retain_every = (ENV['RETAIN_EVERY'] || '10').to_i
@skip_end_gc = ENV['SKIP_END_GC'] == 'true'
@recorder_factory = proc {
Datadog::Profiling::StackRecorder.new(
cpu_time_enabled: false,
Expand All @@ -68,7 +69,7 @@ def run_benchmark
suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_memory_sample_serialize')
)

x.report("sample+serialize #{ENV['CONFIG']} retain_every=#{@retain_every} heap_samples=#{@heap_samples_enabled} heap_size=#{@heap_size_enabled} heap_sample_every=#{@heap_sample_every}") do
x.report("sample+serialize #{ENV['CONFIG']} retain_every=#{@retain_every} heap_samples=#{@heap_samples_enabled} heap_size=#{@heap_size_enabled} heap_sample_every=#{@heap_sample_every} skip_end_gc=#{@skip_end_gc}") do
recorder = @recorder_factory.call
samples_per_second = 100
simulate_seconds = 60
Expand All @@ -79,7 +80,7 @@ def run_benchmark
retained_objs << obj if (i % @retain_every).zero?
end

GC.start
GC.start unless @skip_end_gc

recorder.serialize
end
Expand Down
46 changes: 43 additions & 3 deletions ext/datadog_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@
#define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
#endif

// Minimum age (in GC generations) of heap objects we want to include in heap
// recorder iterations. Object with age 0 represent objects that have yet to undergo
// a GC and, thus, may just be noise/trash at instant of iteration and are usually not
// relevant for heap profiles as the great majority should be trivially reclaimed
// during the next GC.
#define ITERATION_MIN_AGE 1

// Check to see if an object should not be included in a heap recorder iteration.
// This centralizes the checking logic to ensure it's equally applied between
// preparation and iteration codepaths.
#define NOT_INCLUDED_IN_ITERATION(obj_record) obj_record->object_data.gen_age < ITERATION_MIN_AGE
AlexJF marked this conversation as resolved.
Show resolved Hide resolved

// A compact representation of a stacktrace frame for a heap allocation.
typedef struct {
char *name;
Expand Down Expand Up @@ -137,6 +149,11 @@ struct heap_recorder {
// mutation of the data so iteration can occur without acquiring a lock.
// NOTE: Contrary to object_records, this table has no ownership of its data.
st_table *object_records_snapshot;
// The GC gen/epoch/count in which we prepared the current iteration.
//
// This enables us to calculate the age of iterated objects in the above snapshot by
// comparing it against an object's alloc_gen.
size_t iteration_gen;

// Data for a heap recording that was started but not yet ended
recording active_recording;
Expand Down Expand Up @@ -353,6 +370,8 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
return;
}

heap_recorder->iteration_gen = rb_gc_count();

if (heap_recorder->object_records_snapshot != NULL) {
// we could trivially handle this but we raise to highlight and catch unexpected usages.
rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
AlexJF marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -466,6 +485,19 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext

VALUE ref;

size_t iteration_gen = recorder->iteration_gen;
size_t alloc_gen = record->object_data.alloc_gen;
// Guard against potential overflows given unsigned types here.
record->object_data.gen_age = alloc_gen < iteration_gen ? iteration_gen - alloc_gen : 0;

if (NOT_INCLUDED_IN_ITERATION(record)) {
// If an object won't be included in the current iteration, there's
// no point checking for liveness or updating its size, so exit early.
// NOTE: This means that there should be an equivalent check during actual
// iteration otherwise we'd iterate/expose stale object data.
return ST_CONTINUE;
}

if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
// Id no longer associated with a valid ref. Need to delete this object record!
on_committed_object_record_cleanup(recorder, record);
Expand Down Expand Up @@ -525,8 +557,16 @@ static int st_object_records_iterate(DDTRACE_UNUSED st_data_t key, st_data_t val
const heap_stack *stack = record->heap_record->stack;
iteration_context *context = (iteration_context*) extra;

ddog_prof_Location *locations = context->heap_recorder->reusable_locations;
const heap_recorder *recorder = context->heap_recorder;

if (NOT_INCLUDED_IN_ITERATION(record)) {
// Skip objects that should not be included in iteration
// NOTE: This matches the short-circuiting condition in st_object_record_update
// and prevents iteration over stale objects.
return ST_CONTINUE;
}

ddog_prof_Location *locations = recorder->reusable_locations;
for (uint16_t i = 0; i < stack->frames_len; i++) {
const heap_frame *frame = &stack->frames[i];
ddog_prof_Location *location = &locations[i];
Expand Down Expand Up @@ -725,9 +765,9 @@ void object_record_free(object_record *record) {

VALUE object_record_inspect(object_record *record) {
heap_frame top_frame = record->heap_record->stack->frames[0];
VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu ",
VALUE inspect = rb_sprintf("obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu gen_age=%zu ",
record->obj_id, record->object_data.weight, record->object_data.size, top_frame.filename,
(int) top_frame.line, record->object_data.alloc_gen);
(int) top_frame.line, record->object_data.alloc_gen, record->object_data.gen_age);

const char *class = record->object_data.class;
if (class != NULL) {
Expand Down
8 changes: 7 additions & 1 deletion ext/datadog_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ typedef struct live_object_data {
// could be seen as being representative of 50 objects.
unsigned int weight;

// Size of this object on last flush/update.
// Size of this object in memory.
// NOTE: This only gets updated during heap_recorder_prepare_iteration and only
// for those objects that meet the minimum iteration age requirements.
size_t size;

// The class of the object that we're tracking.
Expand All @@ -39,6 +41,10 @@ typedef struct live_object_data {
// This enables us to calculate the age of this object in terms of GC executions.
size_t alloc_gen;

// The age of this object in terms of GC generations.
// NOTE: This only gets updated during heap_recorder_prepare_iteration
size_t gen_age;

// Whether this object was previously seen as being frozen. If this is the case,
// we'll skip any further size updates since frozen objects are supposed to be
// immutable.
Expand Down
11 changes: 3 additions & 8 deletions ext/datadog_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ struct call_serialize_without_gvl_arguments {
// Set by caller
struct stack_recorder_state *state;
ddog_Timespec finish_timestamp;
size_t gc_count_before_serialize;

// Set by callee
ddog_prof_Profile *profile;
Expand Down Expand Up @@ -489,7 +488,6 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
struct call_serialize_without_gvl_arguments args = {
.state = state,
.finish_timestamp = finish_timestamp,
.gc_count_before_serialize = rb_gc_count(),
.serialize_ran = false
};

Expand Down Expand Up @@ -613,8 +611,6 @@ typedef struct heap_recorder_iteration_context {

bool error;
char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];

size_t profile_gen;
} heap_recorder_iteration_context;

static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
Expand Down Expand Up @@ -643,7 +639,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
}
labels[label_offset++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("gc gen age"),
.num = context->profile_gen - object_data->alloc_gen,
.num = object_data->gen_age,
};

ddog_prof_Profile_Result result = ddog_prof_Profile_add(
Expand All @@ -670,13 +666,12 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
return true;
}

static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile, size_t gc_count_before_serialize) {
static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) {
heap_recorder_iteration_context iteration_context = {
.state = state,
.profile = profile,
.error = false,
.error_msg = {0},
.profile_gen = gc_count_before_serialize,
};
bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context);
// We wait until we're out of the iteration to grab the gvl and raise. This is important because during
Expand All @@ -698,7 +693,7 @@ static void *call_serialize_without_gvl(void *call_args) {

// Now that we have the inactive profile with all but heap samples, lets fill it with heap data
// without needing to race with the active sampler
build_heap_profile_without_gvl(args->state, args->profile, args->gc_count_before_serialize);
build_heap_profile_without_gvl(args->state, args->profile);

// Note: The profile gets reset by the serialize call
args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,10 @@
test_num_allocated_object.times { |i| live_objects[i] = CpuAndWallTimeWorkerSpec::TestStruct.new }
allocation_line = __LINE__ - 1

# Force a GC to happen here to ensure all the live_objects have age > 0.
# Otherwise they wouldn't show up in the serialized pprof below
GC.start

cpu_and_wall_time_worker.stop

test_struct_heap_sample = lambda { |sample|
Expand Down
46 changes: 46 additions & 0 deletions spec/datadog/profiling/stack_recorder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,48 @@ def sample_allocation(obj)
expect(summed_values).to eq(expected_summed_values)
end

it 'does not include samples with age = 0' do
test_num_allocated_objects = 123
test_num_age_bigger_0 = 57
live_objects = Array.new(test_num_allocated_objects)

allocator_proc = proc { |i|
live_objects[i] = "this is string number #{i}"
sample_allocation(live_objects[i])
}

sample_line = __LINE__ - 3

# First allocate a bunch of objects with age > 0. We expect to
# see these at the end
test_num_age_bigger_0.times(&allocator_proc)
# Force the above allocations to have gc age > 0
GC.start

begin
# Need to disable GC during this entire stretch to ensure rb_gc_count is
# the same between sample_allocation and pprof serialization.
GC.disable

# Allocate another set of objects that will necessarily have age = 0 since
# we disabled GC immediate before and will only enable it at test's end.
(test_num_age_bigger_0..test_num_allocated_objects).each(&allocator_proc)

# Grab all exported heap samples and sum their values
sum_exported_heap_samples = heap_samples
.select { |s| s.has_location?(path: __FILE__, line: sample_line) }
.map { |s| s.values[:'heap-live-samples'] }
.reduce(:+)

# Multiply expectation by sample_rate to be able to compare with weighted samples
# We expect total exported sum to match the weighted samples with age > 0
expect(sum_exported_heap_samples).to be test_num_age_bigger_0 * sample_rate
ensure
# Whatever happens, make sure we reenable GC
GC.enable
end
end

it "aren't lost when they happen concurrently with a long serialization" do
described_class::Testing._native_start_fake_slow_heap_serialization(stack_recorder)

Expand All @@ -525,6 +567,8 @@ def sample_allocation(obj)

described_class::Testing._native_end_fake_slow_heap_serialization(stack_recorder)

GC.start # Force a GC so the live_objects above have age > 0 and show up in heap samples

relevant_sample = heap_samples.find { |s| s.has_location?(path: __FILE__, line: sample_line) }
expect(relevant_sample).not_to be nil
expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object * sample_rate
Expand Down Expand Up @@ -642,6 +686,8 @@ def create_obj_in_recycled_slot(should_sample_original: false)
sample_allocation(recycled_obj)
sample_line = __LINE__ - 1

GC.start # Ensure recycled sample has age > 0 so it shows up in serialized profile

recycled_sample = heap_samples.find { |s| s.has_location?(path: __FILE__, line: sample_line) }
expect(recycled_sample).not_to be nil
end
Expand Down
Loading