From b9e7f5bd5f189b3c7c0fd2c2ff209550db74d3d3 Mon Sep 17 00:00:00 2001 From: Dale Hamel Date: Mon, 23 Mar 2026 15:45:16 -0400 Subject: [PATCH] Fix data race in GC frame recording between signal handler and postponed job stackprof_record_gc_samples() wrote fake GC frames (garbage collection, marking, sweeping) directly into the shared _stackprof.frames_buffer, then called stackprof_record_sample_for_stack() which reads from that buffer. A signal arriving during this processing calls stackprof_buffer_sample() from the signal handler, invoking rb_profile_frames() which overwrites frames_buffer with real stack frames. This caused the raw sample data to reference fake GC frame IDs that never got added to the frames hash, producing profiles where frame lookups fail for GC-related entries. Fix: stackprof_record_sample_for_stack() now takes explicit frames_buffer and lines_buffer parameters. stackprof_record_gc_samples() passes stack-local arrays for GC frames, making them immune to signal handler interference. stackprof_record_buffer() passes the shared global buffer explicitly (safe because buffer_count guards against re-entry). --- ext/stackprof/stackprof.c | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/ext/stackprof/stackprof.c b/ext/stackprof/stackprof.c index 5241b80..bf3ca8b 100644 --- a/ext/stackprof/stackprof.c +++ b/ext/stackprof/stackprof.c @@ -537,7 +537,7 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment) } void -stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta) +stackprof_record_sample_for_stack(int num, VALUE *frames_buffer, int *lines_buffer, uint64_t sample_timestamp, int64_t timestamp_delta) { int i, n; VALUE prev_frame = Qnil; @@ -571,8 +571,8 @@ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t ti * in the raw buffer are stored in the opposite direction of stacks * in the frames buffer that came from Ruby. */ for (i = num-1, n = 0; i >= 0; i--, n++) { - VALUE frame = _stackprof.frames_buffer[i]; - int line = _stackprof.lines_buffer[i]; + VALUE frame = frames_buffer[i]; + int line = lines_buffer[i]; // Encode the line in to the upper 16 bits. uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; @@ -594,8 +594,8 @@ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t ti _stackprof.raw_sample_index = _stackprof.raw_samples_len; _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num; for (i = num-1; i >= 0; i--) { - VALUE frame = _stackprof.frames_buffer[i]; - int line = _stackprof.lines_buffer[i]; + VALUE frame = frames_buffer[i]; + int line = lines_buffer[i]; // Encode the line in to the upper 16 bits. uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; @@ -626,8 +626,8 @@ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t ti } for (i = 0; i < num; i++) { - int line = _stackprof.lines_buffer[i]; - VALUE frame = _stackprof.frames_buffer[i]; + int line = lines_buffer[i]; + VALUE frame = frames_buffer[i]; frame_data_t *frame_data = sample_for(frame); if (frame_data->seen_at_sample_number != _stackprof.overall_samples) { @@ -710,27 +710,28 @@ stackprof_record_gc_samples(void) for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) { int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval); + /* Use local arrays for GC frames to avoid a data race with the + * signal handler, which can overwrite _stackprof.frames_buffer + * via stackprof_buffer_sample/rb_profile_frames at any time. */ + VALUE gc_frames[2]; + int gc_lines[2] = {0, 0}; + if (_stackprof.unrecorded_gc_marking_samples) { - _stackprof.frames_buffer[0] = FAKE_FRAME_MARK; - _stackprof.lines_buffer[0] = 0; - _stackprof.frames_buffer[1] = FAKE_FRAME_GC; - _stackprof.lines_buffer[1] = 0; + gc_frames[0] = FAKE_FRAME_MARK; + gc_frames[1] = FAKE_FRAME_GC; _stackprof.unrecorded_gc_marking_samples--; - stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); + stackprof_record_sample_for_stack(2, gc_frames, gc_lines, start_timestamp, timestamp_delta); } else if (_stackprof.unrecorded_gc_sweeping_samples) { - _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP; - _stackprof.lines_buffer[0] = 0; - _stackprof.frames_buffer[1] = FAKE_FRAME_GC; - _stackprof.lines_buffer[1] = 0; + gc_frames[0] = FAKE_FRAME_SWEEP; + gc_frames[1] = FAKE_FRAME_GC; _stackprof.unrecorded_gc_sweeping_samples--; - stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); + stackprof_record_sample_for_stack(2, gc_frames, gc_lines, start_timestamp, timestamp_delta); } else { - _stackprof.frames_buffer[0] = FAKE_FRAME_GC; - _stackprof.lines_buffer[0] = 0; - stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta); + gc_frames[0] = FAKE_FRAME_GC; + stackprof_record_sample_for_stack(1, gc_frames, gc_lines, start_timestamp, timestamp_delta); } } _stackprof.during_gc += _stackprof.unrecorded_gc_samples; @@ -743,7 +744,7 @@ stackprof_record_gc_samples(void) static void stackprof_record_buffer(void) { - stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec); + stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.frames_buffer, _stackprof.lines_buffer, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec); // reset the buffer _stackprof.buffer_count = 0;