From 35f8602cc8d0f63cdad0e66da8ee4a1783107109 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 7 May 2026 07:39:21 +0200 Subject: [PATCH 1/4] v8: attach values to CPU samples Signed-off-by: Attila Szegedi --- deps/v8/include/v8-profiler.h | 85 ++++++++++++++++++++++- deps/v8/src/api/api.cc | 48 +++++++++++-- deps/v8/src/profiler/cpu-profiler.cc | 14 +++- deps/v8/src/profiler/cpu-profiler.h | 11 +++ deps/v8/src/profiler/profile-generator.cc | 9 +-- deps/v8/src/profiler/profile-generator.h | 7 +- deps/v8/src/profiler/tick-sample.cc | 4 +- deps/v8/src/profiler/tick-sample.h | 6 +- 8 files changed, 167 insertions(+), 17 deletions(-) diff --git a/deps/v8/include/v8-profiler.h b/deps/v8/include/v8-profiler.h index 927fa12e319026..a3768ecefeccc1 100644 --- a/deps/v8/include/v8-profiler.h +++ b/deps/v8/include/v8-profiler.h @@ -31,6 +31,68 @@ using NativeObject = void*; using SnapshotObjectId = uint32_t; using ProfilerId = uint32_t; +/** + * Embedder-supplied callback invoked in signal-handler context as each + * CPU profile sample is captured. The returned pointer is stored on the + * sample and retrievable via CpuProfile::GetSampleContext. + * + * Signal-safety contract: this function is invoked from a POSIX signal + * handler (or thread-suspension context on Windows). It MUST NOT allocate + * memory, acquire locks, call any V8 API, or perform any other operation that + * is not async-signal-safe. It SHOULD limit itself to reading from memory the + * embedder keeps stable for the duration of profiling, and returning a `void*` + * whose meaning is defined by the embedder. + * + * The returned pointer is treated as opaque by V8 and is not dereferenced. + * + * The helper LookupCpedMapAlignedPointer below is provided for the common + * case in which an embedder uses a JS Map stored in + * ContinuationPreservedEmbedderData as a registry through which several + * independent libraries can attach their own continuation-bound data. 
+ */ +using SampleContextExtractor = void* (*)(Isolate*); + +/** + * Helper for SampleContextExtractor implementations that follow a common + * pattern: the embedder uses a JS Map placed in + * ContinuationPreservedEmbedderData as a shared registry, allowing multiple + * independent libraries to each store their own continuation-bound data + * under their own key in that Map without interfering with one another. + * + * This helper performs that lookup. It treats the current CPED as a JS Map, + * looks up the entry whose key has the tagged address `key_addr`, and if + * the value is a JS object with at least one internal field, returns the + * aligned pointer stored at internal field 0 (which the embedder is + * expected to have set via SetAlignedPointerInInternalField). Returns + * nullptr if CPED is not a JS Map, the key is not present, the value is + * not a JS object with an internal field, or the embedder has not stored + * an aligned pointer there. + * + * `key_addr` is the tagged address of the lookup key. The caller must + * obtain it freshly at each invocation by reading through a stable slot + * that V8 keeps GC-coherently updated — typically the persistent-handle + * slot of a v8::Global<> the embedder owns. Caching the address across + * calls would be unsafe because V8 updates the slot's contents during + * compaction. Since embedders can't necessarily reference i::Address type, + * we use uintptr_t that it typedefs. The addressed key object must have its + * hash already precomputed in order to not trigger hash computation in the + * helper. This is trivially satisfied if it was ever set as a key in a map, but + * can also be guaranteed by invoking GetIdentityHash() early on it once outside + * of signal handling. + * + * Signal-safety: performs only signal-safe operations (no allocation, no + * locks, no V8 API calls beyond raw memory reads of fixed-layout objects). 
+ * MUST NOT be called while a V8 GC is in progress, because the helper + * walks V8 heap state (CPED, JSMap, OrderedHashMap, JSObject internal + * fields) which may be mid-compaction. Embedders should install + * Isolate::AddGCPrologueCallback / AddGCEpilogueCallback to observe GC and + * refrain from invoking this helper while in GC. It is safe though to capture + * this helper's return value once at the prologue (a safe point on the JS + * thread) and serve it from a cache while GC is in progress. + */ +V8_EXPORT void* LookupCpedMapAlignedPointer(Isolate* isolate, + uintptr_t key_addr); + struct CpuProfileDeoptFrame { int script_id; size_t position; @@ -272,6 +334,15 @@ class V8_EXPORT CpuProfile { */ EmbedderStateTag GetSampleEmbedderState(int index) const; + /** + * Returns the embedder-supplied sample context for the sample at the given + * index. The pointer was produced by the SampleContextExtractor installed on + * the CpuProfilingOptions used to start this profile. If no extractor was + * installed, or the extractor returned nullptr for this sample, returns + * nullptr. + */ + void* GetSampleContext(int index) const; + /** * Returns time when the profile recording was stopped (in microseconds) * since some unspecified starting point. @@ -394,12 +465,20 @@ class V8_EXPORT CpuProfilingOptions { * \param filter_context If specified, profiles will only contain frames * using this context. Other frames will be elided. * \param profile_source Identifies the source of this CPU profile. + * \param sample_context_extractor Optional embedder callback invoked in + * signal-handler context as each sample is + * captured. The returned pointer is stored + * on the sample and retrievable via + * CpuProfile::GetSampleContext. See + * SampleContextExtractor for the + * signal-safety contract. 
*/ CpuProfilingOptions( CpuProfilingMode mode = kLeafNodeLineNumbers, unsigned max_samples = kNoSampleLimit, int sampling_interval_us = 0, MaybeLocal filter_context = MaybeLocal(), - CpuProfileSource profile_source = CpuProfileSource::kUnspecified); + CpuProfileSource profile_source = CpuProfileSource::kUnspecified, + SampleContextExtractor sample_context_extractor = nullptr); CpuProfilingOptions(CpuProfilingOptions&&) = default; CpuProfilingOptions& operator=(CpuProfilingOptions&&) = default; @@ -408,6 +487,9 @@ class V8_EXPORT CpuProfilingOptions { unsigned max_samples() const { return max_samples_; } int sampling_interval_us() const { return sampling_interval_us_; } CpuProfileSource profile_source() const { return profile_source_; } + SampleContextExtractor sample_context_extractor() const { + return sample_context_extractor_; + } private: friend class internal::CpuProfile; @@ -420,6 +502,7 @@ class V8_EXPORT CpuProfilingOptions { int sampling_interval_us_; Global filter_context_; CpuProfileSource profile_source_; + SampleContextExtractor sample_context_extractor_ = nullptr; }; /** diff --git a/deps/v8/src/api/api.cc b/deps/v8/src/api/api.cc index 9ef4e3b4a66006..bdde52687667ad 100644 --- a/deps/v8/src/api/api.cc +++ b/deps/v8/src/api/api.cc @@ -11465,6 +11465,42 @@ EmbedderStateTag CpuProfile::GetSampleEmbedderState(int index) const { return profile->sample(index).embedder_state_tag; } +void* CpuProfile::GetSampleContext(int index) const { + const i::CpuProfile* profile = reinterpret_cast(this); + return profile->sample(index).sample_context; +} + +void* LookupCpedMapAlignedPointer(Isolate* isolate, uintptr_t key_addr) { + i::Isolate* i_isolate = reinterpret_cast(isolate); + + i::Tagged cped_obj = + i_isolate->isolate_data()->continuation_preserved_embedder_data(); + if (!IsJSMap(cped_obj)) return nullptr; + i::Tagged map = i::Cast(cped_obj); + + i::Tagged table_obj = map->table(); + if (!IsOrderedHashMap(table_obj)) return nullptr; + i::Tagged table = + 
i::Cast(table_obj); + + i::Tagged key(static_cast(key_addr)); + + i::InternalIndex entry = table->FindEntry(i_isolate, key); + if (!entry.is_found()) return nullptr; + + i::Tagged value_obj = table->ValueAt(entry); + if (!IsJSObject(value_obj)) return nullptr; + i::Tagged holder = i::Cast(value_obj); + + void* aligned_ptr = nullptr; + if (!i::EmbedderDataSlot(holder, 0).ToAlignedPointer( + i_isolate, &aligned_ptr, + {i::kFirstEmbedderDataTag, i::kLastEmbedderDataTag})) { + return nullptr; + } + return aligned_ptr; +} + int64_t CpuProfile::GetStartTime() const { const i::CpuProfile* profile = reinterpret_cast(this); return profile->start_time().since_origin().InMicroseconds(); @@ -11501,15 +11537,15 @@ CpuProfiler* CpuProfiler::New(Isolate* v8_isolate, reinterpret_cast(v8_isolate), naming_mode, logging_mode)); } -CpuProfilingOptions::CpuProfilingOptions(CpuProfilingMode mode, - unsigned max_samples, - int sampling_interval_us, - MaybeLocal filter_context, - CpuProfileSource profile_source) +CpuProfilingOptions::CpuProfilingOptions( + CpuProfilingMode mode, unsigned max_samples, int sampling_interval_us, + MaybeLocal filter_context, CpuProfileSource profile_source, + SampleContextExtractor sample_context_extractor) : mode_(mode), max_samples_(max_samples), sampling_interval_us_(sampling_interval_us), - profile_source_(profile_source) { + profile_source_(profile_source), + sample_context_extractor_(sample_context_extractor) { if (!filter_context.IsEmpty()) { Local local_filter_context = filter_context.ToLocalChecked(); filter_context_.Reset(v8::Isolate::GetCurrent(), local_filter_context); diff --git a/deps/v8/src/profiler/cpu-profiler.cc b/deps/v8/src/profiler/cpu-profiler.cc index 8244caf9390f47..da5f6efd0e4610 100644 --- a/deps/v8/src/profiler/cpu-profiler.cc +++ b/deps/v8/src/profiler/cpu-profiler.cc @@ -57,9 +57,15 @@ class CpuSampler : public sampler::Sampler { } // Every bailout up until here resulted in a dropped sample. 
From now on, // the sample is created in the buffer. + + void* sample_context = nullptr; + if (auto extractor = processor_->sample_context_extractor()) { + sample_context = extractor(reinterpret_cast(isolate)); + } sample->Init(isolate, regs, TickSample::kIncludeCEntryFrame, /* update_stats */ true, - /* use_simulator_reg_state */ true, processor_->period()); + /* use_simulator_reg_state */ true, processor_->period(), + /* trace_id */ std::nullopt, sample_context); if (is_counting_samples_ && !sample->timestamp.IsNull()) { if (sample->state == JS) ++js_sample_count_; if (sample->state == EXTERNAL) ++external_sample_count_; @@ -250,7 +256,7 @@ void SamplingEventsProcessor::SymbolizeAndAddToProfiles( tick_sample.state, tick_sample.embedder_state, reinterpret_cast
<Address>(tick_sample.context), reinterpret_cast<Address>
(tick_sample.embedder_context), - tick_sample.trace_id_); + tick_sample.trace_id_, tick_sample.sample_context_); } ProfilerEventsProcessor::SampleProcessingResult @@ -656,6 +662,10 @@ CpuProfilingResult CpuProfiler::StartProfiling( TRACE_EVENT0("v8", "CpuProfiler::StartProfiling"); AdjustSamplingInterval(); StartProcessorIfNotStarted(); + auto sample_context_extractor = options.sample_context_extractor(); + if (sample_context_extractor != nullptr) { + processor_->set_sample_context_extractor(sample_context_extractor); + } // Collect script rundown at the start of profiling if trace category is // turned on diff --git a/deps/v8/src/profiler/cpu-profiler.h b/deps/v8/src/profiler/cpu-profiler.h index bbfc6432533e5a..a63786eee66c4e 100644 --- a/deps/v8/src/profiler/cpu-profiler.h +++ b/deps/v8/src/profiler/cpu-profiler.h @@ -188,6 +188,14 @@ class V8_EXPORT_PRIVATE ProfilerEventsProcessor : public base::Thread, virtual void SetSamplingInterval(base::TimeDelta) {} + using SampleContextExtractor = void* (*)(v8::Isolate*); + void set_sample_context_extractor(SampleContextExtractor fn) { + sample_context_extractor_.store(fn, std::memory_order_release); + } + SampleContextExtractor sample_context_extractor() const { + return sample_context_extractor_.load(std::memory_order_acquire); + } + protected: ProfilerEventsProcessor(Isolate* isolate, Symbolizer* symbolizer, ProfilerCodeObserver* code_observer, @@ -214,6 +222,9 @@ class V8_EXPORT_PRIVATE ProfilerEventsProcessor : public base::Thread, std::atomic last_code_event_id_; unsigned last_processed_code_event_id_; Isolate* isolate_; + + private: + std::atomic sample_context_extractor_{nullptr}; }; class V8_EXPORT_PRIVATE SamplingEventsProcessor diff --git a/deps/v8/src/profiler/profile-generator.cc b/deps/v8/src/profiler/profile-generator.cc index 42f63c7707274b..bbad8ddbf6fc89 100644 --- a/deps/v8/src/profiler/profile-generator.cc +++ b/deps/v8/src/profiler/profile-generator.cc @@ -646,7 +646,8 @@ void 
CpuProfile::AddPath(base::TimeTicks timestamp, bool update_stats, base::TimeDelta sampling_interval, StateTag state_tag, EmbedderStateTag embedder_state_tag, - const std::optional trace_id) { + const std::optional trace_id, + void* sample_context) { if (!CheckSubsample(sampling_interval)) return; ProfileNode* top_frame_node = top_down_.AddPathFromEnd(path, src_pos, update_stats, options_.mode()); @@ -659,7 +660,7 @@ void CpuProfile::AddPath(base::TimeTicks timestamp, if (should_record_sample) { samples_.push_back({top_frame_node, timestamp, src_pos, state_tag, - embedder_state_tag, trace_id}); + embedder_state_tag, trace_id, sample_context}); } else if (is_buffer_full && delegate_ != nullptr) { const auto task_runner = V8::GetCurrentPlatform()->GetForegroundTaskRunner( reinterpret_cast(profiler_->isolate())); @@ -1230,7 +1231,7 @@ void CpuProfilesCollection::AddPathToCurrentProfiles( LineAndColumn src_pos, bool update_stats, base::TimeDelta sampling_interval, StateTag state, EmbedderStateTag embedder_state_tag, Address native_context_address, Address embedder_native_context_address, - const std::optional trace_id) { + const std::optional trace_id, void* sample_context) { // As starting / stopping profiles is rare relatively to this // method, we don't bother minimizing the duration of lock holding, // e.g. copying contents of the list to a local vector. @@ -1254,7 +1255,7 @@ void CpuProfilesCollection::AddPathToCurrentProfiles( timestamp, accepts_context ? path : empty_path, src_pos, update_stats, sampling_interval, state, accepts_embedder_context ? 
embedder_state_tag : EmbedderStateTag::EMPTY, - trace_id); + trace_id, sample_context); } } diff --git a/deps/v8/src/profiler/profile-generator.h b/deps/v8/src/profiler/profile-generator.h index 38b3c713de6f3b..8fa9023d83d616 100644 --- a/deps/v8/src/profiler/profile-generator.h +++ b/deps/v8/src/profiler/profile-generator.h @@ -424,6 +424,7 @@ class CpuProfile { StateTag state_tag; EmbedderStateTag embedder_state_tag; const std::optional trace_id; + void* sample_context; }; V8_EXPORT_PRIVATE CpuProfile( @@ -441,7 +442,8 @@ class CpuProfile { LineAndColumn src_pos, bool update_stats, base::TimeDelta sampling_interval, StateTag state, EmbedderStateTag embedder_state, - const std::optional trace_id = std::nullopt); + const std::optional trace_id = std::nullopt, + void* sample_context = nullptr); void FinishProfile(); const char* title() const { return title_; } @@ -588,7 +590,8 @@ class V8_EXPORT_PRIVATE CpuProfilesCollection { EmbedderStateTag embedder_state_tag, Address native_context_address = kNullAddress, Address native_embedder_context_address = kNullAddress, - const std::optional trace_id = std::nullopt); + const std::optional trace_id = std::nullopt, + void* sample_context = nullptr); // Called from profile generator thread. 
void UpdateNativeContextAddressForCurrentProfiles(Address from, Address to); diff --git a/deps/v8/src/profiler/tick-sample.cc b/deps/v8/src/profiler/tick-sample.cc index af97b96d7a0f45..be5b2e561769d6 100644 --- a/deps/v8/src/profiler/tick-sample.cc +++ b/deps/v8/src/profiler/tick-sample.cc @@ -167,7 +167,8 @@ DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate, bool update_stats, bool use_simulator_reg_state, base::TimeDelta sampling_interval, - const std::optional trace_id) { + const std::optional trace_id, + void* sample_context) { update_stats_ = update_stats; SampleInfo info; RegisterState regs = reg_state; @@ -209,6 +210,7 @@ DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate, } sampling_interval_ = sampling_interval; trace_id_ = trace_id; + sample_context_ = sample_context; timestamp = base::TimeTicks::Now(); } diff --git a/deps/v8/src/profiler/tick-sample.h b/deps/v8/src/profiler/tick-sample.h index b02400777d04fd..b227bc3a655406 100644 --- a/deps/v8/src/profiler/tick-sample.h +++ b/deps/v8/src/profiler/tick-sample.h @@ -39,7 +39,8 @@ struct V8_EXPORT TickSample { RecordCEntryFrame record_c_entry_frame, bool update_stats, bool use_simulator_reg_state = true, base::TimeDelta sampling_interval = base::TimeDelta(), - const std::optional trace_id = std::nullopt); + const std::optional trace_id = std::nullopt, + void* sample_context = nullptr); /** * Get a call stack sample from the isolate. * \param isolate The isolate. @@ -100,6 +101,9 @@ struct V8_EXPORT TickSample { bool update_stats_ = true; // An identifier to associate the sample with a trace event. std::optional trace_id_; + // Embedder-supplied opaque value captured by SampleContextExtractor. See + // v8::SampleContextExtractor and v8::CpuProfile::GetSampleContext. + void* sample_context_ = nullptr; void* stack[kMaxFramesCount]; // Call stack. 
}; From f391ab5c7c36d8ff42cd8adf41557d909c1dfe71 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 7 May 2026 07:40:41 +0200 Subject: [PATCH 2/4] Add new CPU profiling API that allows both retrieval of profiling data as a structured tree as well as associating values with samples --- lib/v8.js | 142 +++++++- node.gyp | 2 + src/node_binding.cc | 1 + src/node_cpu_profiler.cc | 736 +++++++++++++++++++++++++++++++++++++++ src/node_cpu_profiler.h | 171 +++++++++ src/node_v8.cc | 38 -- 6 files changed, 1039 insertions(+), 51 deletions(-) create mode 100644 src/node_cpu_profiler.cc create mode 100644 src/node_cpu_profiler.h diff --git a/lib/v8.js b/lib/v8.js index bb174f8d524305..2857a9d5f91862 100644 --- a/lib/v8.js +++ b/lib/v8.js @@ -119,8 +119,6 @@ const binding = internalBinding('v8'); const { cachedDataVersionTag, setFlagsFromString: _setFlagsFromString, - startCpuProfile: _startCpuProfile, - stopCpuProfile: _stopCpuProfile, startHeapProfile: _startHeapProfile, stopHeapProfile: _stopHeapProfile, isStringOneByteRepresentation: _isStringOneByteRepresentation, @@ -178,21 +176,115 @@ function setFlagsFromString(flags) { _setFlagsFromString(flags); } +// Lazy: pull async_hooks (and the cpu_profiler binding) only on first use of +// startCpuProfile, since they're not free to load. 
+let lazyCpuProfilerBits; +function getCpuProfilerBits() { + if (lazyCpuProfilerBits === undefined) { + const { AsyncLocalStorage } = require('async_hooks'); + const { + validateFunction, + validateObject, + } = require('internal/validators'); + const { + SampledCpuProfiler, + } = internalBinding('cpu_profiler'); + lazyCpuProfilerBits = { + AsyncLocalStorage, + validateFunction, + validateObject, + SampledCpuProfiler, + }; + } + return lazyCpuProfilerBits; +} + class SyncCPUProfileHandle { - #id = null; + #binding; + #als; #stopped = false; - constructor(id) { - this.#id = id; + constructor(binding, als) { + this.#binding = binding; + this.#als = als; } + /** + * Stop the profiler and return the V8 JSON profile string. + * @returns {string | undefined} + */ stop() { if (this.#stopped) { return; } this.#stopped = true; - return _stopCpuProfile(this.#id); - }; + return this.#binding.stop(); + } + + /** + * Stop the profiler and return a structured profile object tree: + * { startTime, endTime, droppedContexts, + * topDownRoot: { functionName, scriptName, lineNumber, columnNumber, + * hitCount, contexts?, children } } + * @returns {object | undefined} + */ + stopAndCapture() { + if (this.#stopped) { + return; + } + this.#stopped = true; + return this.#binding.stopAndCapture(); + } + + /** + * Capture a profile of samples since the last start/snapshot, and continue + * profiling with a fresh internal session. Returns the same shape as + * stopAndCapture(). Throws if the profiler has been stopped. + * @returns {object} + */ + snapshot() { + if (this.#stopped) { + throw new Error('CPU profile has been stopped'); + } + return this.#binding.snapshot(); + } + + /** + * Run `fn(...args)` with `value` recorded as the context for any samples + * captured during its synchronous and propagated-async execution. Requires + * { withContext: true } at start time. 
+ */ + runWithContext(value, fn, ...args) { + if (this.#als === null) { + throw new Error( + 'runWithContext requires { withContext: true } on startCpuProfile'); + } + if (this.#stopped) { + throw new Error('CPU profile has been stopped'); + } + const bits = getCpuProfilerBits(); + bits.validateFunction(fn, 'fn'); + const holder = this.#binding.createContextHolder(value); + return this.#als.run(holder, fn, ...args); + } + + /** + * Enter a scope in which `value` is the current sample context, for the + * rest of the active AsyncLocalStorage scope. Mirrors the naming of + * AsyncLocalStorage.enterWith. Requires { withContext: true } at start + * time. + */ + enterWithContext(value) { + if (this.#als === null) { + throw new Error( + 'enterWithContext requires { withContext: true } on startCpuProfile'); + } + if (this.#stopped) { + throw new Error('CPU profile has been stopped'); + } + const holder = this.#binding.createContextHolder(value); + this.#als.enterWith(holder); + } [SymbolDispose]() { this.stop(); @@ -216,19 +308,43 @@ class SyncHeapProfileHandle { } /** - * Starting CPU Profile. + * Start CPU profiling. + * * @param {object} [options] - * @param {number} [options.sampleInterval] - * @param {number} [options.maxBufferSize] + * @param {number} [options.sampleInterval] V8's per-profile sampling interval + * in milliseconds. Default is V8's internal default. + * @param {number} [options.maxBufferSize] Maximum number of samples V8 will + * buffer for this profile. Default unlimited. + * @param {boolean} [options.withContext] When true, the handle exposes + * runWithContext() / enterWithContext() for associating arbitrary JS values + * with the samples captured during their execution scope. The values are + * surfaced on each sample of the structured profile returned by + * stopAndCapture() or snapshot(). When false (default), no per-sample + * context tracking is performed and there is no extractor overhead. 
+ * @param {number} [options.contextBufferSize] Maximum number of samples that + * can carry a context value during a single profile session. Once + * exceeded, further samples are recorded with no context and the + * `droppedContexts` counter on the result is incremented. Default 60000 + * which is enough for 60 seconds of sampling at the default interval of + * 10ms. Only meaningful with withContext. * @returns {SyncCPUProfileHandle} */ -function startCpuProfile(options) { +function startCpuProfile(options = {}) { const { samplingIntervalMicros, maxSamples, } = normalizeCpuProfileOptions(options); - const id = _startCpuProfile(samplingIntervalMicros, maxSamples); - return new SyncCPUProfileHandle(id); + + const bits = getCpuProfilerBits(); + const withContext = options.withContext === true; + const contextBufferSize = options.contextBufferSize ?? 60000; + + const als = withContext ? new bits.AsyncLocalStorage() : null; + const binding = new bits.SampledCpuProfiler( + als ?? {}, contextBufferSize, samplingIntervalMicros, maxSamples, + withContext); + binding.start(); + return new SyncCPUProfileHandle(binding, als); } /** diff --git a/node.gyp b/node.gyp index c06e95a98e5ce9..da7604e6c8e2ef 100644 --- a/node.gyp +++ b/node.gyp @@ -112,6 +112,7 @@ 'src/node_api.cc', 'src/node_binding.cc', 'src/node_blob.cc', + 'src/node_cpu_profiler.cc', 'src/node_buffer.cc', 'src/node_builtins.cc', 'src/node_config.cc', @@ -249,6 +250,7 @@ 'src/node_binding.h', 'src/node_blob.h', 'src/node_buffer.h', + 'src/node_cpu_profiler.h', 'src/node_builtins.h', 'src/node_config_file.h', 'src/node_constants.h', diff --git a/src/node_binding.cc b/src/node_binding.cc index ee6fda2947db77..18ff87984ef805 100644 --- a/src/node_binding.cc +++ b/src/node_binding.cc @@ -47,6 +47,7 @@ V(config) \ V(constants) \ V(contextify) \ + V(cpu_profiler) \ V(credentials) \ V(diagnostics_channel) \ V(encoding_binding) \ diff --git a/src/node_cpu_profiler.cc b/src/node_cpu_profiler.cc new file mode 100644 index 
00000000000000..af5ea487cbebf2 --- /dev/null +++ b/src/node_cpu_profiler.cc @@ -0,0 +1,736 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +#include "node_cpu_profiler.h" + +#include + +#include "base_object-inl.h" +#include "env-inl.h" +#include "memory_tracker-inl.h" +#include "node.h" +#include "node_errors.h" +#include "node_external_reference.h" +#include "util-inl.h" +#include "v8-profiler.h" +#include "v8.h" + +namespace node { +namespace cpu_profiler { + +namespace i = v8::internal; + +using v8::Context; +using v8::CpuProfile; +using v8::CpuProfiler; +using v8::CpuProfilingOptions; +using v8::CpuProfilingResult; +using v8::CpuProfilingStatus; +using v8::FunctionCallbackInfo; +using v8::FunctionTemplate; +using v8::Global; +using v8::HandleScope; +using v8::Isolate; +using v8::Local; +using v8::Object; +using v8::ObjectTemplate; +using v8::String; +using v8::Value; + +namespace { + +// Per-isolate active profiler, using thread_local because in Node's threading +// model each isolate is bound to one thread. +thread_local SampledCpuProfiler* t_active_profiler = nullptr; + +constexpr int kHolderInternalFieldCount = 1; +constexpr int kHolderSharedPtrSlot = 0; + +// Weak callback: when a ContextHolder JS object becomes unreachable, free the +// heap-allocated shared_ptr_slot. +struct HolderWeakData { + std::shared_ptr>* shared_ptr_slot; + Global handle; +}; + +void HolderWeakCallback(const v8::WeakCallbackInfo& info) { + HolderWeakData* data = info.GetParameter(); + delete data->shared_ptr_slot; + delete data; +} + +// Extracts the address of a v8::Global's underlying slot. The slot is +// V8's persistent-handle entry: its address is stable for the Global's +// lifetime, while V8 keeps the slot's *contents* (the tagged pointer to the +// referent) GC-coherently updated through compaction. Reading through this +// slot in signal-handler context is signal-safe (single load) and yields the +// current address of the referent. +// +// `slot()` is protected on v8::api_internal::IndirectHandleBase; we expose it +// by deriving and using-declaring it as public. 
The derived class adds no +// data members, so static_cast'ing a v8::Global* to this is layout- +// compatible. +class GlobalSlotAccessor : public v8::Global { + public: + using v8::Global::Global; + using v8::Global::slot; +}; + +i::Address* SlotOf(v8::Global* g) { + return static_cast(g)->slot(); +} + +// Builds the structured JS profile tree returned by Stop(). +// +// Lifetime: the v8::CpuProfile* and Session* are borrowed and not retained +// past Run(). Callers can free both after Run() returns; the resulting +// JS objects are plain (no native back-pointers). +class ProfileBuilder { + public: + ProfileBuilder(v8::Isolate* isolate, + v8::Local context, + const v8::CpuProfile* profile, + Session* session) + : isolate_(isolate), + context_(context), + profile_(profile), + session_(session), + // Property name strings are created once and reused for every object. + k_start_time_(NewKey("startTime")), + k_end_time_(NewKey("endTime")), + k_dropped_contexts_(NewKey("droppedContexts")), + k_top_down_root_(NewKey("topDownRoot")), + k_function_name_(NewKey("functionName")), + k_script_name_(NewKey("scriptName")), + k_line_number_(NewKey("lineNumber")), + k_column_number_(NewKey("columnNumber")), + k_hit_count_(NewKey("hitCount")), + k_contexts_(NewKey("contexts")), + k_children_(NewKey("children")), + k_context_(NewKey("context")), + k_timestamp_(NewKey("timestamp")) { + // Pre-bucket samples by node so each node's contexts can be assembled in + // one pass. node_to_samples_[node] -> indices into profile_->samples. 
+ int sample_count = profile_->GetSamplesCount(); + for (int i = 0; i < sample_count; ++i) { + const v8::CpuProfileNode* node = profile_->GetSample(i); + if (node == nullptr) continue; + node_to_samples_[node].push_back(i); + } + } + + v8::Local Run() { + v8::Local result = v8::Object::New(isolate_); + Set(result, k_start_time_, + v8::Number::New(isolate_, + static_cast(profile_->GetStartTime()))); + Set(result, k_end_time_, + v8::Number::New(isolate_, + static_cast(profile_->GetEndTime()))); + Set( + result, k_dropped_contexts_, + v8::Number::New(isolate_, static_cast(session_->dropped.load( + std::memory_order_relaxed)))); + Set(result, k_top_down_root_, BuildNode(profile_->GetTopDownRoot())); + return result; + } + + private: + v8::Local NewKey(const char* s) { + return v8::String::NewFromUtf8(isolate_, s, v8::NewStringType::kInternalized) + .ToLocalChecked(); + } + + void Set(v8::Local obj, + v8::Local key, + v8::Local value) { + obj->Set(context_, key, value).Check(); + } + + v8::Local BuildNode(const v8::CpuProfileNode* node) { + v8::Local obj = v8::Object::New(isolate_); + + Set(obj, k_function_name_, node->GetFunctionName()); + Set(obj, k_script_name_, node->GetScriptResourceName()); + Set(obj, k_line_number_, + v8::Integer::New(isolate_, node->GetLineNumber())); + Set(obj, k_column_number_, + v8::Integer::New(isolate_, node->GetColumnNumber())); + Set(obj, k_hit_count_, + v8::Integer::NewFromUnsigned(isolate_, node->GetHitCount())); + + auto it = node_to_samples_.find(node); + if (it != node_to_samples_.end()) { + // Buffer the context-bearing entries in a vector first, then allocate + // the JS array sized exactly to that count. Sizing the array up front + // by sample_indices.size() and skipping context-less samples leaves + // trailing `undefined` holes. 
+ std::vector> ctx_objs; + ctx_objs.reserve(it->second.size()); + for (int sample_idx : it->second) { + v8::Local ctx_value = ContextValueAt(sample_idx); + if (ctx_value.IsEmpty()) continue; + v8::Local ctx_obj = v8::Object::New(isolate_); + Set(ctx_obj, k_context_, ctx_value); + Set(ctx_obj, k_timestamp_, + v8::Number::New(isolate_, + static_cast( + profile_->GetSampleTimestamp(sample_idx)))); + ctx_objs.push_back(ctx_obj); + } + // If every sample at this node was context-less, omit the field rather + // than setting an empty array. + if (!ctx_objs.empty()) { + v8::Local contexts = + v8::Array::New(isolate_, ctx_objs.data(), ctx_objs.size()); + Set(obj, k_contexts_, contexts); + } + } + + int child_count = node->GetChildrenCount(); + v8::Local children = v8::Array::New(isolate_, child_count); + for (int i = 0; i < child_count; ++i) { + children->Set(context_, static_cast(i), + BuildNode(node->GetChild(i))) + .Check(); + } + Set(obj, k_children_, children); + + return obj; + } + + // Resolves a sample's context void* (slot index + 1, or nullptr) back to + // the JS value its ContextHolder wrapped. Returns Local<>() if the sample + // had no associated context. 
+ v8::Local ContextValueAt(int sample_index) { + void* sample_ctx = profile_->GetSampleContext(sample_index); + if (sample_ctx == nullptr) return {}; + uintptr_t encoded = reinterpret_cast(sample_ctx); + if (encoded == 0) return {}; + uint64_t slot_idx = encoded - 1; + if (slot_idx >= session_->context_buffer.size()) return {}; + auto& shared = session_->context_buffer[slot_idx]; + if (!shared) return {}; + return shared->Get(isolate_); + } + + v8::Isolate* const isolate_; + v8::Local context_; + const v8::CpuProfile* const profile_; + Session* const session_; + + std::unordered_map> + node_to_samples_; + + v8::Local k_start_time_; + v8::Local k_end_time_; + v8::Local k_dropped_contexts_; + v8::Local k_top_down_root_; + v8::Local k_function_name_; + v8::Local k_script_name_; + v8::Local k_line_number_; + v8::Local k_column_number_; + v8::Local k_hit_count_; + v8::Local k_contexts_; + v8::Local k_children_; + v8::Local k_context_; + v8::Local k_timestamp_; +}; + +} // namespace + +Session::Session(size_t buffer_size) : context_buffer(buffer_size) {} + +SampledCpuProfiler::SampledCpuProfiler( + Environment* env, + Local object, + Local holder_template, + Local als_resource_key, + size_t context_buffer_size, + int sampling_interval_us, + uint32_t max_samples, + bool with_context) + : BaseObject(env, object), + holder_template_(env->isolate(), holder_template), + als_resource_key_(env->isolate(), als_resource_key), + context_buffer_size_(context_buffer_size), + sampling_interval_us_(sampling_interval_us), + max_samples_(max_samples), + with_context_(with_context) { + als_key_slot_ = reinterpret_cast(SlotOf(&als_resource_key_)); + MakeWeak(); +} + +SampledCpuProfiler::~SampledCpuProfiler() { + // Best-effort teardown for the case where the JS handle is dropped without + // calling stop()/stopAndCapture(). If a session is still active, drain it + // (this also clears t_active_profiler via StopActiveSession). The profile + // we'd otherwise return is just discarded. 
+ if (active_session_.load(std::memory_order_relaxed) != nullptr) { + CpuProfile* profile = nullptr; + Session* session = nullptr; + StopActiveSession(/* restart = */ false, &profile, &session); + if (profile != nullptr) profile->Delete(); + delete session; + } + // Defensive: should already be cleared by StopActiveSession (or never set + // if no session was running), but stale TLS pointing at a freed instance + // would break future Start calls on this thread. + if (t_active_profiler == this) { + t_active_profiler = nullptr; + } + // Tearing down the v8::CpuProfiler ends its sampling thread (StopProcessor) + // and unregisters the GC pro/epilogue callbacks we installed against + // processor_. + if (cpu_profiler_ != nullptr) { + cpu_profiler_->Dispose(); + cpu_profiler_ = nullptr; + } +} + +void SampledCpuProfiler::New(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + + CHECK(args.IsConstructCall()); + CHECK_GE(args.Length(), 5); + CHECK(args[0]->IsObject()); // ALS resource key (the ALS instance) + CHECK(args[1]->IsUint32()); // context buffer size + CHECK(args[2]->IsInt32()); // sampling interval (microseconds) + CHECK(args[3]->IsUint32()); // V8 max samples + CHECK(args[4]->IsBoolean()); // with_context + + Local als_resource_key = args[0]; + uint32_t buffer_size = + args[1]->Uint32Value(env->context()).FromJust(); + int sampling_interval_us = + args[2]->Int32Value(env->context()).FromJust(); + uint32_t max_samples = + args[3]->Uint32Value(env->context()).FromJust(); + bool with_context = args[4]->BooleanValue(isolate); + + // ContextHolder template: one aligned-pointer internal field for the + // heap-allocated shared_ptr>. + Local holder_template = ObjectTemplate::New(isolate); + holder_template->SetInternalFieldCount(kHolderInternalFieldCount); + + new SampledCpuProfiler(env, + args.This(), + holder_template, + als_resource_key, + with_context ? 
buffer_size : 0, + sampling_interval_us, + max_samples, + with_context); +} + + + +void SampledCpuProfiler::Start(const FunctionCallbackInfo& args) { + SampledCpuProfiler* self; + ASSIGN_OR_RETURN_UNWRAP(&self, args.This()); + Isolate* isolate = args.GetIsolate(); + + if (self->active_session_.load(std::memory_order_relaxed) != nullptr) { + THROW_ERR_INVALID_STATE(self->env(), + "CPU profile is already running"); + return; + } + + // Can only have one active profiler per isolate/thread. + if (t_active_profiler != nullptr) { + THROW_ERR_INVALID_STATE( + self->env(), + "Another CPU profile is already active in this isolate"); + return; + } + t_active_profiler = self; + + auto* session = new Session(self->context_buffer_size_); + // Publish the session before V8 starts sampling so any tick that fires + // immediately after Start sees a valid session. Compiler fence ensures + // the Session's construction is sequenced before its publication; the + // signal handler's matching atomic_signal_fence(acquire) sees the fully + // constructed Session. + std::atomic_signal_fence(std::memory_order_release); + self->active_session_.store(session, std::memory_order_relaxed); + + if (self->cpu_profiler_ == nullptr) { + self->cpu_profiler_ = CpuProfiler::New(isolate); + } + + // Only install the per-sample extractor when the embedder asked for + // context tracking; otherwise samples are captured at the same cost as + // the original v8.startCpuProfile(). + CpuProfilingOptions options( + v8::kLeafNodeLineNumbers, self->max_samples_, + self->sampling_interval_us_, v8::MaybeLocal(), + v8::CpuProfileSource::kUnspecified, + self->with_context_ ? 
&SampledCpuProfiler::ExtractContext : nullptr); + + CpuProfilingResult result = self->cpu_profiler_->Start( + v8::String::Empty(isolate), std::move(options)); + if (result.status != CpuProfilingStatus::kStarted && + result.status != CpuProfilingStatus::kAlreadyStarted) { + t_active_profiler = nullptr; + self->active_session_.store(nullptr, std::memory_order_relaxed); + delete session; + THROW_ERR_INVALID_STATE(self->env(), + "v8::CpuProfiler failed to start the profile"); + return; + } + + self->profile_id_ = result.id; + + // Install the GC prologue/epilogue callbacks when using contexts. + if (self->with_context_) { + isolate->AddGCPrologueCallback(&OnGCPrologue, self); + isolate->AddGCEpilogueCallback(&OnGCEpilogue, self); + } +} + +// Detaches the active Session and stops the V8 CpuProfile so the caller can +// drain them. If `restart` is true, allocates a fresh Session, atomically +// publishes it as the new active Session, and starts a new V8 profile so +// sampling continues with minimal gap. +bool SampledCpuProfiler::StopActiveSession(bool restart, + CpuProfile** out_profile, + Session** out_session) { + Session* session = active_session_.load(std::memory_order_relaxed); + if (session == nullptr) { + return false; + } + + v8::Isolate* isolate = env()->isolate(); + + // Unregister GC callbacks first. + if (with_context_) { + isolate->RemoveGCPrologueCallback(&OnGCPrologue, this); + isolate->RemoveGCEpilogueCallback(&OnGCEpilogue, this); + } + in_gc_.store(false, std::memory_order_relaxed); + gc_cached_aligned_ptr_.store(nullptr, std::memory_order_relaxed); + + // Detach active profiler from thread. New signal handler invocations observe + // nullptr and bail. + t_active_profiler = nullptr; + + // Stop V8's profile; in-flight ticks for our profile id are drained. + CpuProfile* profile = cpu_profiler_->Stop(profile_id_); + + // Detach the Session pointer. 
+ active_session_.store(nullptr, std::memory_order_relaxed); + + *out_profile = profile; + *out_session = session; + + if (!restart) { + return true; + } + + // Allocate, start, and publish the new session. + Session* new_session = new Session(context_buffer_size_); + + CpuProfilingOptions options( + v8::kLeafNodeLineNumbers, max_samples_, sampling_interval_us_, + v8::MaybeLocal(), v8::CpuProfileSource::kUnspecified, + with_context_ ? &SampledCpuProfiler::ExtractContext : nullptr); + + CpuProfilingResult result = cpu_profiler_->Start( + v8::String::Empty(env()->isolate()), std::move(options)); + if (result.status != CpuProfilingStatus::kStarted && + result.status != CpuProfilingStatus::kAlreadyStarted) { + delete new_session; + // Restart failed; leave the binding in stopped state and signal failure + // through unchanged active_session_ (still nullptr). + return true; + } + + profile_id_ = result.id; + // Same construction-then-publish pattern as Start: compiler fence + // sequences the new Session before its publication. + std::atomic_signal_fence(std::memory_order_release); + active_session_.store(new_session, std::memory_order_relaxed); + // Reattach the active profiler to the thread. + t_active_profiler = this; + // Reinstall GC callbacks for the new session. 
+ if (with_context_) { + isolate->AddGCPrologueCallback(&OnGCPrologue, this); + isolate->AddGCEpilogueCallback(&OnGCEpilogue, this); + } + return true; +} + +Local SampledCpuProfiler::BuildAndFree(CpuProfile* profile, + Session* session) { + Isolate* isolate = env()->isolate(); + Local context = env()->context(); + Local result; + if (profile != nullptr) { + ProfileBuilder builder(isolate, context, profile, session); + result = builder.Run(); + profile->Delete(); + } else { + result = Object::New(isolate); + } + delete session; + return result; +} + +void SampledCpuProfiler::StopAndCapture( + const FunctionCallbackInfo& args) { + SampledCpuProfiler* self; + ASSIGN_OR_RETURN_UNWRAP(&self, args.This()); + + CpuProfile* profile = nullptr; + Session* session = nullptr; + if (!self->StopActiveSession(/* restart = */ false, &profile, &session)) { + THROW_ERR_INVALID_STATE(self->env(), + "CPU profile is not running"); + return; + } + args.GetReturnValue().Set(self->BuildAndFree(profile, session)); +} + +void SampledCpuProfiler::Snapshot(const FunctionCallbackInfo& args) { + SampledCpuProfiler* self; + ASSIGN_OR_RETURN_UNWRAP(&self, args.This()); + + CpuProfile* profile = nullptr; + Session* session = nullptr; + if (!self->StopActiveSession(/* restart = */ true, &profile, &session)) { + THROW_ERR_INVALID_STATE(self->env(), + "CPU profile is not running"); + return; + } + args.GetReturnValue().Set(self->BuildAndFree(profile, session)); +} + +namespace { + +// OutputStream that accumulates Serialize() chunks into an std::string. +// Modeled on src/util.h's JSONOutputStream but inlined here to avoid the +// dependency on JSONOutputStream being available; we don't need its +// JSON-escaping logic since CpuProfile::Serialize emits already-encoded JSON. 
+class StringOutputStream : public v8::OutputStream { + public: + v8::OutputStream::WriteResult WriteAsciiChunk(char* data, int size) override { + out_.append(data, size); + return v8::OutputStream::kContinue; + } + void EndOfStream() override {} + std::string out_; +}; + +} // namespace + +void SampledCpuProfiler::Stop(const FunctionCallbackInfo& args) { + SampledCpuProfiler* self; + ASSIGN_OR_RETURN_UNWRAP(&self, args.This()); + + CpuProfile* profile = nullptr; + Session* session = nullptr; + if (!self->StopActiveSession(/* restart = */ false, &profile, &session)) { + THROW_ERR_INVALID_STATE(self->env(), + "CPU profile is not running"); + return; + } + + if (profile != nullptr) { + StringOutputStream stream; + profile->Serialize(&stream, CpuProfile::SerializationFormat::kJSON); + Isolate* isolate = args.GetIsolate(); + Local result; + if (v8::String::NewFromUtf8(isolate, stream.out_.c_str(), + v8::NewStringType::kNormal, + static_cast(stream.out_.size())) + .ToLocal(&result)) { + args.GetReturnValue().Set(result); + } + profile->Delete(); + } + delete session; +} + +void SampledCpuProfiler::CreateContextHolder( + const FunctionCallbackInfo& args) { + SampledCpuProfiler* self; + ASSIGN_OR_RETURN_UNWRAP(&self, args.This()); + Environment* env = self->env(); + Isolate* isolate = env->isolate(); + + CHECK_EQ(args.Length(), 1); + + // Allocate the holder JS object from the template cached at construction. + Local holder_tmpl = self->holder_template_.Get(isolate); + Local holder; + if (!holder_tmpl->NewInstance(env->context()).ToLocal(&holder)) { + return; + } + + // Heap-allocate a shared_ptr that owns a Global wrapping the user's + // value. The shared_ptr's address is stored in the holder's internal field. + // Lifetime: + // - Initial owner: this holder (count = 1). + // - Each sample captured during profiling adds a ref via the context + // buffer copy in ExtractContext (count++). 
+ // - When the holder is GC'd, HolderWeakCallback deletes the heap + // shared_ptr (count--). If samples have copied it, the inner + // Global stays alive until those copies are released. + // - When stop()/post-processing clears the context buffer, all remaining + // refs drop and the Global is finally released. + auto* slot = new std::shared_ptr>( + std::make_shared>(isolate, args[0])); + holder->SetAlignedPointerInInternalField(kHolderSharedPtrSlot, slot, + v8::kEmbedderDataTypeTagDefault); + + // Register a weak callback to delete the holder's shared_ptr when the holder + // is GC'd. + auto* weak = new HolderWeakData{slot, Global(isolate, holder)}; + weak->handle.SetWeak(weak, + HolderWeakCallback, + v8::WeakCallbackType::kParameter); + + args.GetReturnValue().Set(holder); +} + +// static +void SampledCpuProfiler::OnGCPrologue(v8::Isolate* isolate, + v8::GCType, + v8::GCCallbackFlags, + void* data) { + auto* self = static_cast(data); + // Capture the helper's result before flagging GC active. A signal that + // arrives between these two stores will see in_gc_ == false and call the + // helper live — which is still safe because the GC prologue runs before V8 + // starts mutating the heap. The compiler fence prevents the cache store + // from being reordered after the flag store; pairs with the extractor's + // matching atomic_signal_fence(acquire) on the read side. + self->gc_cached_aligned_ptr_.store( + v8::LookupCpedMapAlignedPointer(isolate, *self->als_key_slot_), + std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_release); + self->in_gc_.store(true, std::memory_order_relaxed); +} + +// static +void SampledCpuProfiler::OnGCEpilogue(v8::Isolate*, v8::GCType, + v8::GCCallbackFlags, void* data) { + // No fence needed: clearing the flag first means a signal that interleaves + // these two stores takes the live-helper path, which is safe because GC + // has finished and the heap is consistent again. 
+ auto* self = static_cast(data); + self->in_gc_.store(false, std::memory_order_relaxed); + self->gc_cached_aligned_ptr_.store(nullptr, std::memory_order_relaxed); +} + +void* SampledCpuProfiler::ExtractContext(Isolate* isolate) { + // SIGNAL-SAFETY CONTRACT: this function runs in CPU-profiler signal-handler + // context. The OrderedHashMap walk through CPED is delegated to the V8 helper + // v8::LookupCpedMapAlignedPointer, which is signal-safe. The remainder is + // either atomic loads or a shared_ptr copy whose only side effect is an + // atomic ref-count increment. + + // All atomics consulted here synchronize the signal handler with the same + // thread's main-line execution using memory_order_relaxed loads paired with + // std::atomic_signal_fence to prevent compiler reordering. On ARM memory + // model this is cheaper than memory barriers. + SampledCpuProfiler* self = t_active_profiler; + if (self == nullptr) return nullptr; + Session* session = self->active_session_.load(std::memory_order_relaxed); + if (session == nullptr) return nullptr; + // Pair with Start's signal_fence(release) before publishing the session, + // so the Session's constructor effects are observed below. + std::atomic_signal_fence(std::memory_order_acquire); + + // During GC the helper is unsafe to call (it walks heap state mid- + // compaction), so we use the value cached at GC prologue. + void* aligned_ptr; + if (self->in_gc_.load(std::memory_order_relaxed)) { + // Pair with the prologue's signal_fence(release) between cache store + // and flag set: observing in_gc_ == true implies the cache is populated. + std::atomic_signal_fence(std::memory_order_acquire); + aligned_ptr = self->gc_cached_aligned_ptr_.load(std::memory_order_relaxed); + } else { + aligned_ptr = + v8::LookupCpedMapAlignedPointer(isolate, *self->als_key_slot_); + } + if (aligned_ptr == nullptr) return nullptr; + + auto* shared_ptr_slot = + static_cast>*>(aligned_ptr); + + // Reserve the next slot. 
+ uint64_t idx = + session->next_index.fetch_add(1, std::memory_order_relaxed); + if (idx >= session->context_buffer.size()) { + session->dropped.fetch_add(1, std::memory_order_relaxed); + return nullptr; + } + + // shared_ptr copy: only side effect is an atomic ref-count increment. No + // destructor runs because slots are default-constructed empty in Session's + // ctor and we never overwrite (non-wrapping buffer). + session->context_buffer[idx] = *shared_ptr_slot; + + // Pack the slot index as void*. Add 1 so a zero-index sample is + // distinguishable from the "no context" nullptr return; Stop() subtracts 1. + return reinterpret_cast(idx + 1); +} + +void SampledCpuProfiler::Initialize(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + Isolate* isolate = env->isolate(); + + Local tmpl = NewFunctionTemplate(isolate, New); + tmpl->InstanceTemplate()->SetInternalFieldCount( + BaseObject::kInternalFieldCount); + + SetProtoMethod(isolate, tmpl, "start", Start); + SetProtoMethod(isolate, tmpl, "stop", Stop); + SetProtoMethod(isolate, tmpl, "stopAndCapture", StopAndCapture); + SetProtoMethod(isolate, tmpl, "snapshot", Snapshot); + SetProtoMethod(isolate, tmpl, "createContextHolder", CreateContextHolder); + + SetConstructorFunction(context, target, "SampledCpuProfiler", tmpl); +} + +void SampledCpuProfiler::RegisterExternalReferences( + ExternalReferenceRegistry* registry) { + registry->Register(New); + registry->Register(Start); + registry->Register(Stop); + registry->Register(StopAndCapture); + registry->Register(Snapshot); + registry->Register(CreateContextHolder); +} + +} // namespace cpu_profiler +} // namespace node + +NODE_BINDING_CONTEXT_AWARE_INTERNAL( + cpu_profiler, node::cpu_profiler::SampledCpuProfiler::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE( + cpu_profiler, + node::cpu_profiler::SampledCpuProfiler::RegisterExternalReferences) diff --git a/src/node_cpu_profiler.h 
b/src/node_cpu_profiler.h new file mode 100644 index 00000000000000..7f525de69f81ef --- /dev/null +++ b/src/node_cpu_profiler.h @@ -0,0 +1,171 @@ +#ifndef SRC_NODE_CPU_PROFILER_H_ +#define SRC_NODE_CPU_PROFILER_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#include +#include +#include +#include + +#include "base_object.h" +#include "v8-profiler.h" +#include "v8.h" + +namespace node { + +class Environment; +class ExternalReferenceRegistry; + +namespace cpu_profiler { + +// Holds per-profile-session state: the buffer of sampled context references +// and the counters reported back to JS. +// +// Lifetime: created by Start() under the JS thread, written into by the +// extractor running in CPU-profiler signal-handler context (also on the JS +// thread, interrupting it), drained and destroyed by Stop() under the JS +// thread after v8::CpuProfiler::Stop() has joined the sampling thread. The +// signal-handler / Stop() race is benign because both run on the same thread +// and a signal handler runs to completion before the interrupted thread +// resumes. +struct Session { + explicit Session(size_t buffer_size); + ~Session() = default; + Session(const Session&) = delete; + Session& operator=(const Session&) = delete; + + // Pre-allocated, non-wrapping. Slots are written by the extractor and read + // by Stop() on the main thread. + std::vector>> context_buffer; + + // Next slot index the extractor will write to. When >= context_buffer.size() + // the extractor bumps `dropped` and returns nullptr instead of capturing. + std::atomic next_index{0}; + std::atomic dropped{0}; +}; + +// SampledCpuProfiler is the C++ side of the JS-facing API. It owns the +// v8::CpuProfiler, the active Session, and the ContextHolder ObjectTemplate. +// Lifecycle is driven from JS: new() creates one, start()/stop() manage the +// active Session. 
+class SampledCpuProfiler : public BaseObject { + public: + static void Initialize(v8::Local target, + v8::Local unused, + v8::Local context, + void* priv); + static void RegisterExternalReferences(ExternalReferenceRegistry* registry); + + SET_NO_MEMORY_INFO() + SET_MEMORY_INFO_NAME(SampledCpuProfiler) + SET_SELF_SIZE(SampledCpuProfiler) + + // Signal-safe extractor. Installed on Isolate via the SampleContextExtractor + // hook. Walks CPED -> AsyncContextFrame -> OrderedHashMap to find the + // current ContextHolder, captures a strong ref into the Session's context + // buffer, and returns the slot index packed as a void*. + static void* ExtractContext(v8::Isolate* isolate); + + // GC prologue/epilogue callbacks installed on the isolate at Start. The + // prologue captures v8::LookupCpedMapAlignedPointer's result while the heap + // is still consistent and stashes it on the binding; the extractor consults + // it during GC instead of walking the heap. + static void OnGCPrologue(v8::Isolate* isolate, + v8::GCType type, + v8::GCCallbackFlags flags, + void* data); + static void OnGCEpilogue(v8::Isolate* isolate, + v8::GCType type, + v8::GCCallbackFlags flags, + void* data); + + // Public so BaseObject's weak callback can `delete this`. Cleans up an + // active profile session if the JS handle was dropped without calling + // stop()/stopAndCapture(); disposes the underlying v8::CpuProfiler. + ~SampledCpuProfiler(); + + private: + SampledCpuProfiler(Environment* env, + v8::Local object, + v8::Local holder_template, + v8::Local als_resource_key, + size_t context_buffer_size, + int sampling_interval_us, + uint32_t max_samples, + bool with_context); + + // JS-facing entry points. + static void New(const v8::FunctionCallbackInfo& args); + static void Start(const v8::FunctionCallbackInfo& args); + // Stops V8's CpuProfiler, serializes the resulting profile to JSON via + // CpuProfile::Serialize, and returns the string. 
This is the original + // v8.startCpuProfile().handle.stop() behavior. + static void Stop(const v8::FunctionCallbackInfo& args); + // Stops V8's CpuProfiler and returns a structured object tree + // (functionName, scriptName, lineNumber, columnNumber, hitCount, contexts, + // children). + static void StopAndCapture(const v8::FunctionCallbackInfo& args); + // Stops V8's CpuProfiler, atomically swaps to a fresh Session, restarts the + // CpuProfiler so sampling continues with minimal gap, and returns the tree + // for the just-finished session. Used for continuous profiling. + static void Snapshot(const v8::FunctionCallbackInfo& args); + static void CreateContextHolder( + const v8::FunctionCallbackInfo& args); + + // Helper for Stop / StopAndCapture / Snapshot. Stops the active V8 + // profile, atomically detaches the Session from the extractor's view (so + // signal-handler invocations bail). Returns the V8 profile pointer (caller + // owns; must Delete()) and the detached Session (caller owns; must delete). + // If `restart` is true, also allocates a fresh Session and restarts V8. + // Returns false if the profiler was not running. + bool StopActiveSession(bool restart, + v8::CpuProfile** out_profile, + Session** out_session); + + // Builds and returns the structured tree for the given profile + session + // pair. Frees both before returning. + v8::Local BuildAndFree(v8::CpuProfile* profile, Session* session); + + v8::CpuProfiler* cpu_profiler_ = nullptr; + + // Owner of the current Session. Reads from the signal handler use the + // atomic; this struct is destroyed only after v8::CpuProfiler::Stop() has + // joined the sampling thread, so no extractor invocation can outlive it. + std::atomic active_session_{nullptr}; + + v8::Global holder_template_; + // The key under which the current ContextHolder lives in the + // AsyncContextFrame map (typically the AsyncLocalStorage instance itself). 
+ v8::Global als_resource_key_; + + // Pointer to the persistent-handle slot inside `als_resource_key_` that + // holds the ALS key's tagged pointer. The slot's address is stable for the + // Global<>'s lifetime; V8 keeps the slot's contents GC-coherently updated + // during compaction. The signal handler dereferences this pointer at each + // sample to obtain the current tagged address of the ALS key, then hands + // it to v8::LookupCpedMapAlignedPointer. Set once at construction. + uintptr_t* als_key_slot_ = nullptr; + + size_t context_buffer_size_; + int sampling_interval_us_; + uint32_t max_samples_; + bool with_context_; + v8::ProfilerId profile_id_ = 0; + + // GC-safety state. v8::LookupCpedMapAlignedPointer walks V8 heap structures + // and is unsafe to invoke during GC. We install GC prologue/epilogue + // callbacks at Start: the prologue calls the helper while the heap is still + // consistent and stashes the result in `gc_cached_aligned_ptr_`; the + // epilogue clears it. The signal-handler extractor consults `in_gc_` and + // sources from the cache rather than calling the helper while GC is active. 
+ std::atomic in_gc_{false}; + std::atomic gc_cached_aligned_ptr_{nullptr}; +}; + +} // namespace cpu_profiler +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_CPU_PROFILER_H_ diff --git a/src/node_v8.cc b/src/node_v8.cc index b49c29443a4287..b89aacbcc5ca31 100644 --- a/src/node_v8.cc +++ b/src/node_v8.cc @@ -248,40 +248,6 @@ void SetFlagsFromString(const FunctionCallbackInfo& args) { V8::SetFlagsFromString(flags.out(), flags.length()); } -void StartCpuProfile(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CpuProfileOptions options = ParseCpuProfileOptions(args); - CpuProfilingResult result = env->StartCpuProfile(options); - if (result.status == CpuProfilingStatus::kErrorTooManyProfilers) { - return THROW_ERR_CPU_PROFILE_TOO_MANY(isolate, - "There are too many CPU profiles"); - } else if (result.status == CpuProfilingStatus::kStarted) { - args.GetReturnValue().Set(Number::New(isolate, result.id)); - } -} - -void StopCpuProfile(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CHECK(args[0]->IsUint32()); - uint32_t profile_id = args[0]->Uint32Value(env->context()).FromJust(); - CpuProfile* profile = env->StopCpuProfile(profile_id); - if (!profile) { - return THROW_ERR_CPU_PROFILE_NOT_STARTED(isolate, - "CPU profile not started"); - } - auto json_out_stream = std::make_unique(); - profile->Serialize(json_out_stream.get(), - CpuProfile::SerializationFormat::kJSON); - profile->Delete(); - Local ret; - if (ToV8Value(env->context(), json_out_stream->out_stream().str(), isolate) - .ToLocal(&ret)) { - args.GetReturnValue().Set(ret); - } -} - void StartHeapProfile(const FunctionCallbackInfo& args) { Isolate* isolate = args.GetIsolate(); auto options = ParseHeapProfileOptions(args); @@ -767,8 +733,6 @@ void Initialize(Local target, // Export symbols used by 
v8.setFlagsFromString() SetMethod(context, target, "setFlagsFromString", SetFlagsFromString); - SetMethod(context, target, "startCpuProfile", StartCpuProfile); - SetMethod(context, target, "stopCpuProfile", StopCpuProfile); SetMethod(context, target, "startHeapProfile", StartHeapProfile); SetMethod(context, target, "stopHeapProfile", StopHeapProfile); @@ -834,8 +798,6 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(GetCppHeapStatistics); registry->Register(IsStringOneByteRepresentation); registry->Register(fast_is_string_one_byte_representation_); - registry->Register(StartCpuProfile); - registry->Register(StopCpuProfile); registry->Register(StartHeapProfile); registry->Register(StopHeapProfile); } From 4c3d7f427f51adf1b2290fc8e809398f0aa32515 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 7 May 2026 07:40:57 +0200 Subject: [PATCH 3/4] Add tests --- .../test-v8-sampled-cpu-profiler-isolates.js | 72 +++++++ test/parallel/test-v8-sampled-cpu-profiler.js | 195 ++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 test/parallel/test-v8-sampled-cpu-profiler-isolates.js create mode 100644 test/parallel/test-v8-sampled-cpu-profiler.js diff --git a/test/parallel/test-v8-sampled-cpu-profiler-isolates.js b/test/parallel/test-v8-sampled-cpu-profiler-isolates.js new file mode 100644 index 00000000000000..84c58b74e2c9ee --- /dev/null +++ b/test/parallel/test-v8-sampled-cpu-profiler-isolates.js @@ -0,0 +1,72 @@ +'use strict'; + +// Verifies that v8.startCpuProfile() with { withContext: true } maintains +// per-isolate state: a profile in the main isolate and a profile in a Worker +// isolate run concurrently without their contexts crossing over. 
+ +const common = require('../common'); +const assert = require('assert'); +const { Worker, isMainThread, parentPort, workerData } = + require('worker_threads'); +const v8 = require('v8'); + +function burnCpu(ms) { + const end = Date.now() + ms; + // eslint-disable-next-line no-empty + while (Date.now() < end) {} +} + +function collectContexts(node) { + const out = []; + if (Array.isArray(node.contexts)) { + for (const c of node.contexts) out.push(c.context); + } + for (const child of node.children || []) { + out.push(...collectContexts(child)); + } + return out; +} + +if (!isMainThread) { + // Worker side: profile in this isolate with our own marker. + const handle = v8.startCpuProfile({ + withContext: true, + intervalMicros: 500, + }); + const marker = workerData.marker; + handle.runWithContext(marker, () => burnCpu(120)); + const profile = handle.stopAndCapture(); + const ctxs = collectContexts(profile.topDownRoot); + parentPort.postMessage({ + sampleCount: ctxs.length, + distinctMarkers: [...new Set(ctxs)], + }); + return; +} + +// Main thread: profile concurrently in this isolate. 
+const w = new Worker(__filename, { workerData: { marker: 'worker-marker' } }); + +const handle = v8.startCpuProfile({ + withContext: true, + intervalMicros: 500, +}); +const mainMarker = 'main-marker'; +handle.runWithContext(mainMarker, () => burnCpu(120)); + +w.on('message', common.mustCall((workerResult) => { + const profile = handle.stopAndCapture(); + const mainCtxs = collectContexts(profile.topDownRoot); + + assert.ok(mainCtxs.length > 0, + 'main isolate profile should have context-bearing samples'); + for (const ctx of mainCtxs) { + assert.strictEqual(ctx, mainMarker, + `main isolate leaked: ${ctx} (expected ${mainMarker})`); + } + + assert.ok(workerResult.sampleCount > 0, + 'worker isolate profile should have context-bearing samples'); + assert.deepStrictEqual(workerResult.distinctMarkers, ['worker-marker'], + `worker isolate leaked: ${workerResult.distinctMarkers}`); +})); diff --git a/test/parallel/test-v8-sampled-cpu-profiler.js b/test/parallel/test-v8-sampled-cpu-profiler.js new file mode 100644 index 00000000000000..c7cf56c878dd51 --- /dev/null +++ b/test/parallel/test-v8-sampled-cpu-profiler.js @@ -0,0 +1,195 @@ +// Flags: --expose-gc + +'use strict'; + +require('../common'); +const assert = require('assert'); +const v8 = require('v8'); + +// Walk the result tree and collect every node's contexts array (if present). +function collectContexts(node) { + const out = []; + if (Array.isArray(node.contexts)) { + for (const c of node.contexts) { + out.push({ context: c.context, timestamp: c.timestamp }); + } + } + for (const child of node.children || []) { + out.push(...collectContexts(child)); + } + return out; +} + +function burnCpu(ms) { + const end = Date.now() + ms; + // eslint-disable-next-line no-empty + while (Date.now() < end) {} +} + +(async function main() { + // Test 1: backwards-compat — v8.startCpuProfile() with no args returns a + // handle whose stop() yields a JSON string. 
+ { + const handle = v8.startCpuProfile(); + burnCpu(20); + const json = handle.stop(); + assert.strictEqual(typeof json, 'string'); + assert.ok(json.length > 0); + JSON.parse(json); // valid JSON + // Calling stop() again is a no-op. + assert.strictEqual(handle.stop(), undefined); + } + + // Test 2: stopAndCapture without context — returns object tree shape, no + // contexts arrays anywhere because withContext was not enabled. + { + const handle = v8.startCpuProfile(); + burnCpu(40); + const profile = handle.stopAndCapture(); + assert.strictEqual(typeof profile, 'object'); + assert.strictEqual(typeof profile.startTime, 'number'); + assert.strictEqual(typeof profile.endTime, 'number'); + assert.strictEqual(typeof profile.droppedContexts, 'number'); + assert.strictEqual(typeof profile.topDownRoot, 'object'); + const ctxs = collectContexts(profile.topDownRoot); + assert.strictEqual(ctxs.length, 0); + } + + // Test 3: with withContext, samples taken inside runWithContext carry the + // context value. + { + const handle = v8.startCpuProfile({ + withContext: true, + contextBufferSize: 4096, + intervalMicros: 500, + }); + const marker = { which: 'phase-A' }; + handle.runWithContext(marker, () => { + burnCpu(80); + }); + const profile = handle.stopAndCapture(); + const ctxs = collectContexts(profile.topDownRoot); + assert.ok(ctxs.length > 0); + for (const { context } of ctxs) { + assert.strictEqual(context, marker); + } + } + + // Test 4: context propagates across an awaited continuation. 
+ { + const handle = v8.startCpuProfile({ + withContext: true, + intervalMicros: 500, + }); + const marker = { which: 'phase-B' }; + await handle.runWithContext(marker, async () => { + burnCpu(40); + await new Promise((resolve) => setImmediate(resolve)); + burnCpu(40); + }); + const profile = handle.stopAndCapture(); + const ctxs = collectContexts(profile.topDownRoot); + assert.ok(ctxs.length > 0); + for (const { context } of ctxs) { + assert.strictEqual(context, marker); + } + } + + // Test 5: snapshot() returns the just-captured tree and continues sampling + // for the next snapshot. Uses different markers on either side to verify + // sessions don't bleed into one another. + { + const handle = v8.startCpuProfile({ + withContext: true, + intervalMicros: 500, + }); + const markerA = { phase: 'A' }; + const markerB = { phase: 'B' }; + handle.runWithContext(markerA, () => burnCpu(60)); + const snapA = handle.snapshot(); + handle.runWithContext(markerB, () => burnCpu(60)); + const snapB = handle.stopAndCapture(); + + const ctxsA = collectContexts(snapA.topDownRoot); + const ctxsB = collectContexts(snapB.topDownRoot); + assert.ok(ctxsA.length > 0, 'snapshot A should have context-bearing samples'); + assert.ok(ctxsB.length > 0, 'snapshot B should have context-bearing samples'); + for (const { context } of ctxsA) { + assert.strictEqual(context, markerA, 'snapshot A leaked phase B context'); + } + for (const { context } of ctxsB) { + assert.strictEqual(context, markerB, 'snapshot B leaked phase A context'); + } + } + + // Test 6: buffer-full path increments droppedContexts. 
+ { + const handle = v8.startCpuProfile({ + withContext: true, + contextBufferSize: 4, + intervalMicros: 500, + }); + handle.runWithContext({ which: 'phase-D' }, () => burnCpu(150)); + const profile = handle.stopAndCapture(); + assert.ok(profile.droppedContexts > 0, + `expected droppedContexts > 0, got ${profile.droppedContexts}`); + } + + // Test 7: runWithContext / enterWithContext throw when withContext is not + // set. + { + const handle = v8.startCpuProfile(); + assert.throws(() => handle.runWithContext({}, () => {}), + /requires \{ withContext: true \}/); + assert.throws(() => handle.enterWithContext({}), + /requires \{ withContext: true \}/); + handle.stop(); + } + + // Test 8: stop() / stopAndCapture() on already-stopped is no-op. + { + const handle = v8.startCpuProfile(); + handle.stop(); + assert.strictEqual(handle.stop(), undefined); + assert.strictEqual(handle.stopAndCapture(), undefined); + } + + // Test 9: snapshot() after stop throws. + { + const handle = v8.startCpuProfile({ withContext: true }); + handle.stopAndCapture(); + assert.throws(() => handle.snapshot(), /has been stopped/); + } + + // Test 10: dropping the handle without stopping is recoverable. The + // destructor's best-effort teardown runs at GC and a subsequent + // startCpuProfile() must succeed. + { + (() => { + const dropped = v8.startCpuProfile({ withContext: true }); + dropped.runWithContext({ phase: 'orphaned' }, () => burnCpu(20)); + // Intentionally do not call stop / stopAndCapture / snapshot. + })(); + if (typeof globalThis.gc === 'function') globalThis.gc(); + // Whether or not GC has reclaimed the handle yet, a brand new profiler + // must start cleanly. (If GC hasn't run, the previous handle is still + // alive and t_active_profiler is set — that's the existing-active error.) + let started = false; + try { + const fresh = v8.startCpuProfile(); + fresh.stop(); + started = true; + } catch (e) { + // Acceptable iff GC didn't run; otherwise this is a real failure. 
+ assert.match(e.message, /already active/); + } + if (typeof globalThis.gc !== 'function' && !started) { + console.warn( + 'Note: --expose-gc was not enabled; orphaned-handle GC path ' + + 'was not exercised.'); + } + } +})().then( + () => console.log('all tests passed'), + (err) => { console.error(err); process.exit(1); } +); From d64a519f78a49fcab0c2de48821227e39dc2b23e Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 7 May 2026 09:48:37 +0200 Subject: [PATCH 4/4] Update API docs --- doc/api/v8.md | 116 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/doc/api/v8.md b/doc/api/v8.md index da225a333ddcac..f5a34054d6313d 100644 --- a/doc/api/v8.md +++ b/doc/api/v8.md @@ -1603,9 +1603,85 @@ added: - v24.12.0 --> -* Returns: {string} +* Returns: {string|undefined} -Stopping collecting the profile and return the profile data. +Stop collecting the profile and return the profile data as a JSON-encoded +string. Calling `stop()` after the handle has already been stopped returns +`undefined`. + +### `syncCpuProfileHandle.stopAndCapture()` + + + +* Returns: {Object|undefined} + +Stop collecting the profile and return a structured profile object instead +of a JSON string. Calling `stopAndCapture()` after the handle has already +been stopped returns `undefined`. + +The returned object has shape: + +* `startTime` {number} Microseconds since V8's clock origin. +* `endTime` {number} Microseconds since V8's clock origin. +* `droppedContexts` {number} Number of samples for which a context could not + be recorded because the per-session context buffer was full. Always `0` + when the profile was started without `withContext`. +* `topDownRoot` {Object} Root of the recursive call tree: + * `functionName` {string} + * `scriptName` {string} + * `lineNumber` {number} + * `columnNumber` {number} + * `hitCount` {number} Number of samples that landed at this node. + * `contexts` {Array} Optional. 
Present only when at least one sample at + this node carried a context. Each element is `{ context, timestamp }`. + * `children` {Array} Child nodes (same shape). + +### `syncCpuProfileHandle.snapshot()` + + + +* Returns: {Object} + +Capture a profile of samples taken since the last `start` or `snapshot` +call, and continue sampling with a fresh internal session. Returns the +same shape as [`stopAndCapture()`][]. Useful for continuous profiling: emit +a snapshot every minute without losing samples between sessions. Throws +`ERR_INVALID_STATE` if the handle has already been stopped. + +### `syncCpuProfileHandle.runWithContext(value, fn[, ...args])` + + + +* `value` {any} Arbitrary JavaScript value to associate with samples + captured during the execution of `fn`. +* `fn` {Function} Function to invoke. +* `...args` {any} Arguments forwarded to `fn`. +* Returns: {any} The return value of `fn(...args)`. + +Run `fn(...args)` with `value` recorded as the context for any samples +captured during its synchronous execution and across awaited continuations +that propagate the context (via {AsyncLocalStorage}). Throws if the profile +was started without `withContext: true`. + +### `syncCpuProfileHandle.enterWithContext(value)` + + + +* `value` {any} Arbitrary JavaScript value to associate with subsequent + samples in the current asynchronous scope. + +Set `value` as the current sample context for the rest of the active +{AsyncLocalStorage} scope. Mirrors the naming of +[`asyncLocalStorage.enterWith()`][]. Throws if the profile was started +without `withContext: true`. ### `syncCpuProfileHandle[Symbol.dispose]()` @@ -1615,7 +1691,8 @@ added: - v24.12.0 --> -Stopping collecting the profile and the profile will be discarded. +Stops collecting the profile (equivalent to `stop()`); the profile is +discarded. ## Class: `SyncHeapProfileHandle` @@ -1783,6 +1860,19 @@ added: * `sampleInterval` {number} Requested sampling interval in milliseconds. **Default:** `0`. 
 * `maxBufferSize` {integer} Maximum number of samples to keep before older entries are discarded. **Default:** `4294967295`. + * `withContext` {boolean} If `true`, the returned handle exposes + [`runWithContext()`][] and [`enterWithContext()`][] for associating + arbitrary JavaScript values with samples captured during their execution + scope. The values are surfaced on each sample of the structured profile + returned by [`stopAndCapture()`][] or [`snapshot()`][]. When `false` + (default), no per-sample context tracking is performed and there is no + extractor overhead. **Default:** `false`. + * `contextBufferSize` {integer} Maximum number of samples that can carry a + context value during a single profile session. Once exceeded, further + samples are recorded with no associated context and the + `droppedContexts` counter on the result is incremented. Only meaningful + when `withContext` is `true`. **Default:** `60000` (sufficient for 60 + seconds of sampling at the default 1 ms interval). * Returns: {SyncCPUProfileHandle} Starting a CPU profile then return a `SyncCPUProfileHandle` object. @@ -1794,6 +1884,21 @@ const profile = handle.stop(); console.log(profile); ``` +The returned handle can also produce a structured object tree instead of the +JSON string, and can carry a per-sample JavaScript context that propagates +across asynchronous boundaries via {AsyncLocalStorage}: + +```cjs +const handle = v8.startCpuProfile({ withContext: true }); +handle.runWithContext({ requestId: 42 }, () => { + // Synchronous and propagated-async work here is sampled with + // { requestId: 42 } associated with each sample. +}); +const profile = handle.stopAndCapture(); +// profile.topDownRoot is a recursive tree; each node may carry +// `contexts: [{ context, timestamp }, ...]`. +``` + ## `v8.startHeapProfile([options])`