Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 74 additions & 5 deletions src/EventPayloadBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ class EventPayloadBuilder
'additionalData',
];

/**
* Maximum nesting depth for {@see transformForJsonRecursive}; stops runaway recursion. It works together with the
* stack of ancestor arrays, which provides the same circular detection as {@see Serializer::prepare()}.
*/
private const TRANSFORM_JSON_MAX_DEPTH = 32;

/**
* EventPayloadBuilder constructor.
*/
Expand Down Expand Up @@ -158,9 +164,15 @@ private function normalizeBacktrace(array $stack): array
$functionName = (string) $frame['functionName'];
}

$arguments = $this->buildArgumentsList($frame);

$additional = [];
foreach ($frame as $key => $value) {
if (!in_array($key, self::ALLOWED_KEYS, true)) {
// Mapped to `arguments` via StacktraceFrameBuilder / string list; do not dump raw `args` here
if ($key === 'args') {
continue;
}
// Drop heavy/unserializable objects from 'object' field; store class name instead
if ($key === 'object') {
$value = is_object($value) ? get_class($value) : $value;
Expand All @@ -176,9 +188,7 @@ private function normalizeBacktrace(array $stack): array
'column' => null,
'sourceCode' => isset($frame['sourceCode']) && is_array($frame['sourceCode']) ? $frame['sourceCode'] : null,
'function' => $functionName,
// Keep arguments only if it already looks like desired string[]; otherwise omit
// Limit argument processing to first 10 items to avoid performance issues
'arguments' => (isset($frame['arguments']) && is_array($frame['arguments'])) ? array_values(array_map('strval', array_slice($frame['arguments'], 0, 10))) : [],
'arguments' => $arguments,
'additionalData'=> $additional,
]);
}
Expand Down Expand Up @@ -223,19 +233,78 @@ private function sanitizeArrayKeys($value)
}

/**
* Transform values to JSON-serializable representation
* Build Hawk `arguments`: string list like "name = serializedValue" (from raw `args` via Serializer).
* Limits the number of lines ({@see StacktraceFrameBuilder::MAX_FRAME_ARGUMENTS}). Serialized values are not
* length-truncated; only param names are capped ({@see StacktraceFrameBuilder::formatTruncatedArgumentLine});
* prebuilt strings are split on the first `" = "` with {@see StacktraceFrameBuilder::truncatePrebuiltArgumentLine}.
Comment on lines +235 to +239
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description mentions capping/truncating each stack argument line by byte length, but buildArgumentsList() explicitly keeps serialized values unbounded and the tests preserve very long prebuilt argument lines. This can still produce oversized events. Either implement a per-line byte cap (for both prebuilt arguments and formatted args) or update the PR description/contract expectations to match the current behavior.

Copilot uses AI. Check for mistakes.
*
* @param array $frame
*
* @return array
*/
private function buildArgumentsList(array $frame): array
{
    $limit = StacktraceFrameBuilder::MAX_FRAME_ARGUMENTS;

    // Prebuilt string lines take precedence over raw `args`, even when the list is empty.
    $prebuilt = $frame['arguments'] ?? null;
    if (is_array($prebuilt)) {
        return array_values(array_map(
            static function ($entry): string {
                return StacktraceFrameBuilder::truncatePrebuiltArgumentLine((string) $entry);
            },
            array_slice($prebuilt, 0, $limit)
        ));
    }

    // Otherwise format the raw, non-empty `args` through the frame builder.
    $rawArgs = $frame['args'] ?? null;
    if (is_array($rawArgs) && !empty($rawArgs)) {
        $formatted = $this->stacktraceFrameBuilder->getFormattedArguments($frame);

        return array_values(array_map(
            static function ($entry): string {
                return (string) $entry;
            },
            array_slice($formatted, 0, $limit)
        ));
    }

    return [];
}

/**
 * Convert a frame's extra field into a JSON-friendly value.
 *
 * Starts the recursive walk at depth 0 with an empty ancestor stack; nested arrays are
 * then processed under the depth and cycle limits of {@see transformForJsonRecursive}.
 *
 * @param mixed $value
 *
 * @return mixed
 */
private function transformForJson($value)
{
    $initialDepth = 0;
    $ancestors = [];

    return $this->transformForJsonRecursive($value, $initialDepth, $ancestors);
}

/**
* @param mixed $value
*
* @return mixed
*/
private function transformForJsonRecursive($value, int $depth, array &$stack)
{
if ($depth > self::TRANSFORM_JSON_MAX_DEPTH) {
return '[max depth]';
}

if (is_array($value)) {
foreach ($stack as $ancestor) {
if ($value === $ancestor) {
return '[circular]';
}
}

$stack[] = $value;
$result = [];
foreach ($value as $k => $v) {
$result[$k] = $this->transformForJson($v);
$result[$k] = $this->transformForJsonRecursive($v, $depth + 1, $stack);
}
Comment on lines 295 to 306
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

transformForJson() only enforces a max depth; it does not detect circular/self-referential arrays. For values like $GLOBALS (or any array with a self-reference), this will repeatedly re-walk the same structure until the depth limit and can still produce a very large nested payload and high CPU. Consider adding circular reference detection (similar to Serializer::prepare()), returning a sentinel like [circular] when an ancestor is encountered.

Copilot uses AI. Check for mistakes.
array_pop($stack);

return $result;
}
Expand Down
38 changes: 32 additions & 6 deletions src/Serializer.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ final class Serializer
*/
public function serializeValue($value): string
{
$encoded = json_encode($this->prepare($value), JSON_UNESCAPED_UNICODE);
$flags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;
$stack = [];
$encoded = json_encode($this->prepare($value, 0, $stack), $flags);

if ($encoded === false) {
return '';
Expand All @@ -29,29 +31,53 @@ public function serializeValue($value): string
return $encoded;
}

/**
* Max nesting depth to avoid runaway recursion on $GLOBALS and similar circular structures.
*/
private const PREPARE_MAX_DEPTH = 32;

/**
* Prepares value for encoding
*
* @param $value
* @param mixed $value
* @param int $depth
* @param array $stack reference path (arrays only) to detect $GLOBALS-style cycles
*
* @return array|mixed|string
*/
private function prepare($value)
private function prepare($value, int $depth, array &$stack)
{
if ($depth > self::PREPARE_MAX_DEPTH) {
return '[max depth]';
}

if (!is_object($value) && (is_array($value) || is_iterable($value))) {
if (is_array($value)) {
foreach ($stack as $ancestor) {
if ($value === $ancestor) {
return '[circular]';
}
}
$stack[] = $value;
}

$result = [];
foreach ($value as $key => $subValue) {
if (is_array($subValue) || is_iterable($subValue)) {
$result[$key] = $this->prepare($subValue);
$result[$key] = $this->prepare($subValue, $depth + 1, $stack);
} else {
$result[$key] = $this->transform($subValue);
}
}

if (is_array($value)) {
array_pop($stack);
}

return $result;
} else {
return $this->transform($value);
}

return $this->transform($value);
}

/**
Expand Down
120 changes: 112 additions & 8 deletions src/StacktraceFrameBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@
*/
final class StacktraceFrameBuilder
{
/**
* Max function arguments to include per frame (payload size, CPU, Hawk limits).
*/
public const MAX_FRAME_ARGUMENTS = 20;

/**
* Max UTF-8 byte length for the argument name (left-hand side only).
* Serialized values from {@see Serializer::serializeValue()} are not length-capped so JSON stays intact.
*/
public const MAX_ARGUMENT_NAME_BYTES = 256;

/**
* @var Serializer
*/
Expand Down Expand Up @@ -183,6 +194,19 @@ private function composeFunctionName(array $frame): string
return $functionName;
}

/**
 * Public entry point for turning a frame's raw `args` into Hawk `arguments` lines.
 *
 * Delegates to {@see getArgs} so that {@see EventPayloadBuilder} can reuse the same
 * formatting instead of duplicating it.
 *
 * @param array $frame
 *
 * @return array
 */
public function getFormattedArguments(array $frame): array
{
    $formattedLines = $this->getArgs($frame);

    return $formattedLines;
}

/**
* Get function arguments for a frame
*
Expand Down Expand Up @@ -216,6 +240,9 @@ private function getArgs(array $frame): array
*/
if (!$reflection) {
foreach ($frame['args'] as $index => $value) {
if ($index >= self::MAX_FRAME_ARGUMENTS) {
break;
}
$arguments['arg' . $index] = $value;
}
} else {
Expand All @@ -231,6 +258,10 @@ private function getArgs(array $frame): array
$paramName = $reflectionParam->getName();
$paramPosition = $reflectionParam->getPosition();

if ($paramPosition >= self::MAX_FRAME_ARGUMENTS) {
break;
}

if (isset($frame['args'][$paramPosition])) {
$arguments[$paramName] = $frame['args'][$paramPosition];
}
Expand All @@ -243,18 +274,91 @@ private function getArgs(array $frame): array
*/
$newArguments = [];
foreach ($arguments as $name => $value) {
$value = $this->serializer->serializeValue($value);
$serialized = $this->serializer->serializeValue($value);
$newArguments[] = self::formatTruncatedArgumentLine((string) $name, $serialized);
}

try {
$newArguments[] = sprintf('%s = %s', $name, $value);
} catch (\Exception $e) {
// Ignore unknown types
}
return $newArguments;
}

/**
 * Compose a single `"name = value"` argument line.
 *
 * The name is capped at {@see MAX_ARGUMENT_NAME_BYTES} bytes; the serialized value is appended
 * untouched, so whatever the serializer produced is never cut.
 */
public static function formatTruncatedArgumentLine(string $name, string $serializedValue): string
{
    return sprintf(
        '%s = %s',
        self::truncateUtf8StringToMaxBytes($name, self::MAX_ARGUMENT_NAME_BYTES),
        $serializedValue
    );
}
Comment on lines +287 to +292
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatTruncatedArgumentLine() only truncates the argument name; the serialized value (and therefore the full "name = value" line) can still be arbitrarily large (e.g., very long strings/arrays), which can defeat the goal of keeping payloads small. Consider enforcing an explicit max byte length for the entire line and applying it consistently to both formatted and prebuilt argument lines.

Copilot uses AI. Check for mistakes.

/**
* Normalize one prebuilt `name = serializedValue` line: split at the first `" = "`, cap name only; value unchanged.
* Lines without `" = "` are returned as-is (no length limit).
*/
public static function truncatePrebuiltArgumentLine(string $line): string
{
$separator = ' = ';
$position = strpos($line, $separator);
if ($position === false) {
return $line;
Comment on lines +296 to +303
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

truncatePrebuiltArgumentLine() returns lines without the " = " delimiter unchanged (no length cap). That allows a single prebuilt argument entry to be extremely large and bypass any payload-size protections. Consider applying the same max-byte truncation to the whole line even when the delimiter is missing.

Suggested change
* Lines without `" = "` are returned as-is (no length limit).
*/
public static function truncatePrebuiltArgumentLine(string $line): string
{
$separator = ' = ';
$position = strpos($line, $separator);
if ($position === false) {
return $line;
* Lines without `" = "` are truncated as whole lines to the configured byte cap.
*/
public static function truncatePrebuiltArgumentLine(string $line): string
{
$separator = ' = ';
$position = strpos($line, $separator);
if ($position === false) {
return self::truncateUtf8StringToMaxBytes($line, self::MAX_ARGUMENT_NAME_BYTES);

Copilot uses AI. Check for mistakes.
}

$nameRaw = substr($line, 0, $position);
$valueRaw = substr($line, $position + strlen($separator));

return self::formatTruncatedArgumentLine($nameRaw, $valueRaw);
}

/**
 * Return the longest leading slice of $string that fits in $maxBytes bytes and is valid UTF-8.
 *
 * Fallback for environments without {@see mb_strcut}: a plain {@see substr} could cut through a
 * multi-byte codepoint, so trailing bytes are dropped one at a time until the slice validates.
 * A string that already fits is returned unchanged without validation.
 */
private static function utf8SafePrefixMaxBytes(string $string, int $maxBytes): string
{
    if ($maxBytes <= 0) {
        return '';
    }

    if (strlen($string) <= $maxBytes) {
        return $string;
    }

    // The empty pattern with the `u` modifier matches only when the subject is valid UTF-8.
    for ($prefix = substr($string, 0, $maxBytes); $prefix !== ''; $prefix = substr($prefix, 0, -1)) {
        if (preg_match('//u', $prefix) === 1) {
            return $prefix;
        }
    }

    return '';
}

/**
 * Shorten text to at most $maxBytes bytes, appending `...` when something was clipped.
 *
 * Unicode-safe: uses {@see mb_strcut} when available, otherwise {@see utf8SafePrefixMaxBytes}
 * (valid UTF-8 prefix, no split codepoints).
 *
 * A string that already fits is always returned unchanged, even under a cap of 3 bytes or less.
 * When the cap is too small to hold any text plus the ellipsis, only as much of `...` as fits
 * is returned; a zero or negative cap yields an empty string.
 *
 * @param string $string   text to shorten
 * @param int    $maxBytes maximum byte length of the result, ellipsis included
 *
 * @return string
 */
public static function truncateUtf8StringToMaxBytes(string $string, int $maxBytes): string
{
    // Check the fit first so short strings are not replaced by an ellipsis under tiny caps.
    if (strlen($string) <= $maxBytes) {
        return $string;
    }

    if ($maxBytes <= 3) {
        // Clamp at zero: a negative length would make substr() count from the end of '...'.
        return substr('...', 0, max(0, $maxBytes));
    }

    $cutLength = $maxBytes - 3;
    if (function_exists('mb_strcut')) {
        return mb_strcut($string, 0, $cutLength, 'UTF-8') . '...';
    }

    return self::utf8SafePrefixMaxBytes($string, $cutLength) . '...';
}

return $arguments;
/**
 * Legacy alias kept for existing callers; forwards unchanged to {@see truncateUtf8StringToMaxBytes}.
 *
 * @deprecated Use {@see truncateUtf8StringToMaxBytes} or {@see formatTruncatedArgumentLine}
 */
public static function truncateArgumentLineBytes(string $line, int $maxBytes): string
{
    $shortened = self::truncateUtf8StringToMaxBytes($line, $maxBytes);

    return $shortened;
}

/**
Expand Down
Loading
Loading