From 99986469eff8ae37c5262b558bea6f980a9e39b3 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Mon, 16 Mar 2026 14:18:47 +0000 Subject: [PATCH 1/2] feat(anthropic): Record finish reasons in AI monitoring spans Capture the stop_reason from Anthropic API responses and set it as GEN_AI_RESPONSE_FINISH_REASONS span data. Works for both streaming (via MessageDeltaEvent) and non-streaming responses. Co-Authored-By: Claude --- sentry_sdk/integrations/anthropic.py | 45 +++++++++++++------ .../integrations/anthropic/test_anthropic.py | 19 +++++--- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index bc208ac4f5..4f2baadb8f 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -159,7 +159,8 @@ def _collect_ai_data( usage: "_RecordedUsage", content_blocks: "list[str]", response_id: "str | None" = None, -) -> "tuple[str | None, _RecordedUsage, list[str], str | None]": + finish_reasons: "list[str] | None" = None, +) -> "tuple[str | None, _RecordedUsage, list[str], str | None, list[str] | None]": """ Collect model information, token usage, and collect content blocks from the AI streaming response. """ @@ -197,6 +198,7 @@ def _collect_ai_data( usage, content_blocks, response_id, + finish_reasons, ) # Counterintuitive, but message_delta contains cumulative token counts :) @@ -221,18 +223,18 @@ def _collect_ai_data( usage.cache_read_input_tokens = cache_read_input_tokens # TODO: Record event.usage.server_tool_use - return ( - model, - usage, - content_blocks, - response_id, - ) + stop_reason = getattr(event.delta, "stop_reason", None) + if stop_reason is not None: + finish_reasons = [stop_reason] + + return (model, usage, content_blocks, response_id, finish_reasons) return ( model, usage, content_blocks, response_id, + finish_reasons, ) @@ -411,6 +413,7 @@ def _wrap_synchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None + finish_reasons = None try: for event in iterator: @@ -430,12 +433,15 @@ def _wrap_synchronous_message_iterator( yield event continue - (model, usage, content_blocks, response_id) = _collect_ai_data( - event, - model, - usage, - content_blocks, - response_id, + (model, usage, content_blocks, response_id, finish_reasons) = ( + _collect_ai_data( + event, + model, + usage, + content_blocks, + response_id, + finish_reasons, + ) ) yield event finally: @@ -459,6 +465,7 @@ def _wrap_synchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, + finish_reasons=finish_reasons, ) @@ -475,6 +482,7 @@ async def _wrap_asynchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None + finish_reasons = None try: async for event in iterator: @@ -499,12 +507,14 @@ async def _wrap_asynchronous_message_iterator( usage, content_blocks, response_id, + finish_reasons, ) = _collect_ai_data( event, model, usage, content_blocks, response_id, + finish_reasons, ) yield event finally: @@ -528,6 +538,7 @@ async def _wrap_asynchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, + finish_reasons=finish_reasons, ) @@ -542,12 +553,15 @@ def _set_output_data( content_blocks: "list[Any]", finish_span: bool = False, response_id: "str | None" = None, + finish_reasons: "list[str] | None" = None, ) -> None: """ Set output data for the span based on the AI response.""" span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) if response_id is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id) + if finish_reasons is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons) if should_send_default_pii() and integration.include_prompts: output_messages: "dict[str, list[Any]]" = { "response": [], @@ -641,6 +655,10 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A elif hasattr(content_block, "text"): content_blocks.append({"type": "text", "text": content_block.text}) + finish_reasons = None + if getattr(result, "stop_reason", None) is not None: + finish_reasons = [getattr(result, "stop_reason")] + _set_output_data( span=span, integration=integration, @@ -652,6 +670,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A content_blocks=content_blocks, finish_span=True, response_id=getattr(result, "id", None), + finish_reasons=finish_reasons, ) else: span.set_data("unknown_response", True) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 3a854e3a4e..184002ca3a 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -63,6 +63,7 @@ async def __call__(self, *args, **kwargs): role="assistant", content=[TextBlock(type="text", text="Hi, I'm Claude.")], type="message", + stop_reason="end_turn", usage=Usage(input_tokens=10, output_tokens=20), ) @@ -136,6 +137,7 @@ def test_nonstreaming_create_message( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -258,7 +260,7 @@ def test_streaming_create_message( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -323,6 +325,7 @@ def test_streaming_create_message( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.parametrize( @@ -373,7 +376,7 @@ def test_stream_messages( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -439,6 +442,7 @@ def test_stream_messages( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio @@ -492,7 +496,7 @@ async def test_streaming_create_message_async( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -504,6 +508,7 @@ async def test_streaming_create_message_async( sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, + default_integrations=False, send_default_pii=send_default_pii, ) events = capture_events() @@ -559,6 +564,7 @@ async def test_streaming_create_message_async( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio @@ -1471,14 +1477,15 @@ def test_collect_ai_data_with_input_json_delta(): content_blocks = [] - model, new_usage, new_content_blocks, response_id = _collect_ai_data( - event, model, usage, content_blocks + model, new_usage, new_content_blocks, response_id, finish_reasons = ( + _collect_ai_data(event, model, usage, content_blocks) ) assert model is None assert new_usage.input_tokens == usage.input_tokens assert new_usage.output_tokens == usage.output_tokens assert new_content_blocks == ["test"] assert response_id is None + assert finish_reasons is None @pytest.mark.skipif( @@ -1766,6 +1773,7 @@ def test_nonstreaming_create_message_with_system_prompt( assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -1851,6 +1859,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize( From e146224f368123b39e766a865ec082fd1d9665ec Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Tue, 17 Mar 2026 07:24:56 +0000 Subject: [PATCH 2/2] ref(anthropic): Simplify finish_reasons to single finish_reason internally Anthropic only returns a single stop_reason, so track it as a string internally instead of a list. The list wrapping is deferred to _set_output_data where it's set on the span. Also removes an unnecessary getattr guard for event.delta.stop_reason. Co-Authored-By: Claude Opus 4.6 (1M context) --- sentry_sdk/integrations/anthropic.py | 43 ++++++++----------- .../integrations/anthropic/test_anthropic.py | 6 +-- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 4f2baadb8f..32522a7234 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -159,8 +159,8 @@ def _collect_ai_data( usage: "_RecordedUsage", content_blocks: "list[str]", response_id: "str | None" = None, - finish_reasons: "list[str] | None" = None, -) -> "tuple[str | None, _RecordedUsage, list[str], str | None, list[str] | None]": + finish_reason: "str | None" = None, +) -> "tuple[str | None, _RecordedUsage, list[str], str | None, str | None]": """ Collect model information, token usage, and collect content blocks from the AI streaming response. """ @@ -198,7 +198,7 @@ def _collect_ai_data( usage, content_blocks, response_id, - finish_reasons, + finish_reason, ) # Counterintuitive, but message_delta contains cumulative token counts :) @@ -223,18 +223,17 @@ def _collect_ai_data( usage.cache_read_input_tokens = cache_read_input_tokens # TODO: Record event.usage.server_tool_use - stop_reason = getattr(event.delta, "stop_reason", None) - if stop_reason is not None: - finish_reasons = [stop_reason] + if event.delta.stop_reason is not None: + finish_reason = event.delta.stop_reason - return (model, usage, content_blocks, response_id, finish_reasons) + return (model, usage, content_blocks, response_id, finish_reason) return ( model, usage, content_blocks, response_id, - finish_reasons, + finish_reason, ) @@ -413,7 +412,7 @@ def _wrap_synchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None - finish_reasons = None + finish_reason = None try: for event in iterator: @@ -433,14 +432,14 @@ def _wrap_synchronous_message_iterator( yield event continue - (model, usage, content_blocks, response_id, finish_reasons) = ( + (model, usage, content_blocks, response_id, finish_reason) = ( _collect_ai_data( event, model, usage, content_blocks, response_id, - finish_reasons, + finish_reason, ) ) yield event @@ -465,7 +464,7 @@ def _wrap_synchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, - finish_reasons=finish_reasons, + finish_reason=finish_reason, ) @@ -482,7 +481,7 @@ async def _wrap_asynchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None - finish_reasons = None + finish_reason = None try: async for event in iterator: @@ -507,14 +506,14 @@ async def _wrap_asynchronous_message_iterator( usage, content_blocks, response_id, - finish_reasons, + finish_reason, ) = _collect_ai_data( event, model, usage, content_blocks, response_id, - finish_reasons, + finish_reason, ) yield event finally: @@ -538,7 +537,7 @@ async def _wrap_asynchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, - finish_reasons=finish_reasons, + finish_reason=finish_reason, ) @@ -553,15 +552,15 @@ def _set_output_data( content_blocks: "list[Any]", finish_span: bool = False, response_id: "str | None" = None, - finish_reasons: "list[str] | None" = None, + finish_reason: "str | None" = None, ) -> None: """ Set output data for the span based on the AI response.""" span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) if response_id is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id) - if finish_reasons is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons) + if finish_reason is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason]) if should_send_default_pii() and integration.include_prompts: output_messages: "dict[str, list[Any]]" = { "response": [], @@ -655,10 +654,6 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A elif hasattr(content_block, "text"): content_blocks.append({"type": "text", "text": content_block.text}) - finish_reasons = None - if getattr(result, "stop_reason", None) is not None: - finish_reasons = [getattr(result, "stop_reason")] - _set_output_data( span=span, integration=integration, @@ -670,7 +665,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A content_blocks=content_blocks, finish_span=True, response_id=getattr(result, "id", None), - finish_reasons=finish_reasons, + finish_reason=getattr(result, "stop_reason", None), ) else: span.set_data("unknown_response", True) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 184002ca3a..8b83d2d128 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1477,15 +1477,15 @@ def test_collect_ai_data_with_input_json_delta(): content_blocks = [] - model, new_usage, new_content_blocks, response_id, finish_reasons = ( - _collect_ai_data(event, model, usage, content_blocks) + model, new_usage, new_content_blocks, response_id, finish_reason = _collect_ai_data( + event, model, usage, content_blocks ) assert model is None assert new_usage.input_tokens == usage.input_tokens assert new_usage.output_tokens == usage.output_tokens assert new_content_blocks == ["test"] assert response_id is None - assert finish_reasons is None + assert finish_reason is None @pytest.mark.skipif(