diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index bc208ac4f5..32522a7234 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -159,7 +159,8 @@ def _collect_ai_data( usage: "_RecordedUsage", content_blocks: "list[str]", response_id: "str | None" = None, -) -> "tuple[str | None, _RecordedUsage, list[str], str | None]": + finish_reason: "str | None" = None, +) -> "tuple[str | None, _RecordedUsage, list[str], str | None, str | None]": """ Collect model information, token usage, and collect content blocks from the AI streaming response. """ @@ -197,6 +198,7 @@ def _collect_ai_data( usage, content_blocks, response_id, + finish_reason, ) # Counterintuitive, but message_delta contains cumulative token counts :) @@ -221,18 +223,17 @@ def _collect_ai_data( usage.cache_read_input_tokens = cache_read_input_tokens # TODO: Record event.usage.server_tool_use - return ( - model, - usage, - content_blocks, - response_id, - ) + if event.delta.stop_reason is not None: + finish_reason = event.delta.stop_reason + + return (model, usage, content_blocks, response_id, finish_reason) return ( model, usage, content_blocks, response_id, + finish_reason, ) @@ -411,6 +412,7 @@ def _wrap_synchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None + finish_reason = None try: for event in iterator: @@ -430,12 +432,15 @@ def _wrap_synchronous_message_iterator( yield event continue - (model, usage, content_blocks, response_id) = _collect_ai_data( - event, - model, - usage, - content_blocks, - response_id, + (model, usage, content_blocks, response_id, finish_reason) = ( + _collect_ai_data( + event, + model, + usage, + content_blocks, + response_id, + finish_reason, + ) ) yield event finally: @@ -459,6 +464,7 @@ def _wrap_synchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, + finish_reason=finish_reason, ) @@ -475,6 +481,7 @@ async def _wrap_asynchronous_message_iterator( usage = _RecordedUsage() content_blocks: "list[str]" = [] response_id = None + finish_reason = None try: async for event in iterator: @@ -499,12 +506,14 @@ async def _wrap_asynchronous_message_iterator( usage, content_blocks, response_id, + finish_reason, ) = _collect_ai_data( event, model, usage, content_blocks, response_id, + finish_reason, ) yield event finally: @@ -528,6 +537,7 @@ async def _wrap_asynchronous_message_iterator( content_blocks=[{"text": "".join(content_blocks), "type": "text"}], finish_span=True, response_id=response_id, + finish_reason=finish_reason, ) @@ -542,12 +552,15 @@ def _set_output_data( content_blocks: "list[Any]", finish_span: bool = False, response_id: "str | None" = None, + finish_reason: "str | None" = None, ) -> None: """ Set output data for the span based on the AI response.""" span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) if response_id is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response_id) + if finish_reason is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason]) if should_send_default_pii() and integration.include_prompts: output_messages: "dict[str, list[Any]]" = { "response": [], @@ -652,6 +665,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A content_blocks=content_blocks, finish_span=True, response_id=getattr(result, "id", None), + finish_reason=getattr(result, "stop_reason", None), ) else: span.set_data("unknown_response", True) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 3a854e3a4e..8b83d2d128 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -63,6 +63,7 @@ async def __call__(self, *args, **kwargs): role="assistant", content=[TextBlock(type="text", text="Hi, I'm Claude.")], type="message", + stop_reason="end_turn", usage=Usage(input_tokens=10, output_tokens=20), ) @@ -136,6 +137,7 @@ def test_nonstreaming_create_message( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -258,7 +260,7 @@ def test_streaming_create_message( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -323,6 +325,7 @@ def test_streaming_create_message( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.parametrize( @@ -373,7 +376,7 @@ def test_stream_messages( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -439,6 +442,7 @@ def test_stream_messages( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio @@ -492,7 +496,7 @@ async def test_streaming_create_message_async( ), ContentBlockStopEvent(type="content_block_stop", index=0), MessageDeltaEvent( - delta=Delta(), + delta=Delta(stop_reason="max_tokens"), usage=MessageDeltaUsage(output_tokens=10), type="message_delta", ), @@ -504,6 +508,7 @@ async def test_streaming_create_message_async( sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, + default_integrations=False, send_default_pii=send_default_pii, ) events = capture_events() @@ -559,6 +564,7 @@ async def test_streaming_create_message_async( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio @@ -1471,7 +1477,7 @@ def test_collect_ai_data_with_input_json_delta(): content_blocks = [] - model, new_usage, new_content_blocks, response_id = _collect_ai_data( + model, new_usage, new_content_blocks, response_id, finish_reason = _collect_ai_data( event, model, usage, content_blocks ) assert model is None @@ -1479,6 +1485,7 @@ def test_collect_ai_data_with_input_json_delta(): assert new_usage.output_tokens == usage.output_tokens assert new_content_blocks == ["test"] assert response_id is None + assert finish_reason is None @pytest.mark.skipif( @@ -1766,6 +1773,7 @@ def test_nonstreaming_create_message_with_system_prompt( assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -1851,6 +1859,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize(