.

alexander-alderman-webb · alexander-alderman-webb · commit 53859e4762f4 · 2026-03-10T14:38:18.000+01:00
diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py
@@ -1,5 +1,6 @@
 import sys
 import json
+import inspect
 from collections.abc import Iterable
 from functools import wraps
 from typing import TYPE_CHECKING
@@ -37,6 +38,7 @@
     except ImportError:
         Omit = None
 
+    from anthropic import Stream, AsyncStream
     from anthropic.resources import AsyncMessages, Messages
 
     if TYPE_CHECKING:
@@ -45,11 +47,10 @@
     raise DidNotEnable("Anthropic not installed")
 
 if TYPE_CHECKING:
-    from typing import Any, AsyncIterator, Iterator, List, Optional, Union
+    from typing import Any, AsyncIterator, Iterator, List, Optional, Union, Callable
     from sentry_sdk.tracing import Span
     from sentry_sdk._types import TextPart
 
-    from anthropic import AsyncStream
     from anthropic.types import RawMessageStreamEvent
 
 
@@ -75,6 +76,117 @@ def setup_once() -> None:
         Messages.create = _wrap_message_create(Messages.create)
         AsyncMessages.create = _wrap_message_create_async(AsyncMessages.create)
 
+        Stream.__iter__ = _wrap_stream_iter(Stream.__iter__)
+        AsyncStream.__aiter__ = _wrap_async_stream_aiter(AsyncStream.__aiter__)
+
+
+def _wrap_stream_iter(
+    f: "Callable[..., Iterator[RawMessageStreamEvent]]",
+) -> "Callable[..., Iterator[RawMessageStreamEvent]]":
+    @wraps(f)
+    def _patched_iter(self: "Stream") -> "Iterator[RawMessageStreamEvent]":
+        if not hasattr(self, "_sentry_span"):
+            for event in f(self):
+                yield event
+
+        model = None
+        usage = _RecordedUsage()
+        content_blocks: "list[str]" = []
+
+        for event in f(self):
+            (
+                model,
+                usage,
+                content_blocks,
+            ) = _collect_ai_data(
+                event,
+                model,
+                usage,
+                content_blocks,
+            )
+            yield event
+
+        # Anthropic's input_tokens excludes cached/cache_write tokens.
+        # Normalize to total input tokens for correct cost calculations.
+        total_input = (
+            usage.input_tokens
+            + (usage.cache_read_input_tokens or 0)
+            + (usage.cache_write_input_tokens or 0)
+        )
+
+        span = self._sentry_span
+        integration = self._integration
+
+        _set_output_data(
+            span=span,
+            integration=integration,
+            model=model,
+            input_tokens=total_input,
+            output_tokens=usage.output_tokens,
+            cache_read_input_tokens=usage.cache_read_input_tokens,
+            cache_write_input_tokens=usage.cache_write_input_tokens,
+            content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
+            finish_span=True,
+        )
+
+        return f(self)
+
+    return _patched_iter
+
+
+def _wrap_async_stream_aiter(
+    f: "Callable[..., AsyncIterator[RawMessageStreamEvent]]",
+) -> "Callable[..., AsyncIterator[RawMessageStreamEvent]]":
+    @wraps(f)
+    async def _patched_aiter(
+        self: "AsyncStream",
+    ) -> "AsyncIterator[RawMessageStreamEvent]":
+        if not hasattr(self, "_sentry_span"):
+            async for event in f(self):
+                yield event
+
+        model = None
+        usage = _RecordedUsage()
+        content_blocks: "list[str]" = []
+
+        async for event in f(self):
+            (
+                model,
+                usage,
+                content_blocks,
+            ) = _collect_ai_data(
+                event,
+                model,
+                usage,
+                content_blocks,
+            )
+            yield event
+
+        # Anthropic's input_tokens excludes cached/cache_write tokens.
+        # Normalize to total input tokens for correct cost calculations.
+        total_input = (
+            usage.input_tokens
+            + (usage.cache_read_input_tokens or 0)
+            + (usage.cache_write_input_tokens or 0)
+        )
+
+        span = self._sentry_span
+        integration = self._integration
+
+        _set_output_data(
+            span=span,
+            integration=integration,
+            model=model,
+            input_tokens=total_input,
+            output_tokens=usage.output_tokens,
+            cache_read_input_tokens=usage.cache_read_input_tokens,
+            cache_write_input_tokens=usage.cache_write_input_tokens,
+            content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
+            finish_span=True,
+        )
+
+    return _patched_aiter
+
 
 def _capture_exception(exc: "Any") -> None:
     set_span_errored()
@@ -392,98 +504,6 @@ def _set_output_data(
         span.__exit__(None, None, None)
 
 
-def _patch_streaming_response_iterator(
-    result: "AsyncStream[RawMessageStreamEvent]",
-    span: "sentry_sdk.tracing.Span",
-    integration: "AnthropicIntegration",
-) -> None:
-    """
-    Responsible for closing the `gen_ai.chat` span and setting attributes acquired during response consumption.
-    """
-    old_iterator = result._iterator
-
-    def new_iterator() -> "Iterator[MessageStreamEvent]":
-        model = None
-        usage = _RecordedUsage()
-        content_blocks: "list[str]" = []
-
-        for event in old_iterator:
-            (
-                model,
-                usage,
-                content_blocks,
-            ) = _collect_ai_data(
-                event,
-                model,
-                usage,
-                content_blocks,
-            )
-            yield event
-
-        # Anthropic's input_tokens excludes cached/cache_write tokens.
-        # Normalize to total input tokens for correct cost calculations.
-        total_input = (
-            usage.input_tokens
-            + (usage.cache_read_input_tokens or 0)
-            + (usage.cache_write_input_tokens or 0)
-        )
-
-        _set_output_data(
-            span=span,
-            integration=integration,
-            model=model,
-            input_tokens=total_input,
-            output_tokens=usage.output_tokens,
-            cache_read_input_tokens=usage.cache_read_input_tokens,
-            cache_write_input_tokens=usage.cache_write_input_tokens,
-            content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
-            finish_span=True,
-        )
-
-    async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
-        model = None
-        usage = _RecordedUsage()
-        content_blocks: "list[str]" = []
-
-        async for event in old_iterator:
-            (
-                model,
-                usage,
-                content_blocks,
-            ) = _collect_ai_data(
-                event,
-                model,
-                usage,
-                content_blocks,
-            )
-            yield event
-
-        # Anthropic's input_tokens excludes cached/cache_write tokens.
-        # Normalize to total input tokens for correct cost calculations.
-        total_input = (
-            usage.input_tokens
-            + (usage.cache_read_input_tokens or 0)
-            + (usage.cache_write_input_tokens or 0)
-        )
-
-        _set_output_data(
-            span=span,
-            integration=integration,
-            model=model,
-            input_tokens=total_input,
-            output_tokens=usage.output_tokens,
-            cache_read_input_tokens=usage.cache_read_input_tokens,
-            cache_write_input_tokens=usage.cache_write_input_tokens,
-            content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
-            finish_span=True,
-        )
-
-    if str(type(result._iterator)) == "<class 'async_generator'>":
-        result._iterator = new_iterator_async()
-    else:
-        result._iterator = new_iterator()
-
-
 def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any":
     integration = kwargs.pop("integration")
     if integration is None:
@@ -510,9 +530,9 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
 
     result = yield f, args, kwargs
 
-    is_streaming_response = kwargs.get("stream", False)
-    if is_streaming_response:
-        _patch_streaming_response_iterator(result, span, integration)
+    if isinstance(result, Stream) or isinstance(result, AsyncStream):
+        result._sentry_span = span
+        result._integration = integration
         return result
 
     with capture_internal_exceptions():