diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 73e5a6d9cb..b2b3f35e9c 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -486,6 +486,12 @@ class SPANDATA: Example: "Hello!" """ + GEN_AI_FUNCTION_ID = "gen_ai.function_id" + """ + Framework-specific tracing label for the execution of a function or other unit of execution in a generative AI system. + Example: "my-awesome-function" + """ + GEN_AI_OPERATION_NAME = "gen_ai.operation.name" """ The name of the operation being performed. diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 7a9d02f521..3ff228c61d 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -432,6 +432,13 @@ def on_chat_model_start( SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"] ) + run_name = kwargs.get("name") + if run_name: + span.set_data( + SPANDATA.GEN_AI_FUNCTION_ID, + run_name, + ) + for key, attribute in DATA_FIELDS.items(): if key in all_params and all_params[key] is not None: set_data_normalized(span, attribute, all_params[key], unpack=False) diff --git a/tests/conftest.py b/tests/conftest.py index 4e4943ba85..ba28e4991c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1265,26 +1265,31 @@ def streaming_chat_completions_model_response(): @pytest.fixture def nonstreaming_chat_completions_model_response(): - return openai.types.chat.ChatCompletion( - id="chatcmpl-test", - choices=[ - openai.types.chat.chat_completion.Choice( - index=0, - finish_reason="stop", - message=openai.types.chat.ChatCompletionMessage( - role="assistant", content="Test response" - ), - ) - ], - created=1234567890, - model="gpt-3.5-turbo", - object="chat.completion", - usage=openai.types.CompletionUsage( - prompt_tokens=10, - completion_tokens=20, - total_tokens=30, - ), - ) + def inner( + response_id: str, + response_model: str, + message_content: str, + created: int, + usage: openai.types.CompletionUsage, + ): + return openai.types.chat.ChatCompletion( + id=response_id, + choices=[ + openai.types.chat.chat_completion.Choice( + index=0, + finish_reason="stop", + message=openai.types.chat.ChatCompletionMessage( + role="assistant", content=message_content + ), + ) + ], + created=created, + model=response_model, + object="chat.completion", + usage=usage, + ) + + return inner @pytest.fixture diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 9243fcda53..437bcb17a3 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -67,6 +67,7 @@ ) LANGCHAIN_VERSION = package_version("langchain") +LANGCHAIN_OPENAI_VERSION = package_version("langchain-openai") @tool @@ -170,6 +171,68 @@ def test_langchain_text_completion( assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15 +def test_langchain_chat_with_run_name( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=True, + ) + ], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + request_headers = {} + # Changed in https://github.com/langchain-ai/langchain/pull/32655 + if LANGCHAIN_OPENAI_VERSION >= (0, 3, 32): + request_headers["X-Stainless-Raw-Response"] = "True" + + model_response = get_model_response( + nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="response-model-id", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ), + serialize_pydantic=True, + request_headers=request_headers, + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, + openai_api_key="badkey", + ) + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _: + with start_transaction(): + llm.invoke( + "How many letters in the word eudca", + config={"run_name": "my-snazzy-pipeline"}, + ) + + tx = events[0] + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline" + + @pytest.mark.skipif( LANGCHAIN_VERSION < (1,), reason="LangChain 1.0+ required (ONE AGENT refactor)", diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index a8df5891ce..18f8cfaf6e 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -33,6 +33,7 @@ async def __call__(self, *args, **kwargs): from sentry_sdk.utils import package_version from openai import OpenAI, AsyncOpenAI +from openai.types import CompletionUsage from concurrent.futures import ThreadPoolExecutor @@ -160,7 +161,17 @@ def test_nonstreaming_chat_completion( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -242,7 +253,17 @@ async def test_async_nonstreaming_chat_completion( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -904,7 +925,17 @@ def test_span_origin( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -949,7 +980,17 @@ def test_multiple_providers( openai_client = OpenAI(api_key="test-key") openai_model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1043,7 +1084,17 @@ async def test_async_multiple_providers( openai_client = AsyncOpenAI(api_key="test-key") openai_model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1138,7 +1189,17 @@ def test_additional_parameters( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1196,7 +1257,17 @@ async def test_async_additional_parameters( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1254,7 +1325,17 @@ def test_no_integration( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1301,7 +1382,17 @@ async def test_async_no_integration( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1478,7 +1569,17 @@ def test_binary_content_encoding_image_url( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1556,7 +1657,17 @@ async def test_async_binary_content_encoding_image_url( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1636,7 +1747,17 @@ def test_binary_content_encoding_mixed_content( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1703,7 +1824,17 @@ async def test_async_binary_content_encoding_mixed_content( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1769,7 +1900,17 @@ def test_binary_content_encoding_uri_type( client = OpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) @@ -1841,7 +1982,17 @@ async def test_async_binary_content_encoding_uri_type( client = AsyncOpenAI(api_key="test-key") model_response = get_model_response( - nonstreaming_chat_completions_model_response, + nonstreaming_chat_completions_model_response( + response_id="chatcmpl-test", + response_model="gpt-3.5-turbo", + message_content="Test response", + created=1234567890, + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ), serialize_pydantic=True, request_headers={"X-Stainless-Raw-Response": "true"}, ) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ada2e633de..20bbf2adf5 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -16,7 +16,7 @@ from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding -from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk +from openai.types.chat import ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage @@ -61,26 +61,6 @@ async def __call__(self, *args, **kwargs): OPENAI_VERSION = package_version("openai") -EXAMPLE_CHAT_COMPLETION = ChatCompletion( - id="chat-id", - choices=[ - Choice( - index=0, - finish_reason="stop", - message=ChatCompletionMessage( - role="assistant", content="the model response" - ), - ) - ], - created=10000000, - model="response-model-id", - object="chat.completion", - usage=CompletionUsage( - completion_tokens=10, - prompt_tokens=20, - total_tokens=30, - ), -) if SKIP_RESPONSES_TESTS: @@ -132,7 +112,11 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, + capture_events, + send_default_pii, + include_prompts, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], @@ -142,7 +126,19 @@ def test_nonstreaming_chat_completion_no_prompts( events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): response = ( @@ -229,7 +225,13 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion( + sentry_init, + capture_events, + messages, + request, + nonstreaming_chat_completions_model_response, +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -238,7 +240,19 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): response = ( @@ -308,7 +322,11 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, + capture_events, + send_default_pii, + include_prompts, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], @@ -318,7 +336,19 @@ async def test_nonstreaming_chat_completion_async_no_prompts( events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.AsyncMock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): response = await client.chat.completions.create( @@ -404,7 +434,11 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, + capture_events, + messages, + request, + nonstreaming_chat_completions_model_response, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -414,7 +448,19 @@ async def test_nonstreaming_chat_completion_async( events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = AsyncMock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): response = await client.chat.completions.create( @@ -1850,7 +1896,9 @@ async def test_embeddings_create_raises_error_async( assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -1858,7 +1906,19 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): client.chat.completions.create( @@ -1872,7 +1932,9 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -1880,7 +1942,19 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): events = capture_events() client = AsyncOpenAI(api_key="z") - client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = AsyncMock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): await client.chat.completions.create( @@ -3630,7 +3704,9 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion( + sentry_init, capture_events, tools, nonstreaming_chat_completions_model_response +): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, @@ -3638,7 +3714,19 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) with start_transaction(name="openai tx"): client.chat.completions.create( @@ -3669,7 +3757,11 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, + capture_events, + test_message, + expected_role, + nonstreaming_chat_completions_model_response, ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3681,7 +3773,19 @@ def test_openai_message_role_mapping( events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) test_messages = [test_message] @@ -3702,7 +3806,9 @@ def test_openai_message_role_mapping( assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation( + sentry_init, capture_events, nonstreaming_chat_completions_model_response +): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -3712,7 +3818,19 @@ def test_openai_message_truncation(sentry_init, capture_events): events = capture_events() client = OpenAI(api_key="z") - client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + client.chat.completions._post = mock.Mock( + return_value=nonstreaming_chat_completions_model_response( + response_id="chat-id", + response_model="gpt-3.5-turbo", + message_content="the model response", + created=10000000, + usage=CompletionUsage( + prompt_tokens=20, + completion_tokens=10, + total_tokens=30, + ), + ) + ) large_content = ( "This is a very long message that will exceed our size limits. " * 1000