Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,12 @@ class SPANDATA:
Example: "Hello!"
"""

GEN_AI_FUNCTION_ID = "gen_ai.function_id"
"""
Framework-specific tracing label for the execution of a function or other unit of execution in a generative AI system.
Example: "my-awesome-function"
"""

GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
"""
The name of the operation being performed.
Expand Down
7 changes: 7 additions & 0 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,13 @@ def on_chat_model_start(
SPANDATA.GEN_AI_AGENT_NAME, agent_metadata["lc_agent_name"]
)

run_name = kwargs.get("name")
if run_name:
span.set_data(
SPANDATA.GEN_AI_FUNCTION_ID,
run_name,
Comment on lines +438 to +439
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The user-provided run_name is stored as gen_ai.function_id for chat models but as gen_ai.pipeline.name for LLMs, creating inconsistent tracing data.
Severity: MEDIUM

Suggested Fix

In on_chat_model_start, store the run_name under the same key used in on_llm_start, which is SPANDATA.GEN_AI_PIPELINE_NAME (gen_ai.pipeline.name), instead of SPANDATA.GEN_AI_FUNCTION_ID to ensure consistency.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/langchain.py#L438-L439

Potential issue: The new code in `on_chat_model_start` captures the `run_name` from
`kwargs.get("name")` and stores it in the span data under the key `gen_ai.function_id`.
However, the existing `on_llm_start` function stores the same `run_name` under the key
`gen_ai.pipeline.name`. This inconsistency means that users cannot query for all named
runs using a single key. Runs from chat models will be missed when querying by
`gen_ai.pipeline.name`, and LLM runs will be missed when querying by
`gen_ai.function_id`, fragmenting observability and making it difficult to aggregate
runs across different model types.

)
Comment on lines +435 to +440
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The run_name is stored under gen_ai.function_id for chat models but gen_ai.pipeline.name for LLM models, causing an inconsistency that will break an existing test.
Severity: MEDIUM

Suggested Fix

In the on_chat_model_start method, change the span data key from SPANDATA.GEN_AI_FUNCTION_ID to SPANDATA.GEN_AI_PIPELINE_NAME. This will align it with the on_llm_start method and ensure the run_name is stored consistently for all model types.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/langchain.py#L435-L440

Potential issue: The `on_chat_model_start` method stores the `run_name` parameter from
LangChain under the span data key `SPANDATA.GEN_AI_FUNCTION_ID`. This is inconsistent
with the existing `on_llm_start` method, which stores the same information under
`SPANDATA.GEN_AI_PIPELINE_NAME`. This discrepancy will cause the existing test
`test_langchain_chat_with_run_name` to fail because it asserts that the
`gen_ai.pipeline.name` key is present for chat model spans. This also breaks any
downstream tooling that relies on consistent key naming for the run name across
different model types.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent span attribute for run_name across callbacks

Medium Severity

The kwargs.get("name") value (originating from run_name in the user's config) is stored as GEN_AI_PIPELINE_NAME in on_llm_start but as GEN_AI_FUNCTION_ID in on_chat_model_start. The same user-provided run_name ends up under different span attributes depending on whether a text completion or chat model is used, making it impossible to query for run names consistently across both operation types.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit efc9460. Configure here.


for key, attribute in DATA_FIELDS.items():
if key in all_params and all_params[key] is not None:
set_data_normalized(span, attribute, all_params[key], unpack=False)
Expand Down
45 changes: 25 additions & 20 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,26 +1265,31 @@ def streaming_chat_completions_model_response():

@pytest.fixture
def nonstreaming_chat_completions_model_response():
    """Factory fixture for a canned non-streaming OpenAI ``ChatCompletion``.

    Returns a callable (rather than a fixed object) so individual tests can
    customise the response id, model name, message content, and creation
    timestamp while sharing one construction path.
    """

    def inner(
        response_id: str, response_model: str, message_content: str, created: int
    ):
        # Token usage is intentionally fixed across all tests.
        # NOTE(review): some OpenAI tests assert on input/output token
        # counts — confirm they expect prompt_tokens=10 / completion_tokens=20.
        return openai.types.chat.ChatCompletion(
            id=response_id,
            choices=[
                openai.types.chat.chat_completion.Choice(
                    index=0,
                    finish_reason="stop",
                    message=openai.types.chat.ChatCompletionMessage(
                        role="assistant", content=message_content
                    ),
                )
            ],
            created=created,
            model=response_model,
            object="chat.completion",
            usage=openai.types.CompletionUsage(
                prompt_tokens=10,
                completion_tokens=20,
                total_tokens=30,
            ),
        )

    return inner


@pytest.fixture
Expand Down
58 changes: 58 additions & 0 deletions tests/integrations/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
)

LANGCHAIN_VERSION = package_version("langchain")
LANGCHAIN_OPENAI_VERSION = package_version("langchain-openai")


@tool
Expand Down Expand Up @@ -170,6 +171,63 @@ def test_langchain_text_completion(
assert llm_span["data"]["gen_ai.usage.output_tokens"] == 15


def test_langchain_chat_with_run_name(
    sentry_init,
    capture_events,
    get_model_response,
    nonstreaming_chat_completions_model_response,
):
    """A user-supplied ``run_name`` config value must surface on the
    ``gen_ai.chat`` span under ``SPANDATA.GEN_AI_FUNCTION_ID``."""
    sentry_init(
        integrations=[
            LangchainIntegration(
                include_prompts=True,
            )
        ],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    request_headers = {}
    # Changed in https://github.com/langchain-ai/langchain/pull/32655
    if LANGCHAIN_OPENAI_VERSION >= (0, 3, 32):
        request_headers["X-Stainless-Raw-Response"] = "True"

    # Canned non-streaming chat-completion payload that the patched HTTP
    # client below will return instead of hitting the network.
    model_response = get_model_response(
        nonstreaming_chat_completions_model_response(
            response_id="chat-id",
            response_model="response-model-id",
            message_content="the model response",
            created=10000000,
        ),
        serialize_pydantic=True,
        request_headers=request_headers,
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,  # key is never used: transport is patched below
        openai_api_key="badkey",
    )

    # Patch the underlying httpx client's send() so no real request is made.
    with patch.object(
        llm.client._client._client,
        "send",
        return_value=model_response,
    ) as _:
        with start_transaction():
            llm.invoke(
                "How many letters in the word eudca",
                config={"run_name": "my-snazzy-pipeline"},
            )

    tx = events[0]

    # Exactly one chat span is expected, carrying the run_name as function_id.
    chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")
    assert len(chat_spans) == 1
    assert chat_spans[0]["data"][SPANDATA.GEN_AI_FUNCTION_ID] == "my-snazzy-pipeline"


@pytest.mark.skipif(
LANGCHAIN_VERSION < (1,),
reason="LangChain 1.0+ required (ONE AGENT refactor)",
Expand Down
105 changes: 90 additions & 15 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,12 @@ def test_nonstreaming_chat_completion(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -242,7 +247,12 @@ async def test_async_nonstreaming_chat_completion(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -904,7 +914,12 @@ def test_span_origin(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -949,7 +964,12 @@ def test_multiple_providers(

openai_client = OpenAI(api_key="test-key")
openai_model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1043,7 +1063,12 @@ async def test_async_multiple_providers(

openai_client = AsyncOpenAI(api_key="test-key")
openai_model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1138,7 +1163,12 @@ def test_additional_parameters(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1196,7 +1226,12 @@ async def test_async_additional_parameters(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1254,7 +1289,12 @@ def test_no_integration(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1301,7 +1341,12 @@ async def test_async_no_integration(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1478,7 +1523,12 @@ def test_binary_content_encoding_image_url(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1556,7 +1606,12 @@ async def test_async_binary_content_encoding_image_url(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1636,7 +1691,12 @@ def test_binary_content_encoding_mixed_content(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1703,7 +1763,12 @@ async def test_async_binary_content_encoding_mixed_content(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1769,7 +1834,12 @@ def test_binary_content_encoding_uri_type(
client = OpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down Expand Up @@ -1841,7 +1911,12 @@ async def test_async_binary_content_encoding_uri_type(
client = AsyncOpenAI(api_key="test-key")

model_response = get_model_response(
nonstreaming_chat_completions_model_response,
nonstreaming_chat_completions_model_response(
response_id="chatcmpl-test",
response_model="gpt-3.5-turbo",
message_content="Test response",
created=1234567890,
),
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)
Expand Down
Loading
Loading