Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a7758b5
fix(litellm): Avoid double span exits when streaming
alexander-alderman-webb Apr 1, 2026
3f761e9
simplify
alexander-alderman-webb Apr 1, 2026
1f94674
test cleanup
alexander-alderman-webb Apr 1, 2026
c31472c
docs
alexander-alderman-webb Apr 1, 2026
50d0b1f
use underscore
alexander-alderman-webb Apr 2, 2026
edd8a90
test(litellm): Replace mocks with httpx types in nonstreaming tests
alexander-alderman-webb Apr 2, 2026
ec3d128
add fixture
alexander-alderman-webb Apr 2, 2026
a4b9b3a
more mocks
alexander-alderman-webb Apr 7, 2026
9ae99be
update tox
alexander-alderman-webb Apr 7, 2026
c5b92c8
test(litellm): Replace mocks with httpx types in embedding tests
alexander-alderman-webb Apr 10, 2026
ad16c7f
test(litellm): Replace mocks with httpx types in rate-limit test
alexander-alderman-webb Apr 10, 2026
6f3c247
cleanup
alexander-alderman-webb Apr 10, 2026
dda374f
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
b622a07
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/error-mock
alexander-alderman-webb Apr 10, 2026
598d6b5
undo merge
alexander-alderman-webb Apr 10, 2026
a8689cd
remove fixture
alexander-alderman-webb Apr 10, 2026
ecd3718
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
0536025
re-run tox
alexander-alderman-webb Apr 10, 2026
62c32cb
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
f352bba
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 10, 2026
d9cf8b0
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
a2b3585
make request headers consistent
alexander-alderman-webb Apr 10, 2026
6eb17c9
reset all executor references
alexander-alderman-webb Apr 10, 2026
1b28574
merge
alexander-alderman-webb Apr 10, 2026
ce5ce74
delete span when finished
alexander-alderman-webb Apr 13, 2026
8435f36
safe exit pattern
alexander-alderman-webb Apr 13, 2026
392eb17
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 13, 2026
6d52689
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
5dc9bbe
update tox.ini again
alexander-alderman-webb Apr 13, 2026
0882066
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 13, 2026
ee4d55c
tox run
alexander-alderman-webb Apr 13, 2026
d9b9700
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
a56dbcb
merge master
alexander-alderman-webb Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,25 @@ def nonstreaming_chat_completions_model_response():
)


@pytest.fixture
def openai_embedding_model_response():
    """Canned OpenAI embedding response: one 3-dimensional vector, fixed usage.

    Mirrors what the real embeddings endpoint returns for a single input so
    tests can stub the HTTP layer with a genuine ``CreateEmbeddingResponse``
    instead of a hand-rolled mock.
    """
    vector = openai.types.Embedding(
        embedding=[0.1, 0.2, 0.3],
        index=0,
        object="embedding",
    )
    token_usage = openai.types.create_embedding_response.Usage(
        prompt_tokens=5,
        total_tokens=5,
    )
    return openai.types.CreateEmbeddingResponse(
        data=[vector],
        model="text-embedding-ada-002",
        object="list",
        usage=token_usage,
    )


@pytest.fixture
def nonstreaming_responses_model_response():
return openai.types.responses.Response(
Expand Down
141 changes: 76 additions & 65 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,38 +128,6 @@ def __init__(
self.created = 1234567890


class MockEmbeddingData:
    """Stand-in for a single embedding entry of an OpenAI-style response."""

    def __init__(self, embedding=None):
        # A falsy argument (None or an empty list) falls back to a fixed
        # 3-dimensional vector, matching the original `or` short-circuit.
        if not embedding:
            embedding = [0.1, 0.2, 0.3]
        self.embedding = embedding
        self.index = 0
        self.object = "embedding"


class MockEmbeddingResponse:
    """Stand-in for an OpenAI-style embedding response object.

    Exposes the attributes the instrumentation reads (`model`, `data`,
    `usage`, `object`) plus a pydantic-like `model_dump()` serializer.
    """

    def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
        self.model = model
        # Falsy data/usage fall back to one default embedding and fixed
        # token counts (same `or` semantics as the original).
        self.data = data if data else [MockEmbeddingData()]
        if usage:
            self.usage = usage
        else:
            self.usage = MockUsage(
                prompt_tokens=5, completion_tokens=0, total_tokens=5
            )
        self.object = "list"

    def model_dump(self):
        """Serialize to the dict shape produced by pydantic's model_dump()."""
        entries = [
            {
                "embedding": item.embedding,
                "index": item.index,
                "object": item.object,
            }
            for item in self.data
        ]
        token_counts = {
            "prompt_tokens": self.usage.prompt_tokens,
            "completion_tokens": self.usage.completion_tokens,
            "total_tokens": self.usage.total_tokens,
        }
        return {
            "model": self.model,
            "data": entries,
            "usage": token_counts,
            "object": self.object,
        }


@pytest.mark.parametrize(
"send_default_pii, include_prompts",
[
Expand Down Expand Up @@ -313,7 +281,13 @@ def test_streaming_chat_completion(
assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True


def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
def test_embeddings_create(
sentry_init,
capture_events,
get_model_response,
openai_embedding_model_response,
clear_litellm_cache,
):
"""
Test that litellm.embedding() calls are properly instrumented.

Expand All @@ -327,20 +301,24 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
)
events = capture_events()

mock_response = MockEmbeddingResponse()
client = OpenAI(api_key="test-key")

# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)
model_response = get_model_response(
openai_embedding_model_response,
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)

with mock.patch.object(
client.embeddings._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input="Hello, world!",
api_key="test-key", # Provide a fake API key to avoid authentication errors
client=client,
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)
Expand All @@ -351,8 +329,13 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]
spans = list(
x
for x in event["spans"]
if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
)
assert len(spans) == 1
span = spans[0]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
assert span["description"] == "embeddings text-embedding-ada-002"
Expand All @@ -365,7 +348,11 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):


def test_embeddings_create_with_list_input(
sentry_init, capture_events, clear_litellm_cache
sentry_init,
capture_events,
get_model_response,
openai_embedding_model_response,
clear_litellm_cache,
):
"""Test embedding with list input."""
sentry_init(
Expand All @@ -375,20 +362,24 @@ def test_embeddings_create_with_list_input(
)
events = capture_events()

mock_response = MockEmbeddingResponse()
client = OpenAI(api_key="test-key")

# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)
model_response = get_model_response(
openai_embedding_model_response,
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)

with mock.patch.object(
client.embeddings._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input=["First text", "Second text", "Third text"],
api_key="test-key", # Provide a fake API key to avoid authentication errors
client=client,
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)
Expand All @@ -399,8 +390,13 @@ def test_embeddings_create_with_list_input(
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]
spans = list(
x
for x in event["spans"]
if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
)
assert len(spans) == 1
span = spans[0]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
Expand All @@ -413,7 +409,13 @@ def test_embeddings_create_with_list_input(
]


def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
def test_embeddings_no_pii(
sentry_init,
capture_events,
get_model_response,
openai_embedding_model_response,
clear_litellm_cache,
):
"""Test that PII is not captured when disabled."""
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=True)],
Expand All @@ -422,20 +424,24 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
)
events = capture_events()

mock_response = MockEmbeddingResponse()
client = OpenAI(api_key="test-key")

# Mock within the test to ensure proper ordering with cache clearing
with mock.patch(
"litellm.openai_chat_completions.make_sync_openai_embedding_request"
) as mock_http:
# The function returns (headers, response)
mock_http.return_value = ({}, mock_response)
model_response = get_model_response(
openai_embedding_model_response,
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "true"},
)

with mock.patch.object(
client.embeddings._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
response = litellm.embedding(
model="text-embedding-ada-002",
input="Hello, world!",
api_key="test-key", # Provide a fake API key to avoid authentication errors
client=client,
)
# Allow time for callbacks to complete (they may run in separate threads)
time.sleep(0.1)
Expand All @@ -446,8 +452,13 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
(event,) = events

assert event["type"] == "transaction"
assert len(event["spans"]) == 1
(span,) = event["spans"]
spans = list(
x
for x in event["spans"]
if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
)
assert len(spans) == 1
span = spans[0]

assert span["op"] == OP.GEN_AI_EMBEDDINGS
# Check that embeddings input is NOT captured when PII is disabled
Expand Down
Loading