Skip to content

Commit 6abfe7c

Browse files
authored
docs(genai): Add Batch Embeddings Sample for Gen AI SDK (#13175)
1 parent 01a285c commit 6abfe7c

3 files changed

Lines changed: 76 additions & 45 deletions

File tree

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def generate_content(output_uri: str) -> str:
    """Create a batch embeddings job and wait until it stops running.

    Submits a batch job for the ``text-embedding-005`` model that reads its
    requests from the hard-coded ``gs://`` input file below and writes to
    ``output_uri``. The job is then polled every 30 seconds until it reaches
    one of the states listed in ``completed_states``.

    Args:
        output_uri: Destination handed to ``CreateBatchJobConfig(dest=...)``,
            for example ``"gs://your-bucket/your-prefix"``.

    Returns:
        The last observed ``job.state``.
    """
    # [START googlegenaisdk_batchpredict_embeddings_with_gcs]
    import time

    from google import genai
    from google.genai.types import CreateBatchJobConfig, HttpOptions, JobState

    client = genai.Client(http_options=HttpOptions(api_version="v1"))
    # TODO(developer): Update and un-comment below line
    # output_uri = "gs://your-bucket/your-prefix"

    # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.batches.Batches.create
    job = client.batches.create(
        model="text-embedding-005",
        # Source link: https://storage.cloud.google.com/cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl
        src="gs://cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl",
        config=CreateBatchJobConfig(dest=output_uri),
    )
    print(f"Job name: {job.name}")
    print(f"Job state: {job.state}")
    # Example response:
    # Job name: projects/%PROJECT_ID%/locations/us-central1/batchPredictionJobs/9876453210000000000
    # Job state: JOB_STATE_PENDING

    # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.types.BatchJob
    completed_states = {
        JobState.JOB_STATE_SUCCEEDED,
        JobState.JOB_STATE_FAILED,
        JobState.JOB_STATE_CANCELLED,
        JobState.JOB_STATE_PAUSED,
    }

    # Poll until the job reaches one of the states above. JOB_STATE_FAILED is
    # in that set, so the loop condition alone ends polling on failure.
    while job.state not in completed_states:
        time.sleep(30)
        job = client.batches.get(name=job.name)
        print(f"Job state: {job.state}")

    # Report the failure after the loop so the error is also shown when the
    # job is already in JOB_STATE_FAILED before the first poll.
    if job.state == JobState.JOB_STATE_FAILED:
        print(f"Error: {job.error}")

    # Example response:
    # Job state: JOB_STATE_PENDING
    # Job state: JOB_STATE_RUNNING
    # Job state: JOB_STATE_RUNNING
    # ...
    # Job state: JOB_STATE_SUCCEEDED

    # [END googlegenaisdk_batchpredict_embeddings_with_gcs]
    return job.state
65+
66+
67+
if __name__ == "__main__":
    # Script entry point: run the sample against a placeholder destination.
    # Replace the URI with a bucket/prefix you own before running.
    placeholder_output_uri = "gs://your-bucket/your-prefix"
    generate_content(output_uri=placeholder_output_uri)

genai/batch_prediction/test_batch_prediction_examples.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525

2626
import pytest
2727

28+
import batchpredict_embeddings_with_gcs
2829
import batchpredict_with_bq
2930
import batchpredict_with_gcs
3031

@@ -61,6 +62,13 @@ def gcs_output_uri() -> str:
6162
blob.delete()
6263

6364

65+
def test_batch_prediction_embeddings_with_gcs(gcs_output_uri: str) -> None:
    """Run the embeddings batch sample and expect JOB_STATE_SUCCEEDED back."""
    final_state = batchpredict_embeddings_with_gcs.generate_content(
        output_uri=gcs_output_uri,
    )
    assert final_state == JobState.JOB_STATE_SUCCEEDED
70+
71+
6472
def test_batch_prediction_with_bq(bq_output_uri: str) -> None:
    """Run the BigQuery batch sample and expect JOB_STATE_SUCCEEDED back."""
    final_state = batchpredict_with_bq.generate_content(
        output_uri=bq_output_uri,
    )
    assert final_state == JobState.JOB_STATE_SUCCEEDED

genai/embeddings/embed_content_text.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)