feat(genai): add python samples for text generation (2nd batch) (#13116)

Valeriy-Burlaka · web-flow · commit 0e0e64034ddf · 2025-01-31T11:18:08.000+01:00
* feat(genai): add python samples for text generation (2nd batch)

* remove auxiliary file

* move some samples into 1st batch

* feat: add 2 more textgen samples to the batch

* update the sdk

* test with local images

* linter/ordering

* update the usage of `Part.from_uri` for the new SDK
diff --git a/genai/text_generation/test_data/latte.jpg b/genai/text_generation/test_data/latte.jpg
diff --git a/genai/text_generation/test_data/scones.jpg b/genai/text_generation/test_data/scones.jpg
diff --git a/genai/text_generation/test_text_generation.py b/genai/text_generation/test_text_generation.py
@@ -18,9 +18,15 @@
 import textgen_chat_with_txt_stream
 import textgen_config_with_txt
 import textgen_sys_instr_with_txt
+import textgen_transcript_with_gcs_audio
+import textgen_with_gcs_audio
+import textgen_with_multi_img
+import textgen_with_multi_local_img
+import textgen_with_mute_video
 import textgen_with_txt
 import textgen_with_txt_img
 import textgen_with_txt_stream
+import textgen_with_video
 
 
 os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
@@ -29,18 +35,13 @@
 # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name"
 
 
-def test_textgen_with_txt() -> None:
-    response = textgen_with_txt.generate_content()
-    assert response
-
-
-def test_textgen_with_txt_img() -> None:
-    response = textgen_with_txt_img.generate_content()
+def test_textgen_with_txt_stream() -> None:
+    response = textgen_with_txt_stream.generate_content()
     assert response
 
 
-def test_textgen_with_txt_stream() -> None:
-    response = textgen_with_txt_stream.generate_content()
+def test_textgen_with_txt() -> None:
+    response = textgen_with_txt.generate_content()
     assert response
 
 
@@ -62,3 +63,41 @@ def test_textgen_config_with_txt() -> None:
 def test_textgen_sys_instr_with_txt() -> None:
     response = textgen_sys_instr_with_txt.generate_content()
     assert response
+
+
+def test_textgen_with_txt_img() -> None:
+    response = textgen_with_txt_img.generate_content()
+    assert response
+
+
+def test_textgen_with_multi_img() -> None:
+    response = textgen_with_multi_img.generate_content()
+    assert response
+
+
+def test_textgen_with_multi_local_img() -> None:
+    response = textgen_with_multi_local_img.generate_content(
+        "./test_data/latte.jpg",
+        "./test_data/scones.jpg",
+    )
+    assert response
+
+
+def test_textgen_with_mute_video() -> None:
+    response = textgen_with_mute_video.generate_content()
+    assert response
+
+
+def test_textgen_with_gcs_audio() -> None:
+    response = textgen_with_gcs_audio.generate_content()
+    assert response
+
+
+def test_textgen_transcript_with_gcs_audio() -> None:
+    response = textgen_transcript_with_gcs_audio.generate_content()
+    assert response
+
+
+def test_textgen_with_video() -> None:
+    response = textgen_with_video.generate_content()
+    assert response
diff --git a/genai/text_generation/textgen_transcript_with_gcs_audio.py b/genai/text_generation/textgen_transcript_with_gcs_audio.py
@@ -0,0 +1,49 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_transcript_with_gcs_audio]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+
+    prompt = """
+    Transcribe the interview, in the format of timecode, speaker, caption.
+    Use speaker A, speaker B, etc. to identify speakers.
+    """
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            prompt,
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/audio/pixel.mp3",
+                mime_type="audio/mpeg"
+            )
+        ]
+    )
+    print(response.text)
+    # Example response:
+    # [00:00:00] **Speaker A:** your devices are getting better over time. And so ...
+    # [00:00:14] **Speaker B:** Welcome to the Made by Google podcast where we meet ...
+    # [00:00:20] **Speaker B:** Here's your host, Rasheed Finch.
+    # [00:00:23] **Speaker C:** Today we're talking to Aisha Sharif and DeCarlos Love. ...
+    # ...
+    # [END googlegenaisdk_textgen_transcript_with_gcs_audio]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()
diff --git a/genai/text_generation/textgen_with_gcs_audio.py b/genai/text_generation/textgen_with_gcs_audio.py
@@ -0,0 +1,52 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_with_gcs_audio]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+
+    prompt = """
+    Provide the summary of the audio file.
+    Summarize the main points of the audio concisely.
+    Create a chapter breakdown with timestamps for key sections or topics discussed.
+    """
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            prompt,
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/audio/pixel.mp3",
+                mime_type="audio/mpeg"
+            )
+        ]
+    )
+    print(response.text)
+    # Example response:
+    # This episode of the Made by Google podcast features product managers ...
+    #
+    # **Chapter Breakdown:**
+    #
+    # * **[0:00-1:14] Introduction:** Host Rasheed Finch introduces Aisha and DeCarlos and ...
+    # * **[1:15-2:44] Transformative Features:** Aisha and DeCarlos discuss their ...
+    # ...
+    # [END googlegenaisdk_textgen_with_gcs_audio]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()
diff --git a/genai/text_generation/textgen_with_multi_img.py b/genai/text_generation/textgen_with_multi_img.py
@@ -0,0 +1,45 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_with_multi_img]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            "Generate a list of all the objects contained in both images.",
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/image/scones.jpg",
+                mime_type="image/jpeg"
+            ),
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/image/latte.jpg",
+                mime_type="image/jpeg"
+            )
+        ]
+    )
+    print(response.text)
+    # Example response:
+    # Okay, here's the list of objects present in both images:
+    # ...
+    # [END googlegenaisdk_textgen_with_multi_img]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()
diff --git a/genai/text_generation/textgen_with_multi_local_img.py b/genai/text_generation/textgen_with_multi_local_img.py
@@ -0,0 +1,57 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content(image_path_1: str, image_path_2: str) -> str:
+    # [START googlegenaisdk_textgen_with_multi_local_img]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+
+    # TODO(Developer): Update the below file paths to your images
+    # image_path_1 = "path/to/your/image1.jpg"
+    # image_path_2 = "path/to/your/image2.jpg"
+    with open(image_path_1, "rb") as f:
+        image_1_bytes = f.read()
+    with open(image_path_2, "rb") as f:
+        image_2_bytes = f.read()
+
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            "Write an advertising jingle based on the items in both images.",
+            Part.from_bytes(
+                data=image_1_bytes,
+                mime_type="image/jpeg"
+            ),
+            Part.from_bytes(
+                data=image_2_bytes,
+                mime_type="image/jpeg"
+            )
+        ]
+    )
+    print(response.text)
+    # Example response:
+    # Okay, here's a jingle combining the elements of both sets of images, focusing on ...
+    # ...
+    # [END googlegenaisdk_textgen_with_multi_local_img]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content(
+        "./test_data/latte.jpg",
+        "./test_data/scones.jpg",
+    )
diff --git a/genai/text_generation/textgen_with_mute_video.py b/genai/text_generation/textgen_with_mute_video.py
@@ -0,0 +1,41 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_with_mute_video]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            "What is in the video?",
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/video/ad_copy_from_video.mp4",
+                mime_type="video/mp4"
+            )
+        ]
+    )
+    print(response.text)
+    # Example response:
+    # The video shows several people surfing in an ocean with a coastline in the background. The camera ...
+    # [END googlegenaisdk_textgen_with_mute_video]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()
diff --git a/genai/text_generation/textgen_with_video.py b/genai/text_generation/textgen_with_video.py
@@ -0,0 +1,56 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_content() -> str:
+    # [START googlegenaisdk_textgen_with_video]
+    from google import genai
+    from google.genai.types import Part
+
+    client = genai.Client()
+
+    prompt = """
+    Analyze the provided video file, including its audio.
+    Summarize the main points of the video concisely.
+    Create a chapter breakdown with timestamps for key sections or topics discussed.
+    """
+    response = client.models.generate_content(
+        model="gemini-2.0-flash-001",
+        contents=[
+            prompt,
+            Part.from_uri(
+                file_uri="gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
+                mime_type="video/mp4"
+            )
+        ]
+    )
+
+    print(response.text)
+    # Example response:
+    # Here's a breakdown of the video:
+    #
+    # **Summary:**
+    #
+    # Saeka Shimada, a photographer in Tokyo, uses the Google Pixel 8 Pro's "Video Boost" feature to ...
+    #
+    # **Chapter Breakdown with Timestamps:**
+    #
+    # * **[00:00-00:12] Introduction & Tokyo at Night:** Saeka Shimada introduces herself ...
+    # ...
+    # [END googlegenaisdk_textgen_with_video]
+    return response.text
+
+
+if __name__ == "__main__":
+    generate_content()