Skip to content

Commit 61db94c

Browse files
authored
fix(genai): Update Bounding Box Sample to use consistent order of bounding box coordinates (#13441)
1 parent 613f4ad commit 61db94c

File tree

1 file changed

+29
-21
lines changed

1 file changed

+29
-21
lines changed

genai/bounding_box/boundingbox_with_txt_img.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@
1616
def generate_content() -> str:
1717
# [START googlegenaisdk_boundingbox_with_txt_img]
1818
import requests
19-
2019
from google import genai
21-
from google.genai.types import GenerateContentConfig, HttpOptions, Part, SafetySetting
22-
20+
from google.genai.types import (
21+
GenerateContentConfig,
22+
HarmBlockThreshold,
23+
HarmCategory,
24+
HttpOptions,
25+
Part,
26+
SafetySetting,
27+
)
2328
from PIL import Image, ImageColor, ImageDraw
24-
2529
from pydantic import BaseModel
2630

2731
# Helper class to represent a bounding box
@@ -31,7 +35,7 @@ class BoundingBox(BaseModel):
3135
3236
Attributes:
3337
box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,
34-
typically in the format [x_min, y_min, x_max, y_max].
38+
in the format [y_min, x_min, y_max, x_max], normalized to a 0-1000 scale.
3539
label (str): A string representing the label or class associated with the object within the bounding box.
3640
"""
3741

@@ -41,12 +45,12 @@ class BoundingBox(BaseModel):
4145
# Helper function to plot bounding boxes on an image
4246
def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> None:
4347
"""
44-
Plots bounding boxes on an image with markers for each a name, using PIL, normalized coordinates, and different colors.
48+
Plots bounding boxes on an image with labels, using PIL and normalized coordinates.
4549
4650
Args:
47-
img_path: The path to the image file.
48-
bounding_boxes: A list of bounding boxes containing the name of the object
49-
and their positions in normalized [y1 x1 y2 x2] format.
51+
image_uri: The URI of the image file.
52+
bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in
53+
[y_min, x_min, y_max, x_max] format, normalized to a 0-1000 scale.
5054
"""
5155
with Image.open(requests.get(image_uri, stream=True, timeout=10).raw) as im:
5256
width, height = im.size
@@ -55,19 +59,23 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
5559
colors = list(ImageColor.colormap.keys())
5660

5761
for i, bbox in enumerate(bounding_boxes):
58-
y1, x1, y2, x2 = bbox.box_2d
59-
abs_y1 = int(y1 / 1000 * height)
60-
abs_x1 = int(x1 / 1000 * width)
61-
abs_y2 = int(y2 / 1000 * height)
62-
abs_x2 = int(x2 / 1000 * width)
62+
# Scale normalized coordinates to image dimensions
63+
abs_y_min = int(bbox.box_2d[0] / 1000 * height)
64+
abs_x_min = int(bbox.box_2d[1] / 1000 * width)
65+
abs_y_max = int(bbox.box_2d[2] / 1000 * height)
66+
abs_x_max = int(bbox.box_2d[3] / 1000 * width)
6367

6468
color = colors[i % len(colors)]
6569

70+
# Draw the rectangle; PIL takes (x, y) corner pairs, so reorder from the [y, x] box layout
6671
draw.rectangle(
67-
((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4
72+
((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)),
73+
outline=color,
74+
width=4,
6875
)
6976
if bbox.label:
70-
draw.text((abs_x1 + 8, abs_y1 + 6), bbox.label, fill=color)
77+
# Position the label just inside the top-left corner of the box
78+
draw.text((abs_x_min + 8, abs_y_min + 6), bbox.label, fill=color)
7179

7280
im.show()
7381

@@ -83,12 +91,12 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
8391
temperature=0.5,
8492
safety_settings=[
8593
SafetySetting(
86-
category="HARM_CATEGORY_DANGEROUS_CONTENT",
87-
threshold="BLOCK_ONLY_HIGH",
94+
category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
95+
threshold=HarmBlockThreshold.BLOCK_ONLY_HIGH,
8896
),
8997
],
9098
response_mime_type="application/json",
91-
response_schema=list[BoundingBox], # Add BoundingBox class to the response schema
99+
response_schema=list[BoundingBox],
92100
)
93101

94102
image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"
@@ -109,8 +117,8 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
109117

110118
# Example response:
111119
# [
112-
# {"box_2d": [36, 246, 380, 492], "label": "top left sock with face"},
113-
# {"box_2d": [260, 663, 640, 917], "label": "top right sock with face"},
120+
# {"box_2d": [6, 246, 386, 526], "label": "top-left light blue sock with cat face"},
121+
# {"box_2d": [234, 649, 650, 863], "label": "top-right light blue sock with cat face"},
114122
# ]
115123
# [END googlegenaisdk_boundingbox_with_txt_img]
116124
return response.text

0 commit comments

Comments
 (0)