16
16
def generate_content () -> str :
17
17
# [START googlegenaisdk_boundingbox_with_txt_img]
18
18
import requests
19
-
20
19
from google import genai
21
- from google .genai .types import GenerateContentConfig , HttpOptions , Part , SafetySetting
22
-
20
+ from google .genai .types import (
21
+ GenerateContentConfig ,
22
+ HarmBlockThreshold ,
23
+ HarmCategory ,
24
+ HttpOptions ,
25
+ Part ,
26
+ SafetySetting ,
27
+ )
23
28
from PIL import Image , ImageColor , ImageDraw
24
-
25
29
from pydantic import BaseModel
26
30
27
31
# Helper class to represent a bounding box
@@ -31,7 +35,7 @@ class BoundingBox(BaseModel):
31
35
32
36
Attributes:
33
37
box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,
34
- typically in the format [x_min, y_min, x_max , y_max].
38
+ typically in the format [y_min, x_min, y_max, x_max].
35
39
label (str): A string representing the label or class associated with the object within the bounding box.
36
40
"""
37
41
@@ -41,12 +45,12 @@ class BoundingBox(BaseModel):
41
45
# Helper function to plot bounding boxes on an image
42
46
def plot_bounding_boxes (image_uri : str , bounding_boxes : list [BoundingBox ]) -> None :
43
47
"""
44
- Plots bounding boxes on an image with markers for each a name , using PIL, normalized coordinates, and different colors .
48
+ Plots bounding boxes on an image with labels, using PIL and normalized coordinates.
45
49
46
50
Args:
47
- img_path : The path to the image file.
48
- bounding_boxes: A list of bounding boxes containing the name of the object
49
- and their positions in normalized [y1 x1 y2 x2 ] format.
51
+ image_uri : The URI of the image file.
52
+ bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in
53
+ normalized [y_min, x_min, y_max, x_max] format (0-1000 scale).
50
54
"""
51
55
with Image .open (requests .get (image_uri , stream = True , timeout = 10 ).raw ) as im :
52
56
width , height = im .size
@@ -55,19 +59,23 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
55
59
colors = list (ImageColor .colormap .keys ())
56
60
57
61
for i , bbox in enumerate (bounding_boxes ):
58
- y1 , x1 , y2 , x2 = bbox . box_2d
59
- abs_y1 = int (y1 / 1000 * height )
60
- abs_x1 = int (x1 / 1000 * width )
61
- abs_y2 = int (y2 / 1000 * height )
62
- abs_x2 = int (x2 / 1000 * width )
62
+ # Scale normalized coordinates to image dimensions
63
+ abs_y_min = int (bbox . box_2d [ 0 ] / 1000 * height )
64
+ abs_x_min = int (bbox . box_2d [ 1 ] / 1000 * width )
65
+ abs_y_max = int (bbox . box_2d [ 2 ] / 1000 * height )
66
+ abs_x_max = int (bbox . box_2d [ 3 ] / 1000 * width )
63
67
64
68
color = colors [i % len (colors )]
65
69
70
+ # Draw the rectangle using the correct (x, y) pairs
66
71
draw .rectangle (
67
- ((abs_x1 , abs_y1 ), (abs_x2 , abs_y2 )), outline = color , width = 4
72
+ ((abs_x_min , abs_y_min ), (abs_x_max , abs_y_max )),
73
+ outline = color ,
74
+ width = 4 ,
68
75
)
69
76
if bbox .label :
70
- draw .text ((abs_x1 + 8 , abs_y1 + 6 ), bbox .label , fill = color )
77
+ # Position the text at the top-left corner of the box
78
+ draw .text ((abs_x_min + 8 , abs_y_min + 6 ), bbox .label , fill = color )
71
79
72
80
im .show ()
73
81
@@ -83,12 +91,12 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
83
91
temperature = 0.5 ,
84
92
safety_settings = [
85
93
SafetySetting (
86
- category = " HARM_CATEGORY_DANGEROUS_CONTENT" ,
87
- threshold = " BLOCK_ONLY_HIGH" ,
94
+ category = HarmCategory . HARM_CATEGORY_DANGEROUS_CONTENT ,
95
+ threshold = HarmBlockThreshold . BLOCK_ONLY_HIGH ,
88
96
),
89
97
],
90
98
response_mime_type = "application/json" ,
91
- response_schema = list [BoundingBox ], # Add BoundingBox class to the response schema
99
+ response_schema = list [BoundingBox ],
92
100
)
93
101
94
102
image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"
@@ -109,8 +117,8 @@ def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> No
109
117
110
118
# Example response:
111
119
# [
112
- # {"box_2d": [36 , 246, 380, 492 ], "label": "top left sock with face"},
113
- # {"box_2d": [260, 663, 640, 917 ], "label": "top right sock with face"},
120
+ # {"box_2d": [6, 246, 386, 526], "label": "top-left light blue sock with cat face"},
121
+ # {"box_2d": [234, 649, 650, 863], "label": "top-right light blue sock with cat face"},
114
122
# ]
115
123
# [END googlegenaisdk_boundingbox_with_txt_img]
116
124
return response .text
0 commit comments