|
2 | 2 |
|
3 | 3 | Hacked together by Ross Wightman |
4 | 4 | """ |
5 | | -import torch |
6 | | -from PIL import Image |
7 | | -import numpy as np |
8 | 5 | import random |
9 | 6 | import math |
| 7 | +from copy import deepcopy |
| 8 | + |
| 9 | +from PIL import Image |
| 10 | +import numpy as np |
| 11 | +import torch |
10 | 12 |
|
11 | 13 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) |
12 | 14 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) |
@@ -91,13 +93,13 @@ def __call__(self, img, anno: dict): |
91 | 93 | new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color) |
92 | 94 | interp_method = _pil_interp(self.interpolation) |
93 | 95 | img = img.resize((scaled_w, scaled_h), interp_method) |
94 | | - new_img.paste(img) |
| 96 | + new_img.paste(img) # pastes at 0,0 (upper-left corner) |
95 | 97 |
|
96 | 98 | if 'bbox' in anno: |
97 | | - # FIXME haven't tested this path since not currently using dataset annotations for train/eval |
98 | 99 | bbox = anno['bbox'] |
99 | 100 | bbox[:, :4] *= img_scale |
100 | | - clip_boxes_(bbox, (scaled_h, scaled_w)) |
| 101 | + bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1])) |
| 102 | + clip_boxes_(bbox, bbox_bound) # crop to bounds of target image or letter-box, whichever is smaller |
101 | 103 | valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1) |
102 | 104 | anno['bbox'] = bbox[valid_indices, :] |
103 | 105 | anno['cls'] = anno['cls'][valid_indices] |
@@ -151,15 +153,15 @@ def __call__(self, img, anno: dict): |
151 | 153 | right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0]) |
152 | 154 | img = img.crop((offset_x, offset_y, right, lower)) |
153 | 155 | new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color) |
154 | | - new_img.paste(img) |
| 156 | + new_img.paste(img) # pastes at 0,0 (upper-left corner) |
155 | 157 |
|
156 | 158 | if 'bbox' in anno: |
157 | | - # FIXME not fully tested |
158 | | - bbox = anno['bbox'].copy() # FIXME copy for debugger inspection, back to inplace |
| 159 | + bbox = anno['bbox'] # for convenience, modifies in-place |
159 | 160 | bbox[:, :4] *= img_scale |
160 | 161 | box_offset = np.stack([offset_y, offset_x] * 2) |
161 | 162 | bbox -= box_offset |
162 | | - clip_boxes_(bbox, (scaled_h, scaled_w)) |
| 163 | + bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1])) |
| 164 | + clip_boxes_(bbox, bbox_bound) # crop to bounds of target image or letter-box, whichever is smaller |
163 | 165 | valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1) |
164 | 166 | anno['bbox'] = bbox[valid_indices, :] |
165 | 167 | anno['cls'] = anno['cls'][valid_indices] |
|
0 commit comments