diff --git a/README.md b/README.md index 91b0ca6..449a187 100644 --- a/README.md +++ b/README.md @@ -113,4 +113,6 @@ processed_image_dwpose = dwpose(img) ### Image resolution -In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and images sizes need to be using multiple of `64`. +In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and the resolution passed to `resize_image` need to be multiples of `32`. +Otherwise images are resized (each side rounded to the nearest multiple of `32`) so that they work correctly. +Resolution can be set to `None` to prevent resizing. This may lead to a `RuntimeError` if the image size is not a multiple of `32`. diff --git a/src/controlnet_aux/canny/__init__.py b/src/controlnet_aux/canny/__init__.py index aca9ae3..1872c82 100644 --- a/src/controlnet_aux/canny/__init__.py +++ b/src/controlnet_aux/canny/__init__.py @@ -20,12 +20,13 @@ def __call__(self, input_image=None, low_threshold=100, high_threshold=200, dete output_type = output_type or "np" input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) detected_map = cv2.Canny(input_image, low_threshold, high_threshold) detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/dwpose/__init__.py b/src/controlnet_aux/dwpose/__init__.py index 34e010f..d12b34f 100644 --- a/src/controlnet_aux/dwpose/__init__.py +++ b/src/controlnet_aux/dwpose/__init__.py @@ -45,7 +45,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not 
None: + input_image = resize_image(input_image, detect_resolution) H, W, C = input_image.shape with torch.no_grad(): @@ -80,7 +81,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out detected_map = draw_pose(pose, H, W) detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/hed/__init__.py b/src/controlnet_aux/hed/__init__.py index e37d9fe..0fecee7 100644 --- a/src/controlnet_aux/hed/__init__.py +++ b/src/controlnet_aux/hed/__init__.py @@ -93,7 +93,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 H, W, C = input_image.shape @@ -112,7 +113,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf detected_map = edge detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/leres/__init__.py b/src/controlnet_aux/leres/__init__.py index dd1caf5..e4464a2 100644 --- a/src/controlnet_aux/leres/__init__.py +++ b/src/controlnet_aux/leres/__init__.py @@ -62,7 +62,8 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if 
detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 height, width, dim = input_image.shape @@ -107,7 +108,7 @@ def __call__(self, input_image, thr_a=0, thr_b=0, boost=False, detect_resolution detected_map = depth_image detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/lineart/__init__.py b/src/controlnet_aux/lineart/__init__.py index 8c0dc57..4ca83f0 100644 --- a/src/controlnet_aux/lineart/__init__.py +++ b/src/controlnet_aux/lineart/__init__.py @@ -137,7 +137,8 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) model = self.model_coarse if coarse else self.model assert input_image.ndim == 3 @@ -155,7 +156,7 @@ def __call__(self, input_image, coarse=False, detect_resolution=512, image_resol detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/lineart_anime/__init__.py b/src/controlnet_aux/lineart_anime/__init__.py index 28ec4a9..c1ad8c8 100644 --- a/src/controlnet_aux/lineart_anime/__init__.py +++ b/src/controlnet_aux/lineart_anime/__init__.py @@ -156,7 +156,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out input_image = np.array(input_image, dtype=np.uint8) 
input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) H, W, C = input_image.shape Hn = 256 * int(np.ceil(float(H) / 256.0)) @@ -177,7 +178,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/mediapipe_face/__init__.py b/src/controlnet_aux/mediapipe_face/__init__.py index 91f3cfc..08af268 100644 --- a/src/controlnet_aux/mediapipe_face/__init__.py +++ b/src/controlnet_aux/mediapipe_face/__init__.py @@ -37,12 +37,13 @@ def __call__(self, input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) detected_map = generate_annotation(input_image, max_faces, min_confidence) detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/midas/__init__.py b/src/controlnet_aux/midas/__init__.py index 8aa12e7..5b21a90 100644 --- a/src/controlnet_aux/midas/__init__.py +++ b/src/controlnet_aux/midas/__init__.py @@ -45,7 +45,8 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False output_type = output_type or "np" input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: 
+ input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 image_depth = input_image @@ -77,7 +78,7 @@ def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1, depth_and_normal=False if depth_and_normal: normal_image = HWC3(normal_image) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape depth_image = cv2.resize(depth_image, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/mlsd/__init__.py b/src/controlnet_aux/mlsd/__init__.py index 1e230c7..0b96d0c 100644 --- a/src/controlnet_aux/mlsd/__init__.py +++ b/src/controlnet_aux/mlsd/__init__.py @@ -51,7 +51,8 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 img = input_image @@ -68,7 +69,7 @@ def __call__(self, input_image, thr_v=0.1, thr_d=0.1, detect_resolution=512, ima detected_map = img_output[:, :, 0] detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/normalbae/__init__.py b/src/controlnet_aux/normalbae/__init__.py index f61943f..f656b8e 100644 --- a/src/controlnet_aux/normalbae/__init__.py +++ b/src/controlnet_aux/normalbae/__init__.py @@ -74,7 +74,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + 
if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 image_normal = input_image @@ -97,7 +98,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out detected_map = normal_image detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/open_pose/__init__.py b/src/controlnet_aux/open_pose/__init__.py index 650c870..8283cdf 100644 --- a/src/controlnet_aux/open_pose/__init__.py +++ b/src/controlnet_aux/open_pose/__init__.py @@ -214,7 +214,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) H, W, C = input_image.shape poses = self.detect_poses(input_image, include_hand, include_face) @@ -223,7 +224,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, inc detected_map = canvas detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/pidi/__init__.py b/src/controlnet_aux/pidi/__init__.py index 8f46bab..3319238 100644 --- a/src/controlnet_aux/pidi/__init__.py +++ b/src/controlnet_aux/pidi/__init__.py @@ -49,7 +49,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf input_image = np.array(input_image, dtype=np.uint8) input_image = 
HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 input_image = input_image[:, :, ::-1].copy() with torch.no_grad(): @@ -67,7 +68,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, saf detected_map = edge[0, 0] detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/segment_anything/__init__.py b/src/controlnet_aux/segment_anything/__init__.py index 048c096..3406cb1 100644 --- a/src/controlnet_aux/segment_anything/__init__.py +++ b/src/controlnet_aux/segment_anything/__init__.py @@ -71,7 +71,8 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) # Generate Masks masks = self.mask_generator.generate(input_image) @@ -81,7 +82,7 @@ def __call__(self, input_image: Union[np.ndarray, Image.Image]=None, detect_reso detected_map = map detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/shuffle/__init__.py b/src/controlnet_aux/shuffle/__init__.py index e50f7cd..2c12e60 100644 --- a/src/controlnet_aux/shuffle/__init__.py +++ b/src/controlnet_aux/shuffle/__init__.py @@ -21,7 +21,8 @@ def __call__(self, 
input_image, h=None, w=None, f=None, detect_resolution=512, i input_image = np.array(input_image, dtype=np.uint8) input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) H, W, C = input_image.shape if h is None: @@ -35,7 +36,7 @@ def __call__(self, input_image, h=None, w=None, f=None, detect_resolution=512, i flow = np.concatenate([x, y], axis=2).astype(np.float32) detected_map = cv2.remap(input_image, flow, None, cv2.INTER_LINEAR) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR) diff --git a/src/controlnet_aux/util.py b/src/controlnet_aux/util.py index 79ba7f1..d94c159 100644 --- a/src/controlnet_aux/util.py +++ b/src/controlnet_aux/util.py @@ -91,8 +91,10 @@ def resize_image(input_image, resolution): k = float(resolution) / min(H, W) H *= k W *= k - H = int(np.round(H / 64.0)) * 64 - W = int(np.round(W / 64.0)) * 64 + # We ensure image size is multiple of 32. 
If not this leads to RuntimeError: + # The size of tensor a (X) must match the size of tensor b (Y) at non-singleton dimension Z + H = int(np.round(H / 32.0)) * 32 + W = int(np.round(W / 32.0)) * 32 img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA) return img diff --git a/src/controlnet_aux/zoe/__init__.py b/src/controlnet_aux/zoe/__init__.py index 2e69795..4ee5dff 100644 --- a/src/controlnet_aux/zoe/__init__.py +++ b/src/controlnet_aux/zoe/__init__.py @@ -47,7 +47,8 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out output_type = output_type or "np" input_image = HWC3(input_image) - input_image = resize_image(input_image, detect_resolution) + if detect_resolution is not None: + input_image = resize_image(input_image, detect_resolution) assert input_image.ndim == 3 image_depth = input_image @@ -73,7 +74,7 @@ def __call__(self, input_image, detect_resolution=512, image_resolution=512, out detected_map = depth_image detected_map = HWC3(detected_map) - img = resize_image(input_image, image_resolution) + img = resize_image(input_image, image_resolution) if image_resolution is not None else input_image H, W, C = img.shape detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)