Skip to content

Commit

Permalink
added prepare_img function (stretch_and_squish, scale_and_centercrop,…
Browse files Browse the repository at this point in the history
… scale_and_fill), added unit tests for prepare_img functions, added opencv-contrib-python requirement
  • Loading branch information
geroldmeisinger committed Jun 9, 2024
1 parent 9b4bc07 commit 3a82a1e
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
outputs/

# Translations
*.mo
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ pyparsing==3.1.2
PySide6==6.7.1
transformers==4.41.2
playsound3==2.2.1
opencv-contrib-python==4.10.0.82
numpy==1.26.4

# PyTorch
torch==2.2.2; platform_system != "Windows"
Expand All @@ -25,7 +27,6 @@ xformers==0.0.25.post1

# InternLM-XComposer2
auto-gptq==0.7.1; platform_system == "Linux" or platform_system == "Windows"
numpy==1.26.4

# WD Tagger
huggingface-hub==0.23.2
Expand Down
3 changes: 3 additions & 0 deletions taggui/run_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Manual entry point: runs the image-preparation smoke test and writes its
# result images to disk so they can be inspected by eye.
from tests import test_image

# Guarded so that importing this module (e.g. during test discovery) does not
# trigger the image processing as a side effect.
if __name__ == "__main__":
    test_image.test_prepares()
Empty file added taggui/tests/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions taggui/tests/test_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

from PIL import Image
from PIL.Image import Resampling

from utils.image import prepare_img_scale_and_centercrop, prepare_img_scale_and_fill, prepare_img_stretch_and_squish

def test_prepares():
    """Run every prepare_img_* variant on the sample images and save the results.

    Each sample image is processed by stretch-and-squish, scale-and-centercrop
    and scale-and-fill (once per fill method). Every result must be a
    ``target_size`` x ``target_size`` square; it is then written to
    ``outputs/`` as a lossless WebP for visual inspection.
    """
    target_size = 1344
    resampling = Resampling.LANCZOS
    out_dir = "outputs"
    fill_methods = ["white", "gray", "black", "noise", "replicate", "reflect"]
    expected_size = (target_size, target_size)

    os.makedirs(out_dir, exist_ok=True)

    for path in ["images/people_landscape.webp", "images/people_portrait.webp"]:
        basename, _ = os.path.splitext(os.path.basename(path))
        # Context manager so the underlying file handle is released per image.
        with Image.open(path) as img:
            for name, func in [
                ("stretch_and_squish", prepare_img_stretch_and_squish),
                ("scale_and_centercrop", prepare_img_scale_and_centercrop),
                ("scale_and_fill", prepare_img_scale_and_fill),
            ]:
                if name == "scale_and_fill":
                    # scale_and_fill takes an extra fill method; the output
                    # file is named after the method rather than the function.
                    for method in fill_methods:
                        ret = func(img, target_size, resampling, method)
                        assert ret.size == expected_size, (name, method, ret.size)
                        out_path = os.path.join(out_dir, f"{basename}_{method}.webp")
                        ret.save(out_path, format='WebP', lossless=True, quality=0)
                else:
                    ret = func(img, target_size, resampling)
                    assert ret.size == expected_size, (name, ret.size)
                    out_path = os.path.join(out_dir, f"{basename}_{name}.webp")
                    ret.save(out_path, format='WebP', lossless=True, quality=0)
79 changes: 65 additions & 14 deletions taggui/utils/image.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import random

from dataclasses import dataclass, field
from pathlib import Path
from PIL import Image as PilImage, ImageColor

from PIL import Image as PilImage, ImageColor, ImageOps
from PIL.Image import Resampling
import cv2 as opencv
import numpy as np

from PySide6.QtGui import QIcon

Expand All @@ -14,18 +19,36 @@ class Image:
thumbnail: QIcon | None = None

# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters
def prepare_img_stretch_and_squish(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
    """Resize the image to a target_size x target_size square, ignoring its aspect ratio.

    Non-square inputs are distorted, so shapes are not retained
    (see https://github.com/THUDM/CogVLM2/discussions/83).

    - pil_image: the source image
    - target_size: edge length of the square output, in pixels
    - resample: Pillow resampling filter used for the resize
    """
    square = (target_size, target_size)
    return pil_image.resize(square, resample=resample)

def prepare_img_scale_and_centercrop(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS) -> PilImage:
    """Scale the short side to target_size, then center-crop a square.

    May crop important content, especially in very rectangular images (this
    method was used in Stable Diffusion 1, see https://arxiv.org/abs/2112.10752).

    - pil_image: the source image
    - target_size: edge length of the square output, in pixels
    - resample: Pillow resampling filter used for the resize
    """
    width, height = pil_image.size
    # The short side becomes exactly target_size; the long side keeps the
    # aspect ratio (truncated to whole pixels).
    if width < height:
        new_width = target_size
        new_height = int(target_size * height / width)
    else:
        new_height = target_size
        new_width = int(target_size * width / height)

    resized = pil_image.resize((new_width, new_height), resample=resample)

    # Use integer offsets and derive right/bottom from them. With float
    # coordinates (e.g. left=0.5, right=5.5) Pillow rounds each edge on its
    # own, which can produce a crop that is one pixel wider or taller than
    # target_size.
    left = (new_width - target_size) // 2
    top = (new_height - target_size) // 2
    return resized.crop((left, top, left + target_size, top + target_size))

def prepare_img_scale_and_fill(pil_image: PilImage, target_size: int, resample=Resampling.LANCZOS, method: str = "black") -> PilImage:
"""
Preprocesses an image for the model by scaling the long side to target size and filling borders of the short side with content according to method (color, repeat, noise) until it is square. Introduces new content that wasn't there before which might be caught up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
- method: can be on of "noise", "repeat" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
Preprocesses an image for the model by scaling the long side to target size and filling borders of the short side with content according to method (color, noise, replicate, reflect) until it is square. Introduces new content that wasn't there before which might be picked up by the model ("This image showcases a portrait of a person. On the left and right side are black borders.")
- method: can be one of "noise", "replicate", "reflect" or a color value ("gray", "#000000", "rgb(100%,100%,100%)" etc.) which can be interpreted by Pillow (see https://pillow.readthedocs.io/en/stable/reference/ImageColor.html and https://developer.mozilla.org/en-US/docs/Web/CSS/named-color)
"""
color = None
try:
Expand All @@ -34,10 +57,38 @@ def prepare_img_scale_and_fill(img: PilImage, target_size: int, resampling=Resam
except ValueError:
pass

match method:
case "color": pass # fill borders with color
case "noise": pass # fill borders with RGB noise
case "repeat": pass # fill borders with color value of the edge
case _:
width, height = pil_image.size
if width > height:
new_width = target_size
new_height = int((new_width / width) * height)
else:
new_height = target_size
new_width = int((new_height / height) * width)

pastee = pil_image.resize((new_width, new_height), resample=resample)

if method == "color": # fill borders with color
canvas = PilImage.new("RGB", (target_size, target_size), color)
offset = ((target_size - new_width) // 2, (target_size - new_height) // 2)
canvas.paste(pastee, offset)
ret = canvas
elif method == "noise": # fill borders with RGB noise
canvas = PilImage.new("RGB", (target_size, target_size))
for x in range(target_size):
for y in range(target_size):
canvas.putpixel((x, y), (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
canvas.paste(pastee, ((target_size - new_width) // 2, (target_size - new_height) // 2))
ret = canvas
elif method in ("replicate", "reflect"): # fill borders with color value of the edge
left_padding = int((target_size - new_width) / 2)
top_padding = int((target_size - new_height) / 2)
right_padding = target_size - new_width - left_padding
bottom_padding = target_size - new_height - top_padding
opencv_pastee = np.array(pastee)
borderType = { "replicate": opencv.BORDER_REPLICATE, "reflect": opencv.BORDER_REFLECT }[method]
opencv_ret = opencv.copyMakeBorder(opencv_pastee, top_padding, bottom_padding, left_padding, right_padding, borderType=borderType)
ret = PilImage.fromarray(opencv_ret)
else:
raise ValueError(f"Invalid method='{method}'")

return img
return ret

0 comments on commit 3a82a1e

Please sign in to comment.