-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
122 lines (101 loc) · 3.82 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import argparse
import json
import logging
import os
from dataclasses import dataclass
from typing import List
from urllib.parse import urlparse
from dotenv import load_dotenv
from pydantic import BaseModel # type: ignore
from tag.groq_client import GroqTaggingClient
from utils.image_utils import encode_image, is_image
from utils.log_utils import config_logs
# Module-level setup, executed on import.
# config_logs comes from utils.log_utils (not shown here); presumably it installs
# the project's logging handlers/levels - TODO confirm.
config_logs()
logger = logging.getLogger(__name__)
# Load a .env file so that GROQ_API_KEY can later be read from os.environ.
# NOTE(review): python-dotenv's load_dotenv() returns a bool (whether anything was
# loaded), not the parsed settings, and `config` is not referenced in the visible
# code - confirm whether this binding is still needed.
config = load_dotenv()
class Tags(BaseModel):
    """Shape of the JSON answer requested from the vision model.

    The JSON schema of this model is embedded in the default prompt, so the
    field names here are the keys the model is asked to return.

    This is a plain pydantic model on purpose: ``BaseModel`` already provides
    ``__init__``, validation, ``__eq__`` and ``__repr__``. Decorating it with
    the stdlib ``@dataclass`` as well replaces those with dataclass-generated
    versions that bypass pydantic's own initialisation and validation.
    """

    # Short labels describing the image (the default prompt asks for 1-2 words each).
    tags: List[str]
    # Broader groupings for the image (the default prompt asks for 1 or 2).
    categories: List[str]
def is_url(path: str) -> bool:
    """Tell whether ``path`` is a web URL rather than a filesystem path.

    Only the ``http`` and ``https`` schemes count; every other input,
    including plain file or directory paths, yields ``False``.
    """
    scheme = urlparse(path).scheme
    return scheme == "http" or scheme == "https"
def _describe_image_source(image_url: str) -> str:
    """Return a form of ``image_url`` that is safe to write to a log line.

    Inline ``data:`` URLs carry the entire base64-encoded image after the first
    comma, so only the header is kept and the payload is replaced by its
    length. Any other value is returned unchanged.
    """
    if not image_url.startswith("data:"):
        return image_url
    header, _, payload = image_url.partition(",")
    return f"{header},<{len(payload)} chars omitted>"


def prompt_groq(client: GroqTaggingClient, custom_prompt: str, image_url: str) -> dict:
    """Ask Groq about one image and return the decoded JSON reply.

    Args:
        client: Tagging client; the string returned by
            ``client.message(custom_prompt, image_url)`` is parsed as JSON.
        custom_prompt: Prompt text forwarded to the client unchanged.
        image_url: Image reference forwarded to the client unchanged. Callers
            in this file pass either an http(s) URL or an inline ``data:`` URL.

    Returns:
        Whatever ``json.loads`` decodes from the reply, or a dict holding a
        single ``"error"`` key when the request or the decoding raises.
    """
    try:
        return json.loads(client.message(custom_prompt, image_url))
    except Exception as e:
        # Deliberate best-effort boundary: the failure is logged and reported
        # through the returned dict instead of propagating to the caller.
        # The source is summarised first so that an inline base64 image is not
        # dumped into the log.
        logger.error(
            "Error processing %s: %s. Check https://console.groq.com/docs/vision for reasons why this may occur",
            _describe_image_source(image_url),
            e,
        )
        return {
            "error": "Error. Check https://console.groq.com/docs/vision for reasons why this may occur"
        }
def _expand_local_path(path: str) -> List[str]:
    """Return the candidate files named by a non-URL input.

    A directory expands to its direct entries (joined with the directory and
    sorted by name so the output order is reproducible); anything else is
    returned as a single-element list.
    """
    if os.path.isdir(path):
        return [os.path.join(path, entry) for entry in sorted(os.listdir(path))]
    return [path]


def process_images(inputs: List[str], output: str, custom_prompt: str, model: str):
    """Tag every input image with Groq and write the results to a JSON file.

    Args:
        inputs: http(s) URLs, image file paths, or directories of images.
            Directories are scanned one level deep.
        output: Path of the JSON file to write (overwritten if it exists).
        custom_prompt: Prompt sent along with every image.
        model: Name of the Groq model passed to the tagging client.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is missing from the environment.

    The output maps each URL or file path to the value returned by
    ``prompt_groq`` for it. Local paths rejected by ``is_image`` are logged
    and left out of the output.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("GROQ_API_KEY is not set in the .env file")

    client = GroqTaggingClient(api_key=api_key, model=model)
    out_json = {}

    for path in inputs:
        if is_url(path):
            # Remote images are passed to the client by URL, with no local encoding.
            out_json[path] = prompt_groq(client, custom_prompt, path)
            continue

        for file_path in _expand_local_path(path):
            if not is_image(file_path):
                logger.error("%s is not an image", file_path)
                continue
            # Local images are sent inline as a data URL.
            # NOTE(review): the MIME type is always image/jpeg regardless of the
            # file's real format; confirm against encode_image / the Groq API.
            out_json[file_path] = prompt_groq(
                client,
                custom_prompt,
                f"data:image/jpeg;base64,{encode_image(file_path)}",
            )

    logger.info("Output: %s", out_json)
    with open(output, "w", encoding="utf-8") as f:
        json.dump(out_json, f, indent=2)
if __name__ == "__main__":
    # The default prompt embeds the JSON schema of Tags so the model is told
    # exactly which keys to return.
    schema_text = json.dumps(Tags.model_json_schema(), indent=2)
    default_prompt = (
        "You will be tagging the attached image using 1-2 words per tag. "
        "You can supply as many tags as you like. "
        "Please add 1 or 2 categories as well. "
        "This should not be a paragraph or sentence. "
        f"The JSON object must use the schema: {schema_text} and nothing else. "
        "What is in the image?"
    )

    # ArgumentDefaultsHelpFormatter appends each option's default to its --help text.
    cli = argparse.ArgumentParser(
        description="Parse Args",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # One or more sources; this is the only mandatory option.
    cli.add_argument(
        "-i",
        "--images",
        required=True,
        nargs="+",
        help="Path to images to use. This can be a url, an image path, or a directory of images.",
    )
    cli.add_argument(
        "-o",
        "--output",
        default="output.json",
        help="Path to save the output",
    )
    cli.add_argument(
        "-c",
        "--custom-prompt",
        default=default_prompt,
        help="Custom prompt to use. This is useful if you want to override how to prompt Groq",
    )
    cli.add_argument(
        "-m",
        "--model",
        default="llama-3.2-11b-vision-preview",
        help="Model to use. Please note that only certain models are capable of handling images",
    )

    options = cli.parse_args()
    process_images(
        inputs=options.images,
        output=options.output,
        custom_prompt=options.custom_prompt,
        model=options.model,
    )