-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathvision_ocr.py
More file actions
29 lines (23 loc) · 859 Bytes
/
vision_ocr.py
File metadata and controls
29 lines (23 loc) · 859 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from asyncio import run
from avalan.entities import GenerationSettings
from avalan.model.vision.image import ImageTextToTextModel
from os.path import isfile
from sys import argv, exit
async def example(path: str) -> None:
    """Transcribe the text found in an image and print it to stdout.

    Opens the ``google/gemma-3-12b-it`` model through
    ``ImageTextToTextModel``, passes it the image at ``path`` together with
    a transcription prompt, and prints whatever the model returns. Progress
    messages are printed before and after the model is opened.

    Args:
        path: Location of the image to run recognition on.
    """
    prompt = (
        "Transcribe the text on this image, word for word, "
        "keeping format when possible."
    )

    # No trailing newline: the "DONE." message printed once the model is
    # available finishes this same output line.
    print("Loading model... ", end="", flush=True)

    with ImageTextToTextModel("google/gemma-3-12b-it") as model:
        print(f"DONE. Running image recognition for {path}", flush=True)

        # Generation is limited to 1024 newly produced tokens.
        generation_settings = GenerationSettings(max_new_tokens=1024)
        transcription = await model(
            path,
            prompt,
            settings=generation_settings,
        )

        print(transcription, flush=True)
if __name__ == "__main__":
    # Exactly one argument is accepted and it must name an existing file.
    # Anything else ends the script with the usage message; sys.exit writes
    # a string argument to stderr and exits with a non-zero status.
    if len(argv) != 2 or not isfile(argv[1]):
        exit(f"Usage: {argv[0]} <valid_file_path>")

    path = argv[1]
    run(example(path))