diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4d81c6..17bf229 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,21 +19,9 @@ repos: entry: poetry lock --check pass_filenames: false language: system - -# Ready to be enabled soon -# - repo: local -# hooks: -# - id: system -# name: flake8 -# entry: poetry run flake8 docling_ibm_models -# pass_filenames: false -# language: system -# files: '\.py$' -# - repo: local -# hooks: -# - id: system -# name: MyPy -# entry: poetry run mypy docling_ibm_models -# pass_filenames: false -# language: system -# files: '\.py$' + # - id: system + # name: MyPy + # entry: poetry run mypy docling_ibm_models + # pass_filenames: false + # language: system + # files: '\.py$' \ No newline at end of file diff --git a/demo/demo_layout_predictor.py b/demo/demo_layout_predictor.py index e8a567c..56c030f 100644 --- a/demo/demo_layout_predictor.py +++ b/demo/demo_layout_predictor.py @@ -10,15 +10,52 @@ from pathlib import Path import numpy as np -from PIL import Image, ImageDraw +import torch from huggingface_hub import snapshot_download +from PIL import Image, ImageDraw, ImageFont from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor +def save_predictions(prefix: str, viz_dir: str, img_fn: str, img, predictions: dict): + img_path = Path(img_fn) + + image = img.copy() + draw = ImageDraw.Draw(image) + + predictions_filename = f"{prefix}_{img_path.stem}.txt" + predictions_fn = os.path.join(viz_dir, predictions_filename) + with open(predictions_fn, "w") as fd: + for pred in predictions: + bbox = [ + round(pred["l"], 2), + round(pred["t"], 2), + round(pred["r"], 2), + round(pred["b"], 2), + ] + label = pred["label"] + confidence = round(pred["confidence"], 3) + + # Save the predictions in txt file + pred_txt = f"{prefix} {img_fn}: {label} - {bbox} - {confidence}\n" + fd.write(pred_txt) + + # Draw the bbox and label + draw.rectangle(bbox, outline="orange") + txt = f"{label}: 
{confidence}" + draw.text( + (bbox[0], bbox[1]), text=txt, font=ImageFont.load_default(), fill="blue" + ) + + draw_filename = f"{prefix}_{img_path.name}" + draw_fn = os.path.join(viz_dir, draw_filename) + image.save(draw_fn) + + def demo( logger: logging.Logger, artifact_path: str, + device: str, num_threads: int, img_dir: str, viz_dir: str, @@ -30,58 +67,43 @@ def demo( pdf_image = pyvips.Image.new_from_file("test_data/ADS.2007.page_123.pdf", page=0) """ # Create the layout predictor - lpredictor = LayoutPredictor(artifact_path, num_threads=num_threads) - logger.info("LayoutPredictor settings: {}".format(lpredictor.info())) + lpredictor = LayoutPredictor(artifact_path, device=device, num_threads=num_threads) # Predict all test png images + t0 = time.perf_counter() + img_counter = 0 for img_fn in Path(img_dir).rglob("*.png"): + img_counter += 1 logger.info("Predicting '%s'...", img_fn) - start_t = time.time() with Image.open(img_fn) as image: # Predict layout + img_t0 = time.perf_counter() preds = list(lpredictor.predict(image)) - dt_ms = 1000 * (time.time() - start_t) - logger.debug("Time elapsed for prediction(ms): %s", dt_ms) - - # Draw predictions - out_img = image.copy() - draw = ImageDraw.Draw(out_img) - - for i, pred in enumerate(preds): - score = pred["confidence"] - label = pred["label"] - box = [ - round(pred["l"]), - round(pred["t"]), - round(pred["r"]), - round(pred["b"]), - ] - - # Draw bbox and label - draw.rectangle( - box, - outline="red", - ) - draw.text( - (box[0], box[1]), - text=str(label), - fill="blue", - ) - logger.info("%s: [label|score|bbox] = ['%s' | %s | %s]", i, label, score, box) - - save_fn = os.path.join(viz_dir, os.path.basename(img_fn)) - out_img.save(save_fn) - logger.info("Saving prediction visualization in: '%s'", save_fn) + img_ms = 1000 * (time.perf_counter() - img_t0) + logger.debug("Prediction(ms): {:.2f}".format(img_ms)) + + # Save predictions + logger.info("Saving prediction visualization in: '%s'", viz_dir) + 
save_predictions("ST", viz_dir, img_fn, image, preds) + total_ms = 1000 * (time.perf_counter() - t0) + avg_ms = (total_ms / img_counter) if img_counter > 0 else 0 + logger.info( + "For {} images(ms): [total|avg] = [{:.1f}|{:.1f}]".format( + img_counter, total_ms, avg_ms + ) + ) def main(args): r""" """ num_threads = int(args.num_threads) if args.num_threads is not None else None + device = args.device.lower() img_dir = args.img_dir viz_dir = args.viz_dir # Initialize logger + logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger("LayoutPredictor") logger.setLevel(logging.DEBUG) if not logger.hasHandlers(): @@ -96,11 +118,13 @@ def main(args): Path(viz_dir).mkdir(parents=True, exist_ok=True) # Download models from HF - download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.0.1") - artifact_path = os.path.join(download_path, "model_artifacts/layout/beehive_v0.0.5_pt") + download_path = snapshot_download( + repo_id="ds4sd/docling-models", revision="v2.1.0" + ) + artifact_path = os.path.join(download_path, "model_artifacts/layout") # Test the LayoutPredictor - demo(logger, artifact_path, num_threads, img_dir, viz_dir) + demo(logger, artifact_path, device, num_threads, img_dir, viz_dir) if __name__ == "__main__": @@ -109,7 +133,10 @@ def main(args): """ parser = argparse.ArgumentParser(description="Test the LayoutPredictor") parser.add_argument( - "-n", "--num_threads", required=False, default=None, help="Number of threads" + "-d", "--device", required=False, default="cpu", help="One of [cpu, cuda, mps]" + ) + parser.add_argument( + "-n", "--num_threads", required=False, default=4, help="Number of threads" ) parser.add_argument( "-i", diff --git a/docling_ibm_models/layoutmodel/layout_predictor.py b/docling_ibm_models/layoutmodel/layout_predictor.py index c42d38f..df7851e 100644 --- a/docling_ibm_models/layoutmodel/layout_predictor.py +++ b/docling_ibm_models/layoutmodel/layout_predictor.py @@ -2,6 +2,7 @@ # Copyright IBM Corp. 
2024 - 2024 # SPDX-License-Identifier: MIT # +import logging import os from collections.abc import Iterable from typing import Union @@ -10,38 +11,30 @@ import torch import torchvision.transforms as T from PIL import Image +from transformers import RTDetrForObjectDetection, RTDetrImageProcessor -MODEL_CHECKPOINT_FN = "model.pt" -DEFAULT_NUM_THREADS = 4 +_log = logging.getLogger(__name__) class LayoutPredictor: - r""" - Document layout prediction using torch + """ + Document layout prediction using safe tensors """ def __init__( - self, artifact_path: str, num_threads: int = None, use_cpu_only: bool = False + self, + artifact_path: str, + device: str = "cpu", + num_threads: int = 4, ): - r""" + """ Provide the artifact path that contains the LayoutModel file - The number of threads is decided, in the following order, by: - 1. The init method parameter `num_threads`, if it is set. - 2. The envvar "OMP_NUM_THREADS", if it is set. - 3. The default value DEFAULT_NUM_THREADS. - - The execution provided is decided, in the following order: - 1. If the init method parameter `cpu_only` is True or the envvar "USE_CPU_ONLY" is set, - it uses the "CPUExecutionProvider". - 3. Otherwise if the "CUDAExecutionProvider" is present, use: - ["CUDAExecutionProvider", "CPUExecutionProvider"]: - Parameters ---------- artifact_path: Path for the model torch file. - num_threads: (Optional) Number of threads to run the inference. - use_cpu_only: (Optional) If True, it forces CPU as the execution provider. + device: (Optional) device to run the inference. 
+ num_threads: (Optional) Number of threads to run the inference if device = 'cpu' Raises ------ @@ -70,40 +63,51 @@ def __init__( } # Blacklisted classes - self._black_classes = set(["Form", "Key-Value Region"]) + self._black_classes = set() # ["Form", "Key-Value Region"]) # Set basic params - self._threshold = 0.6 # Score threshold + self._threshold = 0.3 # Score threshold self._image_size = 640 self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64) - self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ) - # Model file - self._torch_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN) - if not os.path.isfile(self._torch_fn): - raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn)) - - # Get env vars - if num_threads is None: - num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS)) + # Set number of threads for CPU + self._device = torch.device(device) self._num_threads = num_threads + if device == "cpu": + torch.set_num_threads(self._num_threads) + + # Model file and configurations + self._st_fn = os.path.join(artifact_path, "model.safetensors") + if not os.path.isfile(self._st_fn): + raise FileNotFoundError("Missing safe tensors file: {}".format(self._st_fn)) - self.model = torch.jit.load(self._torch_fn) + # Load model and move to device + processor_config = os.path.join(artifact_path, "preprocessor_config.json") + model_config = os.path.join(artifact_path, "config.json") + self._image_processor = RTDetrImageProcessor.from_json_file(processor_config) + self._model = RTDetrForObjectDetection.from_pretrained( + artifact_path, config=model_config + ).to(self._device) + self._model.eval() + + _log.debug("LayoutPredictor settings: {}".format(self.info())) def info(self) -> dict: - r""" + """ Get information about the configuration of LayoutPredictor """ info = { - "torch_file": self._torch_fn, - "use_cpu_only": self._use_cpu_only, + "safe_tensors_file": self._st_fn, + "device": 
self._device.type, + "num_threads": self._num_threads, "image_size": self._image_size, "threshold": self._threshold, } return info + @torch.inference_mode() def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]: - r""" + """ Predict bounding boxes for a given image. The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as: [left, top, right, bottom] @@ -128,40 +132,44 @@ def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]: else: raise TypeError("Not supported input image format") + resize = {"height": self._image_size, "width": self._image_size} + inputs = self._image_processor( + images=page_img, + return_tensors="pt", + size=resize, + ).to(self._device) + outputs = self._model(**inputs) + results = self._image_processor.post_process_object_detection( + outputs, + target_sizes=torch.tensor([page_img.size[::-1]]), + threshold=self._threshold, + ) + w, h = page_img.size - orig_size = torch.tensor([w, h])[None] - transforms = T.Compose( - [ - T.Resize((640, 640)), - T.ToTensor(), - ] - ) - img = transforms(page_img)[None] - # Predict - with torch.no_grad(): - labels, boxes, scores = self.model(img, orig_size) + result = results[0] + for score, label_id, box in zip( + result["scores"], result["labels"], result["boxes"] + ): + score = float(score.item()) + + label_id = int(label_id.item()) + 1 # Advance the label_id + label_str = self._classes_map[label_id] - # Yield output - for label_idx, box, score in zip(labels[0], boxes[0], scores[0]): # Filter out blacklisted classes - label_idx = int(label_idx.item()) - score = float(score.item()) - label = self._classes_map[label_idx + 1] - if label in self._black_classes: + if label_str in self._black_classes: continue - # Check against threshold - if score > self._threshold: - l = min(w, max(0, box[0])) - t = min(h, max(0, box[1])) - r = min(w, max(0, box[2])) - b = min(h, max(0, box[3])) - yield { - "l": l, - "t": t, - "r": r, - "b": b, - 
"label": label, - "confidence": score, - } + bbox_float = [float(b.item()) for b in box] + l = min(w, max(0, bbox_float[0])) + t = min(h, max(0, bbox_float[1])) + r = min(w, max(0, bbox_float[2])) + b = min(h, max(0, bbox_float[3])) + yield { + "l": l, + "t": t, + "r": r, + "b": b, + "label": label_str, + "confidence": score, + } diff --git a/docling_ibm_models/tableformer/data_management/tf_predictor.py b/docling_ibm_models/tableformer/data_management/tf_predictor.py index dbc9f6c..41922d8 100644 --- a/docling_ibm_models/tableformer/data_management/tf_predictor.py +++ b/docling_ibm_models/tableformer/data_management/tf_predictor.py @@ -2,14 +2,17 @@ # Copyright IBM Corp. 2024 - 2024 # SPDX-License-Identifier: MIT # +import glob import json import logging import os from itertools import groupby +from pathlib import Path import cv2 import numpy as np import torch +from safetensors.torch import load_model import docling_ibm_models.tableformer.common as c import docling_ibm_models.tableformer.data_management.transforms as T @@ -82,45 +85,27 @@ def otsl_sqr_chk(rs_list, logdebug): return isSquare -def decide_device(config: dict) -> str: - r""" - Decide the inference device based on the "predict.device_mode" parameter - """ - device_mode = config["predict"].get("device_mode", "cpu") - num_gpus = torch.cuda.device_count() - - if device_mode == "auto": - device = "cuda:0" if num_gpus > 0 else "cpu" - elif device_mode in ["gpu", "cuda"]: - device = "cuda:0" - else: - device = "cpu" - return device - - class TFPredictor: r""" Table predictions for the in-memory Docling API """ - def __init__(self, config, num_threads: int = None): + def __init__(self, config, device: str = "cpu", num_threads: int = 4): r""" - The number of threads is decided, in the following order, by: - 1. The init method parameter `num_threads`, if it is set. - 2. The envvar "OMP_NUM_THREADS", if it is set. - 3. The default value 4. 
- Parameters ---------- - config : dict - Parameters configuration + config : dict Parameters configuration + device: (Optional) torch device to run the inference. + num_threads: (Optional) Number of threads to run the inference if device = 'cpu' + Raises ------ ValueError When the model cannot be found """ - self._device = decide_device(config) - self._log().info("Running on device: {}".format(self._device)) + # self._device = torch.device(device) + self._device = device + self._log().info("Running on device: {}".format(device)) self._config = config self.enable_post_process = True @@ -133,11 +118,10 @@ def __init__(self, config, num_threads: int = None): self._init_word_map() - # Set the number of torch threads - if num_threads is None: - num_threads = int(os.environ.get("OMP_NUM_THREADS", 4)) - self._num_threads = num_threads - torch.set_num_threads(num_threads) + # Set the number of threads + if device == "cpu": + self._num_threads = num_threads + torch.set_num_threads(self._num_threads) # Load the model self._model = self._load_model() @@ -202,10 +186,21 @@ def _load_model(self): if self._model_type == "TableModel02": self._remove_padding = True - # Load model from checkpoint - success, _, _, _, _ = model.load() - if not success: - err_msg = "Cannot load the model" + # Load model from safetensors + save_dir = self._config["model"]["save_dir"] + models_fn = glob.glob(f"{save_dir}/tableformer_*.safetensors") + if not models_fn: + err_msg = "Not able to find a model file for {}".format(self._model_type) + self._log().error(err_msg) + raise ValueError(err_msg) + model_fn = models_fn[ + 0 + ] # Take the first tableformer safetensors file inside the save_dir + missing, unexpected = load_model(model, model_fn, device=self._device) + if missing or unexpected: + err_msg = "Not able to load the model weights for {}".format( + self._model_type + ) self._log().error(err_msg) raise ValueError(err_msg) diff --git a/poetry.lock b/poetry.lock index 7601ce0..541b8b7 100644 --- 
a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "appnope" @@ -1381,6 +1381,7 @@ files = [ {file = "nh3-0.2.19-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00810cd5275f5c3f44b9eb0e521d1a841ee2f8023622de39ffc7d88bd533d8e0"}, {file = "nh3-0.2.19-cp38-abi3-win32.whl", hash = "sha256:7e98621856b0a911c21faa5eef8f8ea3e691526c2433f9afc2be713cb6fbdb48"}, {file = "nh3-0.2.19-cp38-abi3-win_amd64.whl", hash = "sha256:75c7cafb840f24430b009f7368945cb5ca88b2b54bb384ebfba495f16bc9c121"}, + {file = "nh3-0.2.19.tar.gz", hash = "sha256:790056b54c068ff8dceb443eaefb696b84beff58cca6c07afd754d17692a4804"}, ] [[package]] @@ -1828,10 +1829,10 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, 
] @@ -1985,13 +1986,13 @@ xmp = ["defusedxml"] [[package]] name = "pkginfo" -version = "1.11.2" +version = "1.12.0" description = "Query metadata from sdists / bdists / installed packages." optional = false python-versions = ">=3.8" files = [ - {file = "pkginfo-1.11.2-py3-none-any.whl", hash = "sha256:9ec518eefccd159de7ed45386a6bb4c6ca5fa2cb3bd9b71154fae44f6f1b36a3"}, - {file = "pkginfo-1.11.2.tar.gz", hash = "sha256:c6bc916b8298d159e31f2c216e35ee5b86da7da18874f879798d0a1983537c86"}, + {file = "pkginfo-1.12.0-py3-none-any.whl", hash = "sha256:dcd589c9be4da8973eceffa247733c144812759aa67eaf4bbf97016a02f39088"}, + {file = "pkginfo-1.12.0.tar.gz", hash = "sha256:8ad91a0445a036782b9366ef8b8c2c50291f83a553478ba8580c73d3215700cf"}, ] [package.extras] @@ -2575,6 +2576,109 @@ Pygments = ">=2.5.1" [package.extras] md = ["cmarkgfm (>=0.8.0)"] +[[package]] +name = "regex" +version = "2024.11.6" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = 
"regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = 
"regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = 
"regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, +] + [[package]] name = "requests" version = "2.32.3" @@ 
-2624,6 +2728,142 @@ files = [ [package.extras] idna2008 = ["idna"] +[[package]] +name = "safetensors" +version = "0.4.5" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7"}, + {file = "safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5"}, + {file = "safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b"}, + {file = "safetensors-0.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6"}, + {file = "safetensors-0.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163"}, + {file = "safetensors-0.4.5-cp310-none-win32.whl", hash = "sha256:7389129c03fadd1ccc37fd1ebbc773f2b031483b04700923c3511d2a939252cc"}, + {file = 
"safetensors-0.4.5-cp310-none-win_amd64.whl", hash = "sha256:e98ef5524f8b6620c8cdef97220c0b6a5c1cef69852fcd2f174bb96c2bb316b1"}, + {file = "safetensors-0.4.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c"}, + {file = "safetensors-0.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090"}, + {file = "safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943"}, + {file = "safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0"}, + {file = "safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f"}, + {file = "safetensors-0.4.5-cp311-none-win32.whl", hash = "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92"}, + {file = "safetensors-0.4.5-cp311-none-win_amd64.whl", hash = 
"sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04"}, + {file = "safetensors-0.4.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e"}, + {file = "safetensors-0.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c"}, + {file = "safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1"}, + {file = "safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4"}, + {file = "safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646"}, + {file = "safetensors-0.4.5-cp312-none-win32.whl", hash = "sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6"}, + {file = "safetensors-0.4.5-cp312-none-win_amd64.whl", hash = "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532"}, + {file = 
"safetensors-0.4.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e"}, + {file = "safetensors-0.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3"}, + {file = "safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35"}, + {file = "safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523"}, + {file = "safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142"}, + {file = "safetensors-0.4.5-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:77d9b228da8374c7262046a36c1f656ba32a93df6cc51cd4453af932011e77f1"}, + {file = "safetensors-0.4.5-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:500cac01d50b301ab7bb192353317035011c5ceeef0fca652f9f43c000bb7f8d"}, + {file = 
"safetensors-0.4.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75331c0c746f03158ded32465b7d0b0e24c5a22121743662a2393439c43a45cf"}, + {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670e95fe34e0d591d0529e5e59fd9d3d72bc77b1444fcaa14dccda4f36b5a38b"}, + {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:098923e2574ff237c517d6e840acada8e5b311cb1fa226019105ed82e9c3b62f"}, + {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ca0902d2648775089fa6a0c8fc9e6390c5f8ee576517d33f9261656f851e3f"}, + {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f0032bedc869c56f8d26259fe39cd21c5199cd57f2228d817a0e23e8370af25"}, + {file = "safetensors-0.4.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4b15f51b4f8f2a512341d9ce3475cacc19c5fdfc5db1f0e19449e75f95c7dc8"}, + {file = "safetensors-0.4.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f6594d130d0ad933d885c6a7b75c5183cb0e8450f799b80a39eae2b8508955eb"}, + {file = "safetensors-0.4.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:60c828a27e852ded2c85fc0f87bf1ec20e464c5cd4d56ff0e0711855cc2e17f8"}, + {file = "safetensors-0.4.5-cp37-none-win32.whl", hash = "sha256:6d3de65718b86c3eeaa8b73a9c3d123f9307a96bbd7be9698e21e76a56443af5"}, + {file = "safetensors-0.4.5-cp37-none-win_amd64.whl", hash = "sha256:5a2d68a523a4cefd791156a4174189a4114cf0bf9c50ceb89f261600f3b2b81a"}, + {file = "safetensors-0.4.5-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:e7a97058f96340850da0601a3309f3d29d6191b0702b2da201e54c6e3e44ccf0"}, + {file = "safetensors-0.4.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:63bfd425e25f5c733f572e2246e08a1c38bd6f2e027d3f7c87e2e43f228d1345"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3664ac565d0e809b0b929dae7ccd74e4d3273cd0c6d1220c6430035befb678e"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:313514b0b9b73ff4ddfb4edd71860696dbe3c1c9dc4d5cc13dbd74da283d2cbf"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31fa33ee326f750a2f2134a6174773c281d9a266ccd000bd4686d8021f1f3dac"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09566792588d77b68abe53754c9f1308fadd35c9f87be939e22c623eaacbed6b"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309aaec9b66cbf07ad3a2e5cb8a03205663324fea024ba391594423d0f00d9fe"}, + {file = "safetensors-0.4.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:53946c5813b8f9e26103c5efff4a931cc45d874f45229edd68557ffb35ffb9f8"}, + {file = "safetensors-0.4.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:868f9df9e99ad1e7f38c52194063a982bc88fedc7d05096f4f8160403aaf4bd6"}, + {file = "safetensors-0.4.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9cc9449bd0b0bc538bd5e268221f0c5590bc5c14c1934a6ae359d44410dc68c4"}, + {file = "safetensors-0.4.5-cp38-none-win32.whl", hash = "sha256:83c4f13a9e687335c3928f615cd63a37e3f8ef072a3f2a0599fa09f863fb06a2"}, + {file = "safetensors-0.4.5-cp38-none-win_amd64.whl", hash = "sha256:b98d40a2ffa560653f6274e15b27b3544e8e3713a44627ce268f419f35c49478"}, + {file = "safetensors-0.4.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cf727bb1281d66699bef5683b04d98c894a2803442c490a8d45cd365abfbdeb2"}, + {file = "safetensors-0.4.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:96f1d038c827cdc552d97e71f522e1049fef0542be575421f7684756a748e457"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:139fbee92570ecea774e6344fee908907db79646d00b12c535f66bc78bd5ea2c"}, + {file = 
"safetensors-0.4.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c36302c1c69eebb383775a89645a32b9d266878fab619819ce660309d6176c9b"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d641f5b8149ea98deb5ffcf604d764aad1de38a8285f86771ce1abf8e74c4891"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4db6a61d968de73722b858038c616a1bebd4a86abe2688e46ca0cc2d17558f2"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b75a616e02f21b6f1d5785b20cecbab5e2bd3f6358a90e8925b813d557666ec1"}, + {file = "safetensors-0.4.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:788ee7d04cc0e0e7f944c52ff05f52a4415b312f5efd2ee66389fb7685ee030c"}, + {file = "safetensors-0.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:87bc42bd04fd9ca31396d3ca0433db0be1411b6b53ac5a32b7845a85d01ffc2e"}, + {file = "safetensors-0.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4037676c86365a721a8c9510323a51861d703b399b78a6b4486a54a65a975fca"}, + {file = "safetensors-0.4.5-cp39-none-win32.whl", hash = "sha256:1500418454529d0ed5c1564bda376c4ddff43f30fce9517d9bee7bcce5a8ef50"}, + {file = "safetensors-0.4.5-cp39-none-win_amd64.whl", hash = "sha256:9d1a94b9d793ed8fe35ab6d5cea28d540a46559bafc6aae98f30ee0867000cab"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d42ffd4c2259f31832cb17ff866c111684c87bd930892a1ba53fed28370c918c"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab"}, + {file = "safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c7db3006a4915151ce1913652e907cdede299b974641a83fbc092102ac41b644"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f68bf99ea970960a237f416ea394e266e0361895753df06e3e06e6ea7907d98b"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8158938cf3324172df024da511839d373c40fbfaa83e9abf467174b2910d7b4c"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:540ce6c4bf6b58cb0fd93fa5f143bc0ee341c93bb4f9287ccd92cf898cc1b0dd"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bfeaa1a699c6b9ed514bd15e6a91e74738b71125a9292159e3d6b7f0a53d2cde"}, + {file = "safetensors-0.4.5-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:01c8f00da537af711979e1b42a69a8ec9e1d7112f208e0e9b8a35d2c381085ef"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a0dd565f83b30f2ca79b5d35748d0d99dd4b3454f80e03dfb41f0038e3bdf180"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:023b6e5facda76989f4cba95a861b7e656b87e225f61811065d5c501f78cdb3f"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9633b663393d5796f0b60249549371e392b75a0b955c07e9c6f8708a87fc841f"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78dd8adfb48716233c45f676d6e48534d34b4bceb50162c13d1f0bdf6f78590a"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e8deb16c4321d61ae72533b8451ec4a9af8656d1c61ff81aa49f966406e4b68"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:52452fa5999dc50c4decaf0c53aa28371f7f1e0fe5c2dd9129059fbe1e1599c7"}, + {file = "safetensors-0.4.5-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d5f23198821e227cfc52d50fa989813513db381255c6d100927b012f0cfec63d"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f4beb84b6073b1247a773141a6331117e35d07134b3bb0383003f39971d414bb"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:68814d599d25ed2fdd045ed54d370d1d03cf35e02dce56de44c651f828fb9b7b"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0b6453c54c57c1781292c46593f8a37254b8b99004c68d6c3ce229688931a22"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adaa9c6dead67e2dd90d634f89131e43162012479d86e25618e821a03d1eb1dc"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73e7d408e9012cd17511b382b43547850969c7979efc2bc353f317abaf23c84c"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:775409ce0fcc58b10773fdb4221ed1eb007de10fe7adbdf8f5e8a56096b6f0bc"}, + {file = "safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:834001bed193e4440c4a3950a31059523ee5090605c907c66808664c932b549c"}, + {file = "safetensors-0.4.5.tar.gz", hash = 
"sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310"}, +] + +[package.dependencies] +numpy = {version = ">=1.21.6", optional = true, markers = "extra == \"numpy\""} +torch = {version = ">=1.10", optional = true, markers = "extra == \"torch\""} + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +mlx = ["mlx (>=0.0.9)"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] +quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + [[package]] name = "secretstorage" version = "3.3.3" @@ -2756,6 +2996,135 @@ files = [ {file = "tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86"}, ] +[[package]] +name = "tokenizers" +version = "0.20.3" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.20.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:31ccab28dbb1a9fe539787210b0026e22debeab1662970f61c2d921f7557f7e4"}, + {file = "tokenizers-0.20.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6361191f762bda98c773da418cf511cbaa0cb8d0a1196f16f8c0119bde68ff8"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f128d5da1202b78fa0a10d8d938610472487da01b57098d48f7e944384362514"}, + {file = 
"tokenizers-0.20.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79c4121a2e9433ad7ef0769b9ca1f7dd7fa4c0cd501763d0a030afcbc6384481"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7850fde24197fe5cd6556e2fdba53a6d3bae67c531ea33a3d7c420b90904141"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b357970c095dc134978a68c67d845a1e3803ab7c4fbb39195bde914e7e13cf8b"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a333d878c4970b72d6c07848b90c05f6b045cf9273fc2bc04a27211721ad6118"}, + {file = "tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fd9fee817f655a8f50049f685e224828abfadd436b8ff67979fc1d054b435f1"}, + {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e7816808b402129393a435ea2a509679b41246175d6e5e9f25b8692bfaa272b"}, + {file = "tokenizers-0.20.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba96367db9d8a730d3a1d5996b4b7babb846c3994b8ef14008cd8660f55db59d"}, + {file = "tokenizers-0.20.3-cp310-none-win32.whl", hash = "sha256:ee31ba9d7df6a98619426283e80c6359f167e2e9882d9ce1b0254937dbd32f3f"}, + {file = "tokenizers-0.20.3-cp310-none-win_amd64.whl", hash = "sha256:a845c08fdad554fe0871d1255df85772f91236e5fd6b9287ef8b64f5807dbd0c"}, + {file = "tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90"}, + {file = "tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da"}, + {file = "tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907"}, + {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a"}, + {file = "tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c"}, + {file = "tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442"}, + {file = "tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0"}, + {file = "tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f"}, + {file = "tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64"}, + {file = 
"tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f"}, + {file = "tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad"}, + {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5"}, + {file = "tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2"}, + {file = "tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c"}, + {file = "tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2"}, + {file = "tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84"}, + {file = "tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1"}, + {file = "tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0"}, + {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797"}, + {file = "tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01"}, + {file = "tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13"}, + {file = "tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273"}, + {file = "tokenizers-0.20.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:9adda1ff5fb9dcdf899ceca672a4e2ce9e797adb512a6467305ca3d8bfcfbdd0"}, + {file = "tokenizers-0.20.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:6dde2cae6004ba7a3badff4a11911cae03ebf23e97eebfc0e71fef2530e5074f"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4a7fd678b35614fca708579eb95b7587a5e8a6d328171bd2488fd9f27d82be4"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b80e3c7283a01a356bd2210f53d1a4a5d32b269c2024389ed0173137708d50e"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a8cc0e8176b762973758a77f0d9c4467d310e33165fb74173418ca3734944da4"}, + {file = 
"tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5634b2e2f5f3d2b4439d2d74066e22eb4b1f04f3fea05cb2a3c12d89b5a3bcd"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4ba635165bc1ea46f2da8e5d80b5f70f6ec42161e38d96dbef33bb39df73964"}, + {file = "tokenizers-0.20.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18e4c7c64172e7789bd8b07aa3087ea87c4c4de7e90937a2aa036b5d92332536"}, + {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1f74909ef7675c26d4095a817ec3393d67f3158ca4836c233212e5613ef640c4"}, + {file = "tokenizers-0.20.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0e9b81321a1e05b16487d312b4264984513f8b4a7556229cafac6e88c2036b09"}, + {file = "tokenizers-0.20.3-cp37-none-win32.whl", hash = "sha256:ab48184cd58b4a03022a2ec75b54c9f600ffea9a733612c02325ed636f353729"}, + {file = "tokenizers-0.20.3-cp37-none-win_amd64.whl", hash = "sha256:60ac483cebee1c12c71878523e768df02fa17e4c54412966cb3ac862c91b36c1"}, + {file = "tokenizers-0.20.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:3229ef103c89583d10b9378afa5d601b91e6337530a0988e17ca8d635329a996"}, + {file = "tokenizers-0.20.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6ac52cc24bad3de865c7e65b1c4e7b70d00938a8ae09a92a453b8f676e714ad5"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04627b7b502fa6a2a005e1bd446fa4247d89abcb1afaa1b81eb90e21aba9a60f"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c27ceb887f0e81a3c377eb4605dca7a95a81262761c0fba308d627b2abb98f2b"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65ab780194da4e1fcf5670523a2f377c4838ebf5249efe41fa1eddd2a84fb49d"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:98d343134f47159e81f7f242264b0eb222e6b802f37173c8d7d7b64d5c9d1388"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2475bb004ab2009d29aff13b5047bfdb3d4b474f0aa9d4faa13a7f34dbbbb43"}, + {file = "tokenizers-0.20.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b6583a65c01db1197c1eb36857ceba8ec329d53afadd268b42a6b04f4965724"}, + {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62d00ba208358c037eeab7bfc00a905adc67b2d31b68ab40ed09d75881e114ea"}, + {file = "tokenizers-0.20.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0fc7a39e5bedc817bda395a798dfe2d9c5f7c71153c90d381b5135a0328d9520"}, + {file = "tokenizers-0.20.3-cp38-none-win32.whl", hash = "sha256:84d40ee0f8550d64d3ea92dd7d24a8557a9172165bdb986c9fb2503b4fe4e3b6"}, + {file = "tokenizers-0.20.3-cp38-none-win_amd64.whl", hash = "sha256:205a45246ed7f1718cf3785cff88450ba603352412aaf220ace026384aa3f1c0"}, + {file = "tokenizers-0.20.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:93e37f0269a11dc3b1a953f1fca9707f0929ebf8b4063c591c71a0664219988e"}, + {file = "tokenizers-0.20.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f4cb0c614b0135e781de96c2af87e73da0389ac1458e2a97562ed26e29490d8d"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7eb2fb1c432f5746b22f8a7f09fc18c4156cb0031c77f53cb19379d82d43297a"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfa8d029bb156181b006643309d6b673615a24e4ed24cf03aa191d599b996f51"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f90549622de3bf476ad9f1dd6f3f952ec3ed6ab8615ae88ef060d0c5bfad55d"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1d469c74eebf5c43fd61cd9b030e271d17198edd7bd45392e03a3c091d7d6d4"}, + {file = 
"tokenizers-0.20.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bee8f53b2594749f4460d53253bae55d718f04e9b633efa0f5df8938bd98e4f0"}, + {file = "tokenizers-0.20.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:938441babf3e5720e4459e306ef2809fb267680df9d1ff2873458b22aef60248"}, + {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7310ab23d7b0caebecc0e8be11a1146f320f5f07284000f6ea54793e83de1b75"}, + {file = "tokenizers-0.20.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:16121eb030a2b13094cfec936b0c12e8b4063c5f839591ea7d0212336d8f9921"}, + {file = "tokenizers-0.20.3-cp39-none-win32.whl", hash = "sha256:401cc21ef642ee235985d747f65e18f639464d377c70836c9003df208d582064"}, + {file = "tokenizers-0.20.3-cp39-none-win_amd64.whl", hash = "sha256:7498f3ea7746133335a6adb67a77cf77227a8b82c8483f644a2e5f86fea42b8d"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e919f2e3e68bb51dc31de4fcbbeff3bdf9c1cad489044c75e2b982a91059bd3c"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b8e9608f2773996cc272156e305bd79066163a66b0390fe21750aff62df1ac07"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39270a7050deaf50f7caff4c532c01b3c48f6608d42b3eacdebdc6795478c8df"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e005466632b1c5d2d2120f6de8aa768cc9d36cd1ab7d51d0c27a114c91a1e6ee"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07962340b36189b6c8feda552ea1bfeee6cf067ff922a1d7760662c2ee229e5"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:55046ad3dd5f2b3c67501fcc8c9cbe3e901d8355f08a3b745e9b57894855f85b"}, + {file = "tokenizers-0.20.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:efcf0eb939988b627558aaf2b9dc3e56d759cad2e0cfa04fcab378e4b48fc4fd"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f3558a7ae6a6d38a77dfce12172a1e2e1bf3e8871e744a1861cd7591ea9ebe24"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d53029fe44bc70c3ff14ef512460a0cf583495a0f8e2f4b70e26eb9438e38a9"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a2a56397b2bec5a629b516b23f0f8a3e4f978c7488d4a299980f8375954b85"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e5bfaae740ef9ece000f8a07e78ac0e2b085c5ce9648f8593ddf0243c9f76d"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fbaf3ea28fedfb2283da60e710aff25492e795a7397cad8a50f1e079b65a5a70"}, + {file = "tokenizers-0.20.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c47c037116310dc976eb96b008e41b9cfaba002ed8005848d4d632ee0b7ba9ae"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c31751f0721f58f5e19bb27c1acc259aeff860d8629c4e1a900b26a1979ada8e"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:c697cbd3be7a79ea250ea5f380d6f12e534c543cfb137d5c734966b3ee4f34cc"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b48971b88ef9130bf35b41b35fd857c3c4dae4a9cd7990ebc7fc03e59cc92438"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e615de179bbe060ab33773f0d98a8a8572b5883dd7dac66c1de8c056c7e748c"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da1ec842035ed9999c62e45fbe0ff14b7e8a7e02bb97688cc6313cf65e5cd755"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:6ee4954c1dd23aadc27958dad759006e71659d497dcb0ef0c7c87ea992c16ebd"}, + {file = "tokenizers-0.20.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3eda46ca402751ec82553a321bf35a617b76bbed7586e768c02ccacbdda94d6d"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:de082392a85eb0055cc055c535bff2f0cc15d7a000bdc36fbf601a0f3cf8507a"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c3db46cc0647bfd88263afdb739b92017a02a87ee30945cb3e86c7e25c7c9917"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a292392f24ab9abac5cfa8197e5a6208f2e43723420217e1ceba0b4ec77816ac"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dcd91f4e60f62b20d83a87a84fe062035a1e3ff49a8c2bbdeb2d441c8e311f4"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:900991a2b8ee35961b1095db7e265342e0e42a84c1a594823d5ee9f8fb791958"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5a8d8261ca2133d4f98aa9627c748189502b3787537ba3d7e2beb4f7cfc5d627"}, + {file = "tokenizers-0.20.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:c4fd4d71e6deb6ddf99d8d0eab87d1d16f635898906e631914a9bae8ae9f2cfb"}, + {file = "tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539"}, +] + +[package.dependencies] +huggingface-hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] + [[package]] name = "tomli" version = "2.2.1" @@ -3050,6 +3419,75 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", 
"pytest-mypy-testing"] +[[package]] +name = "transformers" +version = "4.46.3" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef"}, + {file = "transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.23.2,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +safetensors = ">=0.4.1" +tokenizers = ">=0.20,<0.21" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.26.0)"] +agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +benchmark = ["optimum-benchmark (>=0.3.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", 
"parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", 
"pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.20,<0.21)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = 
["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6,<0.15.0)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "libcst", "rich", "ruff (==0.5.1)", "urllib3 (<2.0.0)"] +ray = ["ray[tune] (>=2.7.0)"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +ruff = ["ruff (==0.5.1)"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +tiktoken = ["blobfile", "tiktoken"] +timm = ["timm (<=0.9.16)"] +tokenizers = ["tokenizers (>=0.20,<0.21)"] +torch = ["accelerate (>=0.26.0)", "torch"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] +torchhub = ["filelock", 
"huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.20,<0.21)", "torch", "tqdm (>=4.27)"] +video = ["av (==9.2.0)"] +vision = ["Pillow (>=10.0.1,<=15.0)"] + [[package]] name = "triton" version = "2.2.0" @@ -3301,4 +3739,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "b09e65b2a431ab1b0c5c29b9f1ad49a2e9276f7396fd64bae91bdc2253669c2f" +content-hash = "b708b636d08f016dcec02f7b8fba9fc3e6d2b2ee70ed14b72e77db7b585f607c" diff --git a/pyproject.toml b/pyproject.toml index 96a75e3..45f7da4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,12 +24,14 @@ packages = [ python = "^3.9" torch = "^2.2.2" torchvision = "^0" +transformers = "^4.42.0" numpy = ">=1.24.4,<3.0.0" jsonlines = "^3.1.0" Pillow = "^10.0.0" tqdm = "^4.64.0" opencv-python-headless = "^4.6.0.66" huggingface_hub = ">=0.23,<1" +safetensors = {version=">=0.4.3,<1", extras=["torch"]} [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^24.4.2"} @@ -96,3 +98,16 @@ branch = "main" parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test" parser_angular_minor_types = "feat" parser_angular_patch_types = "fix,perf" + + +# [tool.mypy] +# pretty = true +# no_implicit_optional = true +# python_version = "3.10" +# +# [[tool.mypy.overrides]] +# module = [ +# "torchvision.*", +# "transformers.*" +# ] +# ignore_missing_imports = true \ No newline at end of file diff --git a/run_tf.py b/run_tf.py new file mode 100644 index 0000000..a3572b0 --- /dev/null +++ b/run_tf.py @@ -0,0 +1,763 @@ +import argparse +import glob +import json +import os +from pathlib import Path + +from huggingface_hub import snapshot_download +import numpy as np +import cv2 +from PIL import Image, ImageDraw + +import docling_ibm_models.tableformer.data_management.tf_predictor as tf_predictor +from 
docling_ibm_models.tableformer.data_management.tf_predictor import \ + TFPredictor + +########################################################################################### +# Debug +import sys +import warnings +import traceback + +def warn_with_traceback(message, category, filename, lineno, file=None, line=None): + log = sys.stderr + traceback.print_stack(file=log) + log.write(warnings.formatwarning(message, category, filename, lineno, line)) + +warnings.showwarning = warn_with_traceback +########################################################################################### + + +""" +- Implements TF predictor to accept the input format from IOCR, e.g. + "./tests/test_data/samples/tf_table_example_0.json" (trivial table crop) +- Shape the output format like GTE would: e.g. + "tests/test_data/samples/tf_gte_output_2.json" (Note: full form image) +""" + +docling_api_data = { + "table_jsons": [ + "./tests/test_data/samples/ADS.2007.page_123.png_iocr.parse_format.json", + "./tests/test_data/samples/PHM.2013.page_30.png_iocr.parse_format.json", + "./tests/test_data/samples/empty_iocr.png.json" + ], + "png_images": [ + "./tests/test_data/samples/ADS.2007.page_123.png", + "./tests/test_data/samples/PHM.2013.page_30.png", + "./tests/test_data/samples/empty_iocr.png" + ], + "table_bboxes": [ + [[178, 748, 1061, 976], [177, 1163, 1062, 1329]], + [[100, 186, 1135, 525]], + [[178, 748, 1061, 976], [177, 1163, 1062, 1329]] + ], +} + +test_config = { + "dataset": { + "type": "TF_prepared", + "name": "TF", + "raw_data_dir": "./tests/test_data/model_artifacts/", + "load_cells": True, + "bbox_format": "5plet", + "resized_image": 448, + "keep_AR": False, + "up_scaling_enabled": True, + "down_scaling_enabled": True, + "padding_mode": "null", + "padding_color": [0, 0, 0], + "image_normalization": { + "state": True, + "mean": [0.94247851, 0.94254675, 0.94292611], + "std": [0.17910956, 0.17940403, 0.17931663], + }, + "color_jitter": True, + "rand_crop": True, + "rand_pad": 
True, + "image_grayscale": False, + }, + "model": { + "type": "TableModel04_rs", + "name": "14_128_256_4_true", + "save_dir": "./tests/test_data/model_artifacts/", + "backbone": "resnet18", + "enc_image_size": 28, + "tag_embed_dim": 16, + "hidden_dim": 512, + "tag_decoder_dim": 512, + "bbox_embed_dim": 256, + "tag_attention_dim": 256, + "bbox_attention_dim": 512, + "enc_layers": 4, # 6 + "dec_layers": 2, # 6 + "nheads": 8, + "dropout": 0.1, + "bbox_classes": 2, + }, + "train": { + "save_periodicity": 1, + "disable_cuda": False, + "epochs": 23, + "batch_size": 50, + "clip_gradient": 0.1, + "clip_max_norm": 0.1, + "bbox": True, + "validation": False, + }, + "predict": { + "max_steps": 1024, + "beam_size": 5, + "bbox": True, + "predict_dir": "./tests/test_data/samples", + "pdf_cell_iou_thres": 0.05, + "padding": False, + "padding_size": 50, + "disable_post_process": False, + "profiling": False, + "device_mode": "auto", + }, + "dataset_wordmap": { + "word_map_tag": { + "": 0, + "": 1, + "": 2, + "": 3, + "ecel": 4, + "fcel": 5, + "lcel": 6, + "ucel": 7, + "xcel": 8, + "nl": 9, + "ched": 10, + "rhed": 11, + "srow": 12, + }, + "word_map_cell": { + " ": 13, + "!": 179, + '"': 126, + "#": 101, + "$": 119, + "%": 18, + "&": 114, + "'": 108, + "(": 29, + ")": 32, + "*": 26, + "+": 97, + ",": 71, + "-": 63, + ".": 34, + "/": 66, + "0": 33, + "1": 36, + "2": 43, + "3": 41, + "4": 45, + "5": 17, + "6": 37, + "7": 35, + "8": 40, + "9": 16, + ":": 88, + ";": 92, + "<": 73, + "": 9, + "": 23, + "": 219, + "": 233, + "": 94, + "": 77, + "": 151, + "": 1, + "": 280, + "": 21, + "": 218, + "": 0, + "": 279, + "": 232, + "": 93, + "": 75, + "": 150, + "": 278, + "=": 99, + ">": 39, + "?": 96, + "@": 125, + "A": 27, + "B": 86, + "C": 19, + "D": 57, + "E": 64, + "F": 47, + "G": 44, + "H": 10, + "I": 20, + "J": 80, + "K": 81, + "L": 52, + "M": 46, + "N": 69, + "O": 65, + "P": 62, + "Q": 59, + "R": 60, + "S": 58, + "T": 48, + "U": 55, + "V": 2, + "W": 83, + "X": 104, + "Y": 89, + "Z": 
113, + "[": 70, + "\\": 165, + "]": 72, + "^": 132, + "_": 84, + "`": 196, + "a": 3, + "b": 6, + "c": 54, + "d": 12, + "e": 8, + "f": 50, + "g": 28, + "h": 56, + "i": 5, + "j": 82, + "k": 95, + "l": 7, + "m": 30, + "n": 31, + "o": 15, + "p": 22, + "q": 67, + "r": 4, + "s": 51, + "t": 14, + "u": 25, + "v": 24, + "w": 53, + "x": 61, + "y": 49, + "z": 11, + "{": 158, + "|": 139, + "}": 159, + "~": 147, + "\u00a2": 203, + "\u00a3": 162, + "\u00a4": 220, + "\u00a5": 176, + "\u00a7": 142, + "\u00a9": 268, + "\u00ab": 239, + "\u00ad": 275, + "\u00ae": 130, + "\u00b0": 100, + "\u00b1": 79, + "\u00b6": 171, + "\u00b7": 137, + "\u00bb": 240, + "\u00d7": 118, + "\u00d8": 192, + "\u00df": 197, + "\u00e6": 261, + "\u00f7": 225, + "\u00f8": 163, + "\u0131": 242, + "\u0142": 267, + "\u01c2": 211, + "\u025b": 223, + "\u02b9": 248, + "\u02c2": 195, + "\u02c3": 208, + "\u02c6": 253, + "\u0300": 209, + "\u0301": 131, + "\u0302": 138, + "\u0303": 156, + "\u0304": 152, + "\u0306": 222, + "\u0307": 247, + "\u0308": 103, + "\u030a": 102, + "\u030c": 254, + "\u0327": 155, + "\u0328": 269, + "\u0338": 170, + "\u0391": 173, + "\u0392": 169, + "\u0393": 180, + "\u0394": 85, + "\u0398": 243, + "\u0399": 271, + "\u039b": 272, + "\u03a0": 213, + "\u03a3": 185, + "\u03a6": 148, + "\u03a7": 212, + "\u03a8": 141, + "\u03a9": 161, + "\u03b1": 90, + "\u03b2": 107, + "\u03b3": 110, + "\u03b4": 153, + "\u03b5": 166, + "\u03b6": 178, + "\u03b7": 146, + "\u03b8": 186, + "\u03b9": 229, + "\u03ba": 164, + "\u03bb": 91, + "\u03bc": 78, + "\u03bd": 230, + "\u03be": 244, + "\u03c0": 127, + "\u03c1": 149, + "\u03c3": 116, + "\u03c4": 198, + "\u03c5": 189, + "\u03c6": 140, + "\u03c7": 124, + "\u03c8": 216, + "\u03c9": 167, + "\u0410": 273, + "\u0421": 194, + "\u115f": 217, + "\u200b": 265, + "\u2010": 117, + "\u2012": 135, + "\u2013": 42, + "\u2014": 106, + "\u2015": 228, + "\u2016": 259, + "\u2018": 123, + "\u2019": 121, + "\u201c": 87, + "\u201d": 115, + "\u201e": 245, + "\u2020": 109, + "\u2021": 129, + 
"\u2022": 128, + "\u2028": 190, + "\u2030": 154, + "\u2032": 68, + "\u203b": 224, + "\u2044": 188, + "\u204e": 199, + "\u2061": 200, + "\u20ac": 184, + "\u2190": 202, + "\u2191": 112, + "\u2192": 120, + "\u2193": 111, + "\u2194": 183, + "\u21d1": 266, + "\u21d2": 264, + "\u21d3": 255, + "\u2205": 215, + "\u2206": 175, + "\u2208": 262, + "\u2211": 160, + "\u2212": 76, + "\u2216": 206, + "\u2217": 105, + "\u2218": 246, + "\u2219": 236, + "\u221a": 187, + "\u221e": 207, + "\u2223": 260, + "\u2225": 193, + "\u2227": 182, + "\u2229": 256, + "\u222b": 258, + "\u223c": 98, + "\u2248": 210, + "\u2264": 38, + "\u2265": 74, + "\u2266": 214, + "\u2267": 181, + "\u2295": 263, + "\u22c5": 174, + "\u22c6": 191, + "\u22ee": 277, + "\u22ef": 270, + "\u2500": 205, + "\u2551": 231, + "\u25a0": 250, + "\u25a1": 177, + "\u25aa": 145, + "\u25b2": 136, + "\u25b3": 143, + "\u25bc": 251, + "\u25c6": 226, + "\u25ca": 235, + "\u25cb": 227, + "\u25cf": 172, + "\u25e6": 274, + "\u2605": 204, + "\u2606": 144, + "\u2640": 133, + "\u2642": 134, + "\u2663": 252, + "\u2666": 157, + "\u266f": 221, + "\u2713": 122, + "\u2714": 249, + "\u2717": 201, + "\u2794": 168, + "\u27a2": 276, + "\u2a7d": 234, + "\u2a7e": 241, + "\u3008": 237, + "\u3009": 238, + "\ufeff": 257, + }, + }, +} + +# ================================================================================================== + +configs = [test_config] + + +def init() -> list[dict]: + r""" + Initialize the testing environment + """ + # Download models from HF + download_path = snapshot_download(repo_id="ds4sd/docling-models") + save_dir = os.path.join(download_path, "model_artifacts/tableformer") + + # Add the missing config keys + for config in configs: + config["model"]["save_dir"] = save_dir + return configs + +def combine_checkpoint(save_dir): + r""" + Check if the checkpoint file is present as one part or 2 splits. 
+ Combine parts into one file if needed + + Parameters + ---------- + save_dir : string + The directory to check for checkpoint files or splits of it + + Returns + ------- + int + 0: The full checkpoint file already exists, no combine was needed + 1: The splits were found, a combine has been done + -1: No full checkpoint and no splits exist. Error + """ + # Check if the full file already exists + full_file_pattern = os.path.join(save_dir, "*.check") + candidate = glob.glob(full_file_pattern) + if len(candidate) == 1: + print( + "combine_checkpoint: The whole checkpoint file was found: {}".format( + candidate[0] + ) + ) + return 0 + + # Check for splits + splits_pattern = os.path.join(save_dir, "*.check.a[a-z]") + splits = glob.glob(splits_pattern) + splits.sort() + if splits is None or len(splits) == 0: + print( + "combine_checkpoint: Both the full checkpoint and the splits are missing. Error" + ) + return -1 + + # Combine splits + full_fn = splits[0].rpartition(".check")[0] + ".check" + with open(full_fn, "wb") as f_out: + for split_fn in splits: + with open(split_fn, "rb") as f_split: + print("combine_checkpoint: read split: {}".format(split_fn)) + f_out.write(f_split.read()) + + print("combine_checkpoint: combine splits as: {}".format(full_fn)) + return 1 + + +def test_tf_predictor(): + r""" + Test the TFPredictor + """ + viz = True + configs = init() + + # Load the docling_api_data + iocr_pages = [] + for table_json_fn, png_image_fn, table_bboxes_b in zip( + docling_api_data["table_jsons"], + docling_api_data["png_images"], + docling_api_data["table_bboxes"], + ): + with open(table_json_fn, "r") as fp: + iocr_page_raw = json.load(fp) + iocr_page = iocr_page_raw["pages"][0] + iocr_page["image"] = cv2.imread(png_image_fn) + # page_image = cv2.imread(png_image_fn) + iocr_page["png_image_fn"] = png_image_fn + iocr_page["table_bboxes"] = table_bboxes_b + iocr_pages.append(iocr_page) + + # Loop over the test configs + for test_config in configs: + # Check if the 
checkpoint file should be combined + assert ( + combine_checkpoint(test_config["model"]["save_dir"]) >= 0 + ), "Model checkpoint is missing" + + # Loop over the iocr_pages + predictor = TFPredictor(test_config) + + ########################################################################################### + # Debug: Measure TF parameters + tf_params_count = 0 + for tf_module_params in predictor._model.parameters(): + module_params = 1 + for dim in tf_module_params.size(): + module_params *= dim + tf_params_count += module_params + print("TF params: {}".format(tf_params_count)) + ########################################################################################### + + for iocr_page in iocr_pages: + # Prepare "Predict" parameters + # iw = iocr_page["width"] + # ih = iocr_page["height"] + # table_bboxes = [[0, 0, iw, ih]] # just one table per page in our examples + table_bboxes = iocr_page["table_bboxes"] + + # for t, table_bbox in enumerate(table_bboxes): + print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + png_img_bfn = os.path.basename(iocr_page["png_image_fn"]) + print("Predicting image: {}".format(png_img_bfn)) + + # Run prediction, post-processing, and cell matching + # PARAMETERS: + # iocr_page - json received from iocr, augmented with iocr_page["image"] + # table_bboxes - list of detected bboxes on page: [[x1, y1, x2, y2], [...]...] + # do_matching - Boolean, when True - will match with text cells provided, + # when False - returns original cell prediction BBOXes in the same format + # OUTPUT: + # List of dicts per table: [{"tf_responses":[...], "predict_details": {}}] + + multi_tf_output = predictor.multi_table_predict( + iocr_page, table_bboxes, True + ) + + # Test output for validity, create visualizations... 
+ for t, tf_output in enumerate(multi_tf_output): + tf_responses = tf_output["tf_responses"] + predict_details = tf_output["predict_details"] + assert tf_responses is not None, "Empty prediction response" + assert isinstance( + tf_responses, list + ), " Wrong response type. It should be a list" + + img = Image.open(iocr_page["png_image_fn"]) + img1 = ImageDraw.Draw(img) + + xt0 = table_bboxes[t][0] + yt0 = table_bboxes[t][1] + xt1 = max(xt0, table_bboxes[t][2]) + yt1 = max(yt0, table_bboxes[t][3]) + img1.rectangle(((xt0, yt0), (xt1, yt1)), outline="pink", width=5) + + if viz: + # Visualize original OCR words: + for iocr_word in iocr_page["tokens"]: + xi0 = iocr_word["bbox"]["l"] + yi0 = iocr_word["bbox"]["t"] + xi1 = max(xi0, iocr_word["bbox"]["r"]) + yi1 = max(yi0, iocr_word["bbox"]["b"]) + img1.rectangle(((xi0, yi0), (xi1, yi1)), outline="gray") + # Visualize original docling_ibm_models.tableformer predictions: + for predicted_bbox in predict_details["prediction_bboxes_page"]: + xp0 = predicted_bbox[0] - 1 + yp0 = predicted_bbox[1] - 1 + xp1 = max(xp0, predicted_bbox[2] + 1) + yp1 = max(yp0, predicted_bbox[3] + 1) + img1.rectangle(((xp0, yp0), (xp1, yp1)), outline="green") + + # Check the structure of the list items + for i, response in enumerate(tf_responses): + assert ( + "bbox" in response + ), "bbox field is missing from response: " + str(i) + assert ( + "text_cell_bboxes" in response + ), "text_cell_bboxes is missing: " + str(i) + assert ( + "row_span" in response + ), "row_span is missing from resp: " + str(i) + assert ( + "col_span" in response + ), "col_span is missing from response: " + str(i) + # print("*********** column_header: {}".format(response["column_header"])) + if viz: + # Visualization: + for text_cell in response["text_cell_bboxes"]: + xc0 = text_cell["l"] + yc0 = text_cell["t"] + xc1 = max(xc0, text_cell["r"]) + yc1 = max(yc0, text_cell["b"]) + img1.rectangle(((xc0, yc0), (xc1, yc1)), outline="red") + + x0 = response["bbox"]["l"] - 2 + y0 = 
response["bbox"]["t"] - 2 + x1 = max(x0, response["bbox"]["r"] + 2) + y1 = max(y0, response["bbox"]["b"] + 2) + + if response["column_header"]: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="blue", width=2 + ) + elif response["row_header"]: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="magenta", width=2 + ) + elif response["row_section"]: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="brown", width=2 + ) + else: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="black", width=1 + ) + if viz: + viz_root = "./tests/test_data/viz/" + Path(viz_root).mkdir(parents=True, exist_ok=True) + png_img_bfn1 = png_img_bfn.replace(".png", "." + str(t) + ".png") + viz_fn = os.path.join(viz_root, png_img_bfn1) + img.save(viz_fn) + + +def run_tf_predictor(page_input_fn: str, table_bboxes_fn: str): + r""" + Run the TFPredictor with external files to provide the page_input and table_bboxes + """ + viz = True + + configs = init() + config = configs[0] + predictor = TFPredictor(config) + + assert ( + combine_checkpoint(config["model"]["save_dir"]) >= 0 + ), "Model checkpoint is missing" + + # Load the page_input and table_bboxes + with open(page_input_fn, "r") as fd: + page_input = json.load(fd) + page_input["image"] = np.array(page_input["image"], dtype=np.float32) + + with open(table_bboxes_fn, "r") as fd: + table_bboxes = json.load(fd) + + multi_tf_output = predictor.multi_table_predict(page_input, table_bboxes, True) + + # Test output for validity, create visualizations... + for t, tf_output in enumerate(multi_tf_output): + tf_responses = tf_output["tf_responses"] + predict_details = tf_output["predict_details"] + assert tf_responses is not None, "Empty prediction response" + assert isinstance( + tf_responses, list + ), " Wrong response type. 
It should be a list" + + img = page_input["image"] + img1 = ImageDraw.Draw(img) + + xt0 = table_bboxes[t][0] + yt0 = table_bboxes[t][1] + xt1 = max(xt0, table_bboxes[t][2]) + yt1 = max(yt0, table_bboxes[t][3]) + img1.rectangle(((xt0, yt0), (xt1, yt1)), outline="pink", width=5) + + if viz: + # # Visualize original OCR words: + # for iocr_word in iocr_page["tokens"]: + # xi0 = iocr_word["bbox"]["l"] + # yi0 = iocr_word["bbox"]["t"] + # xi1 = max(xi0, iocr_word["bbox"]["r"]) + # yi1 = max(yi0, iocr_word["bbox"]["b"]) + # img1.rectangle(((xi0, yi0), (xi1, yi1)), outline="gray") + # Visualize original docling_ibm_models.tableformer predictions: + for predicted_bbox in predict_details["prediction_bboxes_page"]: + xp0 = predicted_bbox[0] - 1 + yp0 = predicted_bbox[1] - 1 + xp1 = max(xp0, predicted_bbox[2] + 1) + yp1 = max(yp0, predicted_bbox[3] + 1) + img1.rectangle(((xp0, yp0), (xp1, yp1)), outline="green") + + # Check the structure of the list items + for i, response in enumerate(tf_responses): + assert ( + "bbox" in response + ), "bbox field is missing from response: " + str(i) + assert ( + "text_cell_bboxes" in response + ), "text_cell_bboxes is missing: " + str(i) + assert ( + "row_span" in response + ), "row_span is missing from resp: " + str(i) + assert ( + "col_span" in response + ), "col_span is missing from response: " + str(i) + # print("*********** column_header: {}".format(response["column_header"])) + if viz: + # Visualization: + for text_cell in response["text_cell_bboxes"]: + xc0 = text_cell["l"] + yc0 = text_cell["t"] + xc1 = max(xc0, text_cell["r"]) + yc1 = max(yc0, text_cell["b"]) + img1.rectangle(((xc0, yc0), (xc1, yc1)), outline="red") + + x0 = response["bbox"]["l"] - 2 + y0 = response["bbox"]["t"] - 2 + x1 = max(x0, response["bbox"]["r"] + 2) + y1 = max(y0, response["bbox"]["b"] + 2) + + if response["column_header"]: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="blue", width=2 + ) + elif response["row_header"]: + img1.rectangle( + ((x0, y0), 
(x1, y1)), outline="magenta", width=2 + ) + elif response["row_section"]: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="brown", width=2 + ) + else: + img1.rectangle( + ((x0, y0), (x1, y1)), outline="black", width=1 + ) + if viz: + viz_root = "./tests/test_data/viz/" + Path(viz_root).mkdir(parents=True, exist_ok=True) + png_img_bfn1 = "run_test.png" + viz_fn = os.path.join(viz_root, png_img_bfn1) + img.save(viz_fn) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("page_input_fn", help="File path of the page input data") + parser.add_argument("table_bboxes_fn", help="File path of the table bboxes data") + args = parser.parse_args() + + run_tf_predictor(args.page_input_fn, args.table_bboxes_fn) + # test_tf_predictor() diff --git a/tests/test_layout_predictor.py b/tests/test_layout_predictor.py index 5045ae6..eeddf60 100644 --- a/tests/test_layout_predictor.py +++ b/tests/test_layout_predictor.py @@ -3,14 +3,14 @@ # SPDX-License-Identifier: MIT # import os +from pathlib import Path +import torch import numpy as np import pytest -from PIL import Image - from huggingface_hub import snapshot_download +from PIL import Image, ImageDraw, ImageFont -import docling_ibm_models.layoutmodel.layout_predictor as lp from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor @@ -26,12 +26,7 @@ def init() -> dict: "tests/test_data/samples/ADS.2007.page_123.png", ], "info1": { - "use_cpu_only": True, - "image_size": 640, - "threshold": 0.6, - }, - "info2": { - "use_cpu_only": True, + "device": "cpu", "image_size": 640, "threshold": 0.6, }, @@ -39,13 +34,11 @@ def init() -> dict: } # Download models from HF - download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.0.1") - artifact_path = os.path.join(download_path, "model_artifacts/layout/beehive_v0.0.5_pt") + download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.1.0") + artifact_path = os.path.join(download_path, 
"model_artifacts/layout") # Add the missing config keys init["artifact_path"] = artifact_path - init["info1"]["torch_file"] = os.path.join(artifact_path, lp.MODEL_CHECKPOINT_FN) - init["info2"]["torch_file"] = os.path.join(artifact_path, lp.MODEL_CHECKPOINT_FN) return init @@ -54,17 +47,18 @@ def test_layoutpredictor(init: dict): r""" Unit test for the LayoutPredictor """ - # Initialize LayoutPredictor with envvars - os.environ["USE_CPU_ONLY"] = "" - os.environ["OMP_NUM_THREADS"] = "2" - lpredictor = LayoutPredictor(init["artifact_path"]) - assert init["info1"] == lpredictor.info() + device = "cpu" + num_threads = 2 - # Initialize LayoutPredictor with optional parameters + # Initialize LayoutPredictor lpredictor = LayoutPredictor( - init["artifact_path"], use_cpu_only=True + init["artifact_path"], device=device, num_threads=num_threads ) - assert init["info2"] == lpredictor.info() + + # Check info + info = lpredictor.info() + assert info["device"] == device, "Wronly set device" + assert info["num_threads"] == num_threads, "Wronly set number of threads" # Unsupported input image is_exception = False @@ -79,6 +73,7 @@ def test_layoutpredictor(init: dict): for img_fn in init["test_imgs"]: with Image.open(img_fn) as img: w, h = img.size + # Load images as PIL objects for i, pred in enumerate(lpredictor.predict(img)): print("PIL pred: {}".format(pred)) @@ -86,7 +81,6 @@ def test_layoutpredictor(init: dict): assert pred["t"] >= 0 and pred["t"] <= h assert pred["r"] >= 0 and pred["r"] <= w assert pred["b"] >= 0 and pred["b"] <= h - assert i + 1 == init["pred_bboxes"] # Load images as numpy arrays diff --git a/tests/test_tf_predictor.py b/tests/test_tf_predictor.py index daee028..ba5077d 100644 --- a/tests/test_tf_predictor.py +++ b/tests/test_tf_predictor.py @@ -7,6 +7,7 @@ import os from pathlib import Path +import torch import pytest import cv2 from PIL import Image, ImageDraw @@ -103,8 +104,7 @@ "padding": False, "padding_size": 50, "disable_post_process": False, - 
"profiling": True, - "device_mode": "auto", + "profiling": True }, "dataset_wordmap": { "word_map_tag": { @@ -412,64 +412,14 @@ configs = [test_config] -def combine_checkpoint(save_dir): - r""" - Check if the checkpoint file is present as one part or 2 splits. - Combine parts into one file if needed - - Parameters - ---------- - save_dir : string - The directory to check for checkpoint files or splits of it - - Returns - ------- - int - 0: The full checkpoint file already exists, no combine was needed - 1: The splits were found, a combine has been done - -1: No full checkpoint and no splits exist. Error - """ - # Check if the full file already exists - full_file_pattern = os.path.join(save_dir, "*.check") - candidate = glob.glob(full_file_pattern) - if len(candidate) == 1: - print( - "combine_checkpoint: The whole checkpoint file was found: {}".format( - candidate[0] - ) - ) - return 0 - - # Check for splits - splits_pattern = os.path.join(save_dir, "*.check.a[a-z]") - splits = glob.glob(splits_pattern) - splits.sort() - if splits is None or len(splits) == 0: - print( - "combine_checkpoint: Both the full checkpoint and the splits are missing. 
Error" - ) - return -1 - - # Combine splits - full_fn = splits[0].rpartition(".check")[0] + ".check" - with open(full_fn, "wb") as f_out: - for split_fn in splits: - with open(split_fn, "rb") as f_split: - print("combine_checkpoint: read split: {}".format(split_fn)) - f_out.write(f_split.read()) - - print("combine_checkpoint: combine splits as: {}".format(full_fn)) - return 1 - - @pytest.fixture(scope="module") def init() -> list[dict]: r""" Initialize the testing environment """ # Download models from HF - download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.0.1") - save_dir = os.path.join(download_path, "model_artifacts/tableformer") + download_path = snapshot_download(repo_id="ds4sd/docling-models", revision="v2.1.0") + save_dir = os.path.join(download_path, "model_artifacts/tableformer/fast") # Add the missing config keys for config in configs: @@ -481,6 +431,8 @@ def test_tf_predictor(init): Test the TFPredictor """ viz = True + device = "cpu" + num_threads = 2 # Load the docling_api_data iocr_pages = [] @@ -501,12 +453,12 @@ def test_tf_predictor(init): # Loop over the test configs for test_config in init: # Check if the checkpoint file should be combined - assert ( - combine_checkpoint(test_config["model"]["save_dir"]) >= 0 - ), "Model checkpoint is missing" + # assert ( + # combine_checkpoint(test_config["model"]["save_dir"]) >= 0 + # ), "Model checkpoint is missing" # Loop over the iocr_pages - predictor = TFPredictor(test_config) + predictor = TFPredictor(test_config, device=device, num_threads=num_threads) for iocr_page in iocr_pages: # Prepare "Predict" parameters # iw = iocr_page["width"] @@ -626,33 +578,3 @@ def test_tf_predictor(init): profiling_data = AggProfiler().get_data() print("Profiling data:") print(json.dumps(profiling_data, indent=2, sort_keys=True)) - - # assert False - - -def test_device_mode(): - r""" - Test the "predict.device_mode" parameter - """ - mini_configs = [ - {"predict": {}}, - {"predict": 
{"device_mode": "cpu"}}, - {"predict": {"device_mode": "cuda"}}, - {"predict": {"device_mode": "gpu"}}, - {"predict": {"device_mode": "wrong"}}, - ] - - for i, config in enumerate(mini_configs): - device = tf_predictor.decide_device(config) - assert device in ["cpu", "cuda:0"], "Irrelevant device has been returned" - - if i == 0: - assert device == "cpu", "By default the 'cpu' device should be used" - elif i == 1: - assert device == "cpu", "An explicit 'cpu' device was given" - elif i == 2 or i == 3: - assert device == "cuda:0", "Cuda or gpu should become 'cuda:0'" - else: - assert ( - device == "cpu" - ), "A fall-back to 'cpu' should happen in case of error"