diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py index 3082b6c0..a2924074 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -80,7 +80,9 @@ def get_text_cells(self) -> Iterable[Cell]: cell_counter += 1 def draw_clusters_and_cells(): - image = self.get_page_image() + image = ( + self.get_page_image() + ) # make new image to avoid drawing on the saved ones draw = ImageDraw.Draw(image) for c in cells: x0, y0, x1, y1 = c.bbox.as_tuple() diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py index e5540f4c..33f059df 100644 --- a/docling/backend/pypdfium2_backend.py +++ b/docling/backend/pypdfium2_backend.py @@ -134,7 +134,9 @@ def merge_group(group: List[Cell]) -> Cell: return merged_cells def draw_clusters_and_cells(): - image = self.get_page_image() + image = ( + self.get_page_image() + ) # make new image to avoid drawing on the saved ones draw = ImageDraw.Draw(image) for c in cells: x0, y0, x1, y1 = c.bbox.as_tuple() diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 10086917..6f28ddd3 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -234,14 +234,29 @@ class Page(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) page_no: int - page_hash: str = None - size: PageSize = None - image: Image = None + page_hash: Optional[str] = None + size: Optional[PageSize] = None cells: List[Cell] = None predictions: PagePredictions = PagePredictions() - assembled: AssembledUnit = None + assembled: Optional[AssembledUnit] = None - _backend: PdfPageBackend = None # Internal PDF backend + _backend: Optional[PdfPageBackend] = ( + None # Internal PDF backend. By default it is cleared during assembling. + ) + _image_cache: Dict[float, Image] = ( + {} + ) # Cache of images in different scales. By default it is cleared during assembling. + + def get_image(self, scale: float = 1.0) -> Optional[Image]: + if self._backend is None: + return self._image_cache.get(scale, None) + if not scale in self._image_cache: + self._image_cache[scale] = self._backend.get_page_image(scale=scale) + return self._image_cache[scale] + + @property + def image(self) -> Optional[Image]: + return self.get_image() class DocumentStream(BaseModel): diff --git a/docling/document_converter.py b/docling/document_converter.py index 9954bc9b..cb3da3da 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -189,9 +189,7 @@ def process_document(self, in_doc: InputDocument) -> ConvertedDocument: # Remove page images (can be disabled) if not self.assemble_options.keep_page_images: - assembled_page.image = ( - None # Comment this if you want to visualize page images - ) + assembled_page._image_cache = {} # Unload backend assembled_page._backend.unload() @@ -231,7 +229,7 @@ def initialize_page(self, doc: InputDocument, page: Page) -> Page: # Generate the page image and store it in the page object def populate_page_images(self, doc: InputDocument, page: Page) -> Page: - page.image = page._backend.get_page_image() + page.get_image() # this will trigger storing the image in the internal cache return page @@ -247,7 +245,7 @@ def draw_text_boxes(image, cells): draw.rectangle([(x0, y0), (x1, y1)], outline="red") image.show() - # draw_text_boxes(page.image, cells) + # draw_text_boxes(page.get_image(scale=1.0), cells) return page diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index d5bca501..d9452ce6 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -30,7 +30,7 @@ def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: for page in page_batch: # rects = page._fpage. - high_res_image = page._backend.get_page_image(scale=self.scale) + high_res_image = page.get_image(scale=self.scale) im = numpy.array(high_res_image) result = self.reader.readtext(im) diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 93f80d54..af7b8e7b 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -267,7 +267,9 @@ def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height): def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: for page in page_batch: clusters = [] - for ix, pred_item in enumerate(self.layout_predictor.predict(page.image)): + for ix, pred_item in enumerate( + self.layout_predictor.predict(page.get_image(scale=1.0)) + ): cluster = Cluster( id=ix, label=pred_item["label"], diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 09c789d2..f7d03cb9 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -34,7 +34,9 @@ def __init__(self, config): self.scale = 2.0 # Scale up table input images to 144 dpi def draw_table_and_cells(self, page: Page, tbl_list: List[TableElement]): - image = page._backend.get_page_image() + image = ( + page._backend.get_page_image() + ) # make new image to avoid drawing on the saved ones draw = ImageDraw.Draw(image) for table_element in tbl_list: @@ -94,13 +96,7 @@ def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: "width": page.size.width * self.scale, "height": page.size.height * self.scale, } - # add image to page input. - if self.scale == 1.0: - page_input["image"] = numpy.asarray(page.image) - else: # render new page image on the fly at desired scale - page_input["image"] = numpy.asarray( - page._backend.get_page_image(scale=self.scale) - ) + page_input["image"] = numpy.asarray(page.get_image(scale=self.scale)) table_clusters, table_bboxes = zip(*in_tables)