fix: TableFormer raises IndexError: too many indices for array

Signed-off-by: Maxim Lysak <[email protected]>
Co-authored-by: Maxim Lysak <[email protected]>
DS4SD · Sep 3, 2024 · ad494ca
1 parent b478eae
commit ad494ca
Show file tree

Hide file tree

Showing 5 changed files with 112 additions and 23 deletions.
diff --git a/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py b/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py
@@ -127,13 +127,14 @@ def match_cells(self, iocr_page, table_bbox, prediction):
             Dictionary with all details about the mathings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                word["bbox"] = [
+                    word["bbox"]["l"],
+                    word["bbox"]["t"],
+                    word["bbox"]["r"],
+                    word["bbox"]["b"],
+                ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
@@ -145,9 +146,13 @@ def match_cells(self, iocr_page, table_bbox, prediction):
         table_cells = self._build_table_cells(
             html_seq, otsl_seq, table_bboxes_page, table_classes
         )
-        matches, matches_counter = self._intersection_over_pdf_match(
-            table_cells, pdf_cells
-        )
+
+        matches = {}
+        matches_counter = 0
+        if len(pdf_cells) > 0:
+            matches, matches_counter = self._intersection_over_pdf_match(
+                table_cells, pdf_cells
+            )
 
         self._log().debug("matches_counter: {}".format(matches_counter))
 
@@ -188,13 +193,14 @@ def match_cells_dummy(self, iocr_page, table_bbox, prediction):
             Dictionary with all details about the mathings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                word["bbox"] = [
+                    word["bbox"]["l"],
+                    word["bbox"]["t"],
+                    word["bbox"]["r"],
+                    word["bbox"]["b"],
+                ]
 
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]

diff --git a/docling_ibm_models/tableformer/data_management/tf_predictor.py b/docling_ibm_models/tableformer/data_management/tf_predictor.py
@@ -696,7 +696,12 @@ def predict_dummy(
             prediction["bboxes"] = corrected_bboxes
 
         # Match the cells
-        matching_details = {"table_cells": [], "matches": {}}
+        matching_details = {
+            "table_cells": [],
+            "matches": {},
+            "pdf_cells": [],
+            "prediction_bboxes_page": [],
+        }
 
         # Table bbox upscaling will scale predicted bboxes too within cell matcher
         scaled_table_bbox = [
@@ -803,7 +808,12 @@ def predict(
             prediction["bboxes"] = corrected_bboxes
 
         # Match the cells
-        matching_details = {"table_cells": [], "matches": {}}
+        matching_details = {
+            "table_cells": [],
+            "matches": {},
+            "pdf_cells": [],
+            "prediction_bboxes_page": [],
+        }
 
         # Table bbox upscaling will scale predicted bboxes too within cell matcher
         scaled_table_bbox = [
@@ -819,10 +829,13 @@ def predict(
             )
         # Post-processing
         if len(prediction["bboxes"]) > 0:
-            if self.enable_post_process:
-                AggProfiler().begin("post_process", self._prof)
-                matching_details = self._post_processor.process(matching_details)
-                AggProfiler().end("post_process", self._prof)
+            if (
+                len(iocr_page["tokens"]) > 0
+            ):  # There are at least some pdf cells to match with
+                if self.enable_post_process:
+                    AggProfiler().begin("post_process", self._prof)
+                    matching_details = self._post_processor.process(matching_details)
+                    AggProfiler().end("post_process", self._prof)
 
         # Generate the expected Docling responses
         AggProfiler().begin("generate_docling_response", self._prof)

diff --git a/tests/test_data/samples/empty_iocr.png b/tests/test_data/samples/empty_iocr.png
diff --git a/tests/test_data/samples/empty_iocr.png.json b/tests/test_data/samples/empty_iocr.png.json
@@ -0,0 +1,67 @@
+{
+    "doc_source_type": {},
+    "font_dist_info": {},
+    "info": {
+        "histogram": {
+            "mean-char-height": {},
+            "mean-char-width": {},
+            "number-of-chars": {}
+        },
+        "styles": []
+    },
+    "title": "",
+    "metadata": {
+        "numPages": 1
+    },
+    "pages": [
+        {
+            "blocks": [],
+            "cells": [],
+            "height": 1612,
+            "width": 1237,
+            "dimensions": {
+                "bbox": [
+                    0.0,
+                    0.0,
+                    1237,
+                    1612
+                ],
+                "height": 1612,
+                "origin": "TopLeft",
+                "width": 1237
+            },
+            "fonts": [],
+            "links": [],
+            "rotation": 0.0,
+            "rectangles": [],
+            "textPositions": [],
+            "text_lines": [],
+            "tokens": [],
+            "localized_image_locations": [],
+            "scanned_elements": [],
+            "paths": [],
+            "pageNumber": 1,
+            "page_image": {},
+            "lang": [
+                "en",
+                "pt",
+                "fr",
+                "it",
+                "es",
+                "fi"
+            ]
+        }
+    ],
+    "settings": {},
+    "passedHeadersFooters": {
+        "headerFooters": {
+            "1": {
+                "headerHeight": 0,
+                "footerHeight": 0
+            }
+        },
+        "headerFound": false,
+        "footerFound": false
+    },
+    "styles": []
+}
diff --git a/tests/test_tf_predictor.py b/tests/test_tf_predictor.py
@@ -25,14 +25,17 @@
     "table_jsons": [
         "./tests/test_data/samples/ADS.2007.page_123.png_iocr.parse_format.json",
         "./tests/test_data/samples/PHM.2013.page_30.png_iocr.parse_format.json",
+        "./tests/test_data/samples/empty_iocr.png.json"
     ],
     "png_images": [
         "./tests/test_data/samples/ADS.2007.page_123.png",
         "./tests/test_data/samples/PHM.2013.page_30.png",
+        "./tests/test_data/samples/empty_iocr.png"
     ],
     "table_bboxes": [
         [[178, 748, 1061, 976], [177, 1163, 1062, 1329]],
         [[100, 186, 1135, 525]],
+        [[178, 748, 1061, 976], [177, 1163, 1062, 1329]]
     ],
 }