Skip to content

Commit

Permalink
proceed processing the content of a single-cell table as if it's just pa…
Browse files Browse the repository at this point in the history
…rt of the body

Signed-off-by: Maksym Lysak <[email protected]>
  • Loading branch information
Maksym Lysak committed Nov 12, 2024
1 parent f7b58df commit b46ae1a
Showing 1 changed file with 7 additions and 14 deletions.
21 changes: 7 additions & 14 deletions docling/backend/msword_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def get_level(self) -> int:
def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
for element in body:
tag_name = etree.QName(element).localname

# Check for Inline Images (drawings or blip elements)
found_drawing = etree.ElementBase.xpath(
element, ".//w:drawing", namespaces=self.xml_namespaces
Expand Down Expand Up @@ -164,8 +163,6 @@ def str_to_int(self, s, default=0):
return default

def get_numId_and_ilvl(self, paragraph):
if not hasattr(paragraph._element, "find"):
return None, None
# Access the XML element of the paragraph
numPr = paragraph._element.find(
".//w:numPr", namespaces=paragraph._element.nsmap
Expand Down Expand Up @@ -448,17 +445,13 @@ def get_rowspan(cell):
for row in table.rows:
# Calculate the max number of columns
num_cols = max(num_cols, sum(get_colspan(cell) for cell in row.cells))
# if row.cells:
# num_cols = max(num_cols, len(row.cells))

print("num_rows = {}, num_cols = {}".format(num_rows, num_cols))
if num_rows == 1:
if num_cols == 1:
cell_element = table.rows[0].cells[0]
for paragraph in cell_element.paragraphs:
# print(paragraph.text)
self.handle_text_elements(paragraph, docx_obj, doc)
return

if num_rows == 1 and num_cols == 1:
cell_element = table.rows[0].cells[0]
# If the table has only 1 cell, we consider it furniture
# and process the content of the cell as though it were part of the document body
self.walk_linear(cell_element._element, docx_obj, doc)
return

# Initialize the table grid
table_grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
Expand Down

0 comments on commit b46ae1a

Please sign in to comment.