From af6f2b9baad4ad594760880799fddf4efc8bf6f7 Mon Sep 17 00:00:00 2001 From: Matteo-Omenetti Date: Mon, 3 Feb 2025 14:16:11 +0100 Subject: [PATCH] changed table repr. to otsl fixed new line Signed-off-by: Matteo-Omenetti --- docling_core/types/doc/document.py | 17 +---------------- test/test_docling_doc.py | 7 +++---- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index cb7ba4c..b21c722 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -2452,7 +2452,7 @@ def close_lists( def save_as_document_tokens( self, filename: Path, - delim: str = "\n\n", + delim: str = "\n", from_element: int = 0, to_element: int = sys.maxsize, labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS, @@ -2463,11 +2463,7 @@ def save_as_document_tokens( add_page_index: bool = True, # table specific flags add_table_cell_location: bool = False, - add_table_cell_label: bool = True, add_table_cell_text: bool = True, - # specifics - page_no: Optional[int] = None, - with_groups: bool = True, ): r"""Save the document content to a DocumentToken format.""" out = self.export_to_document_tokens( @@ -2482,9 +2478,7 @@ def save_as_document_tokens( add_page_index=add_page_index, # table specific flags add_table_cell_location=add_table_cell_location, - add_table_cell_label=add_table_cell_label, add_table_cell_text=add_table_cell_text, - # specifics ) with open(filename, "w", encoding="utf-8") as fw: @@ -2503,10 +2497,7 @@ def export_to_document_tokens( add_page_index: bool = True, # table specific flags add_table_cell_location: bool = False, - add_table_cell_label: bool = True, add_table_cell_text: bool = True, - # specifics - newline: bool = True, ) -> str: r"""Exports the document content to a DocumentToken format. @@ -2523,7 +2514,6 @@ def export_to_document_tokens( :param add_content: bool: (Default value = True) :param add_page_index: bool: (Default value = True) :param # table specific flagsadd_table_cell_location: bool - :param add_table_cell_label: bool: (Default value = True) :param add_table_cell_text: bool: (Default value = True) :returns: The content of the document formatted as a DocTags string. :rtype: str @@ -2568,11 +2558,6 @@ def add_page_break(result, item, previous_page_no, delim, add_page_break): return result, current_page_no - if newline: - delim = "\n" - else: - delim = "" - prev_level = 0 # Track the previous item's level in_ordered_list: List[bool] = [] # False diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py index 0041161..a1f333a 100644 --- a/test/test_docling_doc.py +++ b/test/test_docling_doc.py @@ -280,10 +280,9 @@ def _test_export_methods(doc: DoclingDocument, filename: str): # Test DocTags export ... dt_pred = doc.export_to_document_tokens() - # print("\n\n\n\n\n\n\n") - # print(filename) - # print(dt_pred) - # print("\n\n\n\n\n\n\n") + print("\n\n\n") + print(dt_pred) + print("\n\n\n") _verify_regression_test(dt_pred, filename=filename, ext="dt") # Test Tables export ...