Skip to content

Commit

Permalink
updated the regression tests
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Staar <[email protected]>
  • Loading branch information
PeterStaar-IBM committed Jan 31, 2025
1 parent 5ef3e35 commit 009ed51
Show file tree
Hide file tree
Showing 73 changed files with 1,740,740 additions and 213,301 deletions.
2 changes: 1 addition & 1 deletion docling_parse/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def export_to_textlines(
self,
add_location: bool = True,
add_fontkey: bool = False,
- add_fontname: bool = False,
+ add_fontname: bool = True,
) -> List[str]:
lines: List[str] = []
for cell in self.cells:
Expand Down
8 changes: 4 additions & 4 deletions docling_parse/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,12 +274,12 @@ def visualise_py(
pdf_doc: PdfDocument = parser.load(path_or_stream=pdf_path, lazy=True)

page_nos = [page_num]
- if page_num==-1:
- page_nos = [(page_ind+1) for page_ind in range(0, pdf_doc.number_of_pages())]
+ if page_num == -1:
+ page_nos = [(page_ind + 1) for page_ind in range(0, pdf_doc.number_of_pages())]

for page_no in page_nos:
print(f"parsing {pdf_path} on page: {page_no}")

pdf_page: ParsedPdfPage = pdf_doc.get_page(page_no=page_no)

if category in ["sanitized", "both"]:
Expand All @@ -294,7 +294,7 @@ def visualise_py(
lines = pdf_page.original.export_to_textlines(add_fontkey=True)
print(f"text-lines (original, page_no: {page_no}):")
print("\n".join(lines))

lines = pdf_page.sanitized.export_to_textlines(add_fontkey=True)
print(f"text-lines (sanitized, page_no: {page_no}):")
print("\n".join(lines))
Expand Down
Loading

0 comments on commit 009ed51

Please sign in to comment.