Skip to content

Commit b56b208

Browse files
committed
Renaming and cleanup
Signed-off-by: Christoph Auer <[email protected]>
1 parent 04096f2 commit b56b208

File tree

5 files changed

+16
-16
lines changed

5 files changed

+16
-16
lines changed

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ for page_no, pred_page in pdf_doc.iterate_pages():
8282
print(word.rect, ": ", word.text)
8383

8484
# create a PIL image with the char cells
85-
img = pred_page.render_as_image(label=TextCellUnit.CHAR)
85+
img = pred_page.render_as_image(cell_unit=TextCellUnit.CHAR)
8686
img.show()
8787
```
8888

docling_parse/pdf_parser.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
PdfLine,
1515
PdfMetaData,
1616
PdfPageBoundaryType,
17-
PdfPageDimensions,
17+
PdfPageGeometry,
1818
PdfTableOfContents,
1919
PdfTextCell,
2020
SegmentedPdfPage,
@@ -157,7 +157,7 @@ def load_all_pages(self, create_words: bool = True, create_lines: bool = True):
157157
create_textlines=create_lines,
158158
) # put on cache
159159

160-
def _to_dimension(self, dimension: dict) -> PdfPageDimensions:
160+
def _to_page_geometry(self, dimension: dict) -> PdfPageGeometry:
161161

162162
boundary_type: PdfPageBoundaryType = PdfPageBoundaryType(
163163
dimension["page_boundary"]
@@ -217,7 +217,7 @@ def _to_dimension(self, dimension: dict) -> PdfPageDimensions:
217217
coord_origin=CoordOrigin.BOTTOMLEFT,
218218
)
219219

220-
return PdfPageDimensions(
220+
return PdfPageGeometry(
221221
angle=dimension["angle"],
222222
boundary_type=boundary_type,
223223
rect=rect,
@@ -319,7 +319,7 @@ def _to_segmented_page(
319319
) -> SegmentedPdfPage:
320320

321321
segmented_page = SegmentedPdfPage(
322-
dimension=self._to_dimension(page["dimension"]),
322+
dimension=self._to_page_geometry(page["dimension"]),
323323
char_cells=self._to_cells(page["cells"]),
324324
word_cells=[],
325325
textline_cells=[],

docling_parse/visualize.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def visualise_py(
147147
if category in ["all", "char"]:
148148

149149
img = pdf_page.render_as_image(
150-
label=TextCellUnit.CHAR,
150+
cell_unit=TextCellUnit.CHAR,
151151
draw_cells_bbox=(not display_text),
152152
draw_cells_text=display_text,
153153
)
@@ -162,7 +162,7 @@ def visualise_py(
162162

163163
if log_text:
164164
lines = pdf_page.export_to_textlines(
165-
label=TextCellUnit.CHAR,
165+
cell_unit=TextCellUnit.CHAR,
166166
add_fontkey=True,
167167
add_fontname=False,
168168
)
@@ -171,7 +171,7 @@ def visualise_py(
171171

172172
if category in ["all", "word"]:
173173
img = pdf_page.render_as_image(
174-
label=TextCellUnit.WORD,
174+
cell_unit=TextCellUnit.WORD,
175175
draw_cells_bbox=(not display_text),
176176
draw_cells_text=display_text,
177177
)
@@ -186,7 +186,7 @@ def visualise_py(
186186

187187
if log_text:
188188
lines = pdf_page.export_to_textlines(
189-
label=TextCellUnit.WORD,
189+
cell_unit=TextCellUnit.WORD,
190190
add_fontkey=True,
191191
add_fontname=False,
192192
)
@@ -195,7 +195,7 @@ def visualise_py(
195195

196196
if category in ["all", "line"]:
197197
img = pdf_page.render_as_image(
198-
label=TextCellUnit.LINE,
198+
cell_unit=TextCellUnit.LINE,
199199
draw_cells_bbox=(not display_text),
200200
draw_cells_text=display_text,
201201
)
@@ -210,7 +210,7 @@ def visualise_py(
210210

211211
if log_text:
212212
lines = pdf_page.export_to_textlines(
213-
label=TextCellUnit.LINE,
213+
cell_unit=TextCellUnit.LINE,
214214
add_fontkey=True,
215215
add_fontname=False,
216216
)

poetry.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/test_parse.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -243,11 +243,11 @@ def test_reference_documents_from_filenames():
243243
true_page = SegmentedPdfPage.load_from_json(fname)
244244
verify_SegmentedPdfPage(true_page, pred_page, filename=fname)
245245

246-
img = pred_page.render_as_image(label=TextCellUnit.CHAR)
246+
img = pred_page.render_as_image(cell_unit=TextCellUnit.CHAR)
247247
# img.show()
248-
img = pred_page.render_as_image(label=TextCellUnit.WORD)
248+
img = pred_page.render_as_image(cell_unit=TextCellUnit.WORD)
249249
# img.show()
250-
img = pred_page.render_as_image(label=TextCellUnit.LINE)
250+
img = pred_page.render_as_image(cell_unit=TextCellUnit.LINE)
251251
# img.show()
252252

253253
toc: PdfTableOfContents = pdf_doc.get_table_of_contents()

0 commit comments

Comments
 (0)