Skip to content

Commit 8f26686

Browse files
authored
Merge pull request #1224 from icecraft/fix/new_api
2 parents f58a7a7 + 87af738 commit 8f26686

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

magic_pdf/config/constants.py

+5
Original file line number | Diff line number | Diff line change
@@ -51,3 +51,8 @@ class MODEL_NAME:
5151
UniMerNet_v2_Small = 'unimernet_small'
5252

5353
RAPID_TABLE = 'rapid_table'
54+
55+
56+
PARSE_TYPE_TXT = 'txt'
57+
PARSE_TYPE_OCR = 'ocr'
58+

magic_pdf/model/operators.py

+14-3
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,14 @@
66
from magic_pdf.config.enums import SupportedPdfParseMethod
77
from magic_pdf.data.data_reader_writer import DataWriter
88
from magic_pdf.data.dataset import Dataset
9+
from magic_pdf.libs.version import __version__
910
from magic_pdf.filter import classify
1011
from magic_pdf.libs.draw_bbox import draw_model_bbox
1112
from magic_pdf.pdf_parse_union_core_v2 import pdf_parse_union
1213
from magic_pdf.pipe.operators import PipeResult
1314
from magic_pdf.model import InferenceResultBase
14-
15+
from magic_pdf.libs.version import __version__
16+
from magic_pdf.config.constants import PARSE_TYPE_TXT, PARSE_TYPE_OCR
1517

1618
class InferenceResult(InferenceResultBase):
1719
def __init__(self, inference_results: list, dataset: Dataset):
@@ -129,7 +131,7 @@ def proc(*args, **kwargs) -> PipeResult:
129131
res = pdf_parse_union(*args, **kwargs)
130132
return PipeResult(res, self._dataset)
131133

132-
return self.apply(
134+
res = self.apply(
133135
proc,
134136
self._dataset,
135137
imageWriter,
@@ -139,6 +141,11 @@ def proc(*args, **kwargs) -> PipeResult:
139141
debug_mode=debug_mode,
140142
lang=lang,
141143
)
144+
res['_parse_type'] = PARSE_TYPE_TXT
145+
res['_version_name'] = __version__
146+
147+
return res
148+
142149

143150
def pipe_ocr_mode(
144151
self,
@@ -166,7 +173,7 @@ def proc(*args, **kwargs) -> PipeResult:
166173
res = pdf_parse_union(*args, **kwargs)
167174
return PipeResult(res, self._dataset)
168175

169-
return self.apply(
176+
res = self.apply(
170177
proc,
171178
self._dataset,
172179
imageWriter,
@@ -176,3 +183,7 @@ def proc(*args, **kwargs) -> PipeResult:
176183
debug_mode=debug_mode,
177184
lang=lang,
178185
)
186+
res['_parse_type'] = PARSE_TYPE_OCR
187+
188+
res['_version_name'] = __version__
189+
return res

magic_pdf/user_api.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,7 @@
1515
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
1616
from magic_pdf.pdf_parse_by_ocr import parse_pdf_by_ocr
1717
from magic_pdf.pdf_parse_by_txt import parse_pdf_by_txt
18-
19-
PARSE_TYPE_TXT = 'txt'
20-
PARSE_TYPE_OCR = 'ocr'
18+
from magic_pdf.config.constants import PARSE_TYPE_TXT, PARSE_TYPE_OCR
2119

2220

2321
def parse_txt_pdf(

0 commit comments

Comments
 (0)