6
6
from magic_pdf .config .enums import SupportedPdfParseMethod
7
7
from magic_pdf .data .data_reader_writer import DataWriter
8
8
from magic_pdf .data .dataset import Dataset
9
+ from magic_pdf .libs .version import __version__
9
10
from magic_pdf .filter import classify
10
11
from magic_pdf .libs .draw_bbox import draw_model_bbox
11
12
from magic_pdf .pdf_parse_union_core_v2 import pdf_parse_union
12
13
from magic_pdf .pipe .operators import PipeResult
13
14
from magic_pdf .model import InferenceResultBase
14
-
15
+ from magic_pdf .libs .version import __version__
16
+ from magic_pdf .config .constants import PARSE_TYPE_TXT , PARSE_TYPE_OCR
15
17
16
18
class InferenceResult (InferenceResultBase ):
17
19
def __init__ (self , inference_results : list , dataset : Dataset ):
@@ -129,7 +131,7 @@ def proc(*args, **kwargs) -> PipeResult:
129
131
res = pdf_parse_union (* args , ** kwargs )
130
132
return PipeResult (res , self ._dataset )
131
133
132
- return self .apply (
134
+ res = self .apply (
133
135
proc ,
134
136
self ._dataset ,
135
137
imageWriter ,
@@ -139,6 +141,11 @@ def proc(*args, **kwargs) -> PipeResult:
139
141
debug_mode = debug_mode ,
140
142
lang = lang ,
141
143
)
144
+ res ['_parse_type' ] = PARSE_TYPE_TXT
145
+ res ['_version_name' ] = __version__
146
+
147
+ return res
148
+
142
149
143
150
def pipe_ocr_mode (
144
151
self ,
@@ -166,7 +173,7 @@ def proc(*args, **kwargs) -> PipeResult:
166
173
res = pdf_parse_union (* args , ** kwargs )
167
174
return PipeResult (res , self ._dataset )
168
175
169
- return self .apply (
176
+ res = self .apply (
170
177
proc ,
171
178
self ._dataset ,
172
179
imageWriter ,
@@ -176,3 +183,7 @@ def proc(*args, **kwargs) -> PipeResult:
176
183
debug_mode = debug_mode ,
177
184
lang = lang ,
178
185
)
186
+ res ['_parse_type' ] = PARSE_TYPE_OCR
187
+
188
+ res ['_version_name' ] = __version__
189
+ return res
0 commit comments