Skip to content

Commit

Permalink
fix(ocr): improve ONNX model initialization and error handling
Browse the repository at this point in the history
- Add key length validation for ONNX model initialization
- Move import statements to the top of the file
- Wrap model initialization in a try-except block for better error handling
- Refactor code to improve readability and maintainability
  • Loading branch information
myhloli committed Jan 20, 2025
1 parent f473028 commit b3d60b9
Showing 1 changed file with 33 additions and 26 deletions.
59 changes: 33 additions & 26 deletions magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@
from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold
from magic_pdf.pre_proc.ocr_dict_merge import merge_spans_to_line

import importlib.resources
from paddleocr import PaddleOCR
from ppocr.utils.utility import check_and_read


@@ -327,30 +329,35 @@ def get_onnx_model(self, **kwargs):
return self._models[key]

# onnx_model_init(key): build a PaddleOCR instance that runs the ONNX
# detection / recognition / classification models located under the
# `rapidocr_onnxruntime.models` package.
#
# `key` is indexed positionally:
#   key[0] -> "lang" (only passed on when it is not None)
#   key[1] -> "det_db_box_thresh"
#   key[2] -> "use_dilation"
#   key[3] -> "det_db_unclip_ratio"
#
# Returns the constructed PaddleOCR object; on failure it logs and calls
# exit(1) instead of raising to the caller.
#
# NOTE(review): this is a rendered diff whose +/- markers and indentation
# were lost, so the pre-commit and post-commit bodies appear back to back.
# The section markers below follow the hunk header counts (30 old lines,
# 35 new lines) -- confirm against the repository if exactness matters.
def onnx_model_init(key):
# ---- pre-commit body (lines removed by this commit) ----

# Function-local import; the commit moves it to the top of the file.
import importlib.resources

# Locate the directory of the bundled ONNX model files.
with importlib.resources.path('rapidocr_onnxruntime.models','') as resource_path:
onnx_model = None
additional_ocr_params = {
"use_onnx": True,
"det_model_dir": f'{resource_path}/ch_PP-OCRv4_det_infer.onnx',
"rec_model_dir": f'{resource_path}/ch_PP-OCRv4_rec_infer.onnx',
"cls_model_dir": f'{resource_path}/ch_ppocr_mobile_v2.0_cls_infer.onnx',
"det_db_box_thresh": key[1],
"use_dilation": key[2],
"det_db_unclip_ratio": key[3],
}
# logger.info(f"additional_ocr_params: {additional_ocr_params}")

# Only override the language when the caller supplied one.
if key[0] is not None:
additional_ocr_params["lang"] = key[0]

# Function-local import; the commit moves it to the top of the file.
from paddleocr import PaddleOCR
onnx_model = PaddleOCR(**additional_ocr_params)

if onnx_model is None:
logger.error('model init failed')
# ---- post-commit addition: validate the key before indexing key[1..3] ----
if len(key) < 4:
logger.error('Invalid key length, expected at least 4 elements')
# Unchanged context line: it terminates the old "model init failed" branch
# and, after the commit, the new key-length check.
exit(1)
# ---- pre-commit tail (removed by this commit) ----
else:
return onnx_model

# ---- post-commit body (lines added by this commit) ----
# Model construction is now wrapped so that any Exception raised while
# resolving the model directory or building PaddleOCR is logged with a
# traceback before the process exits.
try:
with importlib.resources.path('rapidocr_onnxruntime.models', '') as resource_path:
additional_ocr_params = {
"use_onnx": True,
"det_model_dir": f'{resource_path}/ch_PP-OCRv4_det_infer.onnx',
"rec_model_dir": f'{resource_path}/ch_PP-OCRv4_rec_infer.onnx',
"cls_model_dir": f'{resource_path}/ch_ppocr_mobile_v2.0_cls_infer.onnx',
"det_db_box_thresh": key[1],
"use_dilation": key[2],
"det_db_unclip_ratio": key[3],
}

# Only override the language when the caller supplied one.
if key[0] is not None:
additional_ocr_params["lang"] = key[0]

# logger.info(f"additional_ocr_params: {additional_ocr_params}")

onnx_model = PaddleOCR(**additional_ocr_params)

# Defensive check: a constructor call normally returns an instance or
# raises, so this branch is not expected to trigger.
if onnx_model is None:
logger.error('model init failed')
# exit() raises SystemExit, which is not a subclass of Exception, so the
# handler below does not intercept it.
exit(1)
else:
return onnx_model

except Exception as e:
logger.exception(f'Error initializing model: {e}')
exit(1)

0 comments on commit b3d60b9

Please sign in to comment.