diff --git a/magic_pdf/model/sub_modules/language_detection/utils.py b/magic_pdf/model/sub_modules/language_detection/utils.py index 5f60aa89..583e2407 100644 --- a/magic_pdf/model/sub_modules/language_detection/utils.py +++ b/magic_pdf/model/sub_modules/language_detection/utils.py @@ -24,11 +24,11 @@ def get_model_config(): config_path = os.path.join(model_config_dir, 'model_configs.yaml') with open(config_path, 'r', encoding='utf-8') as f: configs = yaml.load(f, Loader=yaml.FullLoader) - return local_models_dir, device, configs + return root_dir, local_models_dir, device, configs def get_text_images(simple_images): - local_models_dir, device, configs = get_model_config() + _, local_models_dir, device, configs = get_model_config() atom_model_manager = AtomModelSingleton() temp_layout_model = atom_model_manager.get_atom_model( atom_model_name=AtomicModel.Layout, @@ -69,15 +69,11 @@ def model_init(model_name: str): atom_model_manager = AtomModelSingleton() if model_name == MODEL_NAME.YOLO_V11_LangDetect: - local_models_dir, device, configs = get_model_config() + root_dir, _, device, _ = get_model_config() model = atom_model_manager.get_atom_model( atom_model_name=AtomicModel.LangDetect, langdetect_model_name=MODEL_NAME.YOLO_V11_LangDetect, - langdetect_model_weight=str( - os.path.join( - local_models_dir, configs['weights'][MODEL_NAME.YOLO_V11_LangDetect] - ) - ), + langdetect_model_weight=str(os.path.join(root_dir, 'resources', 'yolov11-langdetect', 'yolo_v11_ft.pt')), device=device, ) else: diff --git a/magic_pdf/pdf_parse_union_core_v2.py b/magic_pdf/pdf_parse_union_core_v2.py index 06ce1786..6dcf6e59 100644 --- a/magic_pdf/pdf_parse_union_core_v2.py +++ b/magic_pdf/pdf_parse_union_core_v2.py @@ -768,6 +768,11 @@ def parse_page_core( """重排block""" sorted_blocks = sorted(fix_blocks, key=lambda b: b['index']) + """block内重排(img和table的block内多个caption或footnote的排序)""" + for block in sorted_blocks: + if block['type'] in [BlockType.Image, BlockType.Table]: + block['blocks'] = sorted(block['blocks'], key=lambda b: b['index']) + """获取QA需要外置的list""" images, tables, interline_equations = get_qa_need_list_v2(sorted_blocks) diff --git a/magic_pdf/resources/model_config/model_configs.yaml b/magic_pdf/resources/model_config/model_configs.yaml index c3c8ab3b..a11f509f 100644 --- a/magic_pdf/resources/model_config/model_configs.yaml +++ b/magic_pdf/resources/model_config/model_configs.yaml @@ -5,5 +5,4 @@ weights: unimernet_small: MFR/unimernet_small struct_eqtable: TabRec/StructEqTable tablemaster: TabRec/TableMaster - rapid_table: TabRec/RapidTable - yolo_v11n_langdetect: LangDetect/YOLO/yolo_v11_ft.pt \ No newline at end of file + rapid_table: TabRec/RapidTable \ No newline at end of file diff --git a/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt b/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt new file mode 100644 index 00000000..8e7dbe70 Binary files /dev/null and b/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt differ diff --git a/projects/gradio_app/examples/complex_layout.pdf b/projects/gradio_app/examples/complex_layout.pdf old mode 100755 new mode 100644 index a4fc9c0f..f9d09673 Binary files a/projects/gradio_app/examples/complex_layout.pdf and b/projects/gradio_app/examples/complex_layout.pdf differ diff --git a/setup.py b/setup.py index 527db274..e234903a 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def parse_requirements(filename): "doclayout_yolo==0.0.2", # doclayout_yolo "rapidocr-paddle", # rapidocr-paddle "rapidocr_onnxruntime", - "rapid_table", # rapid_table + "rapid_table==0.3.0", # rapid_table "PyYAML", # yaml "openai", # openai SDK "detectron2"