From 60208b1ba06a72fa53e24f8608e90e4d73aeb2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E5=B0=8F=E8=92=99?= Date: Tue, 23 Apr 2024 15:38:05 +0800 Subject: [PATCH] fix draw_layout_bbox logic --- magic_pdf/cli/magicpdf.py | 7 ++++++- magic_pdf/libs/draw_bbox.py | 37 ++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/magic_pdf/cli/magicpdf.py b/magic_pdf/cli/magicpdf.py index e8f709f3..9aa49836 100644 --- a/magic_pdf/cli/magicpdf.py +++ b/magic_pdf/cli/magicpdf.py @@ -27,6 +27,7 @@ from loguru import logger from pathlib import Path +from magic_pdf.libs.draw_bbox import draw_layout_bbox from magic_pdf.pipe.UNIPipe import UNIPipe from magic_pdf.pipe.OCRPipe import OCRPipe from magic_pdf.pipe.TXTPipe import TXTPipe @@ -56,7 +57,7 @@ def prepare_env(pdf_file_name, method): return local_image_dir, local_md_dir -def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, md_writer, image_dir): +def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, md_writer, image_dir, local_md_dir): if parse_method == "auto": pipe = UNIPipe(pdf_bytes, model_list, image_writer, image_dir, is_debug=True) elif parse_method == "txt": @@ -69,6 +70,8 @@ def _do_parse(pdf_file_name, pdf_bytes, model_list, parse_method, image_writer, pipe.pipe_classify() pipe.pipe_parse() + pdf_info = pipe.pdf_mid_data['pdf_info'] + draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir) md_content = pipe.pipe_mk_markdown() #part_file_name = datetime.now().strftime("%H-%M-%S") md_writer.write( @@ -144,6 +147,7 @@ def read_s3_path(s3path): local_image_rw, local_md_rw, os.path.basename(local_image_dir), + local_md_dir ) @@ -185,6 +189,7 @@ def read_fn(path): local_image_rw, local_md_rw, os.path.basename(local_image_dir), + local_md_dir ) diff --git a/magic_pdf/libs/draw_bbox.py b/magic_pdf/libs/draw_bbox.py index f1ff43b7..1bf10954 100644 --- a/magic_pdf/libs/draw_bbox.py +++ b/magic_pdf/libs/draw_bbox.py @@ -2,7 +2,7 @@ from magic_pdf.libs.ocr_content_type import ContentType -def draw_bbox_without_number(i, bbox_list, page, rgb_config): +def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config): new_rgb = [] for item in rgb_config: item = float(item) / 255 @@ -11,10 +11,13 @@ def draw_bbox_without_number(i, bbox_list, page, rgb_config): for bbox in page_data: x0, y0, x1, y1 = bbox rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle - page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True) # Draw the rectangle + if fill_config: + page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True) # Draw the rectangle + else: + page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True) # Draw the rectangle -def draw_bbox_with_number(i, bbox_list, page, rgb_config): +def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config): new_rgb = [] for item in rgb_config: item = float(item) / 255 @@ -23,27 +26,35 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config): for j, bbox in enumerate(page_data): x0, y0, x1, y1 = bbox rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle - page.draw_rect(rect_coords, color=new_rgb, fill=None, width=0.5, overlay=True) # Draw the rectangle - page.insert_text((x0, y0), str(j + 1), fontsize=10, color=new_rgb) # Insert the index at the top left corner of the rectangle + if fill_config: + page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True) # Draw the rectangle + else: + page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True) # Draw the rectangle + page.insert_text((x0, y0+10), str(j + 1), fontsize=10, color=new_rgb) # Insert the index at the top left corner of the rectangle -def draw_layout_bbox(pdf_info_dict, pdf_bytes, out_path): +def draw_layout_bbox(pdf_info, pdf_bytes, out_path): layout_bbox_list = [] + blocks_bbox_list = [] dropped_bbox_list = [] - for page in pdf_info_dict.values(): + for page in pdf_info: page_layout_list = [] page_dropped_list = [] + page_blocks_bbox_list = [] for layout in page['layout_bboxes']: page_layout_list.append(layout['layout_bbox']) layout_bbox_list.append(page_layout_list) - for drop_tag, dropped_bboxes in page['droped_bboxes'].items(): - for dropped_bbox in dropped_bboxes: - page_dropped_list.append(dropped_bbox) + for dropped_bbox in page['discarded_blocks']: + page_dropped_list.append(dropped_bbox['bbox']) dropped_bbox_list.append(page_dropped_list) + for block in page['para_blocks']: + page_blocks_bbox_list.append(block['bbox']) + blocks_bbox_list.append(page_blocks_bbox_list) pdf_docs = fitz.open("pdf", pdf_bytes) for i, page in enumerate(pdf_docs): - draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0]) - draw_bbox_without_number(i, dropped_bbox_list, page, [0, 255, 0]) + draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False) + draw_bbox_without_number(i, dropped_bbox_list, page, [0, 255, 0], True) + draw_bbox_without_number(i, blocks_bbox_list, page, [0, 0, 255], True) # Save the PDF pdf_docs.save(f"{out_path}/layout.pdf") @@ -55,7 +66,7 @@ def draw_text_bbox(pdf_info_dict, pdf_bytes, out_path): page_text_list = [] page_inline_equation_list = [] page_interline_equation_list = [] - for block in page['preproc_blocks']: + for block in page['para_blocks']: for line in block['lines']: for span in line['spans']: if span['type'] == ContentType.Text: