Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: draw block based on block_type #63

Merged
merged 1 commit into from
Apr 23, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 134 additions & 40 deletions magic_pdf/libs/draw_bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,23 @@ def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
x0, y0, x1, y1 = bbox
rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle
if fill_config:
page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True) # Draw the rectangle
page.draw_rect(
rect_coords,
color=None,
fill=new_rgb,
fill_opacity=0.3,
width=0.5,
overlay=True,
) # Draw the rectangle
else:
page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True) # Draw the rectangle
page.draw_rect(
rect_coords,
color=new_rgb,
fill=None,
fill_opacity=1,
width=0.5,
overlay=True,
) # Draw the rectangle


def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config):
Expand All @@ -27,37 +41,113 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config):
x0, y0, x1, y1 = bbox
rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle
if fill_config:
page.draw_rect(rect_coords, color=None, fill=new_rgb, fill_opacity=0.3, width=0.5, overlay=True) # Draw the rectangle
page.draw_rect(
rect_coords,
color=None,
fill=new_rgb,
fill_opacity=0.3,
width=0.5,
overlay=True,
) # Draw the rectangle
else:
page.draw_rect(rect_coords, color=new_rgb, fill=None, fill_opacity=1, width=0.5, overlay=True) # Draw the rectangle
page.insert_text((x0, y0+10), str(j + 1), fontsize=10, color=new_rgb) # Insert the index at the top left corner of the rectangle
page.draw_rect(
rect_coords,
color=new_rgb,
fill=None,
fill_opacity=1,
width=0.5,
overlay=True,
) # Draw the rectangle
page.insert_text(
(x0, y0 + 10), str(j + 1), fontsize=10, color=new_rgb
) # Insert the index at the top left corner of the rectangle


# NOTE(review): this span is a rendered diff with indentation stripped. It appears to interleave
# removed lines (single-quoted keys) with their replacements (double-quoted keys) -- confirm against
# the merged file before treating the text below as runnable code.
def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
# Gather bounding boxes from every page entry in `pdf_info`, draw them onto the matching page of
# the PDF opened from `pdf_bytes`, and save the result as "{out_path}/layout.pdf".
# Each *_list below is an outer list that receives one inner list of bboxes per page.
layout_bbox_list = []
blocks_bbox_list = []
dropped_bbox_list = []
tables_list, tables_body_list, tables_caption_list, tables_footnote_list = (
[],
[],
[],
[],
)
imgs_list, imgs_body_list, imgs_caption_list = [], [], []
titles_list = []
texts_list = []
interequations_list = []
for page in pdf_info:
# Per-page accumulators; they are appended to the outer lists once per page.
page_layout_list = []
page_dropped_list = []
page_blocks_bbox_list = []
for layout in page['layout_bboxes']:
page_layout_list.append(layout['layout_bbox'])
tables, tables_body, tables_caption, tables_footnote = [], [], [], []
imgs, imgs_body, imgs_caption = [], [], []
titles = []
texts = []
interequations = []
# Layout regions of the page.
for layout in page["layout_bboxes"]:
page_layout_list.append(layout["layout_bbox"])
layout_bbox_list.append(page_layout_list)
for dropped_bbox in page['discarded_blocks']:
page_dropped_list.append(dropped_bbox['bbox'])
# Blocks that were discarded for this page.
for dropped_bbox in page["discarded_blocks"]:
page_dropped_list.append(dropped_bbox["bbox"])
dropped_bbox_list.append(page_dropped_list)
# NOTE(review): the next three single-quoted lines are the only place blocks_bbox_list is filled;
# they look like the pre-change version of the block loop that follows -- confirm.
for block in page['para_blocks']:
page_blocks_bbox_list.append(block['bbox'])
blocks_bbox_list.append(page_blocks_bbox_list)
# Sort each paragraph block's bbox into a bucket chosen by block["type"].
for block in page["para_blocks"]:
bbox = block["bbox"]
if block["type"] == BlockType.Table:
tables.append(bbox)
# Table blocks carry nested blocks; `bbox` is rebound to each nested block's bbox here.
for nested_block in block["blocks"]:
bbox = nested_block["bbox"]
if nested_block["type"] == BlockType.TableBody:
tables_body.append(bbox)
elif nested_block["type"] == BlockType.TableCaption:
tables_caption.append(bbox)
elif nested_block["type"] == BlockType.TableFootnote:
tables_footnote.append(bbox)
elif block["type"] == BlockType.Image:
imgs.append(bbox)
# Image blocks are handled like tables, with body and caption sub-blocks.
for nested_block in block["blocks"]:
bbox = nested_block["bbox"]
if nested_block["type"] == BlockType.ImageBody:
imgs_body.append(bbox)
elif nested_block["type"] == BlockType.ImageCaption:
imgs_caption.append(bbox)
elif block["type"] == BlockType.Title:
titles.append(bbox)
elif block["type"] == BlockType.Text:
texts.append(bbox)
elif block["type"] == BlockType.InterlineEquation:
interequations.append(bbox)
# Blocks of any other type are not collected by this chain.
tables_list.append(tables)
tables_body_list.append(tables_body)
tables_caption_list.append(tables_caption)
tables_footnote_list.append(tables_footnote)
imgs_list.append(imgs)
imgs_body_list.append(imgs_body)
imgs_caption_list.append(imgs_caption)
titles_list.append(titles)
texts_list.append(texts)
interequations_list.append(interequations)

# Open the PDF from the in-memory bytes and draw every collected list on each page.
# Helper arguments are (page index, bbox lists, page, rgb_config, fill_config).
pdf_docs = fitz.open("pdf", pdf_bytes)
for i, page in enumerate(pdf_docs):
# Layout boxes are numbered and drawn with fill_config=False; all other lists use fill_config=True.
draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False)
draw_bbox_without_number(i, dropped_bbox_list, page, [0, 255, 0], True)
# NOTE(review): this call uses blocks_bbox_list, which only the single-quoted lines above fill;
# presumably a removed diff line superseded by the per-type calls below -- confirm.
draw_bbox_without_number(i, blocks_bbox_list, page, [0, 0, 255], True)
draw_bbox_without_number(i, tables_list, page, [153, 153, 0], True) # color !
draw_bbox_without_number(i, tables_body_list, page, [204, 204, 0], True)
draw_bbox_without_number(i, tables_caption_list, page, [255, 255, 102], True)
draw_bbox_without_number(i, tables_footnote_list, page, [229, 255, 204], True)
draw_bbox_without_number(i, imgs_list, page, [51, 102, 0], True)
draw_bbox_without_number(i, imgs_body_list, page, [153, 255, 51], True)
draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255], True)
draw_bbox_without_number(i, titles_list, page, [102, 102, 255], True)
draw_bbox_without_number(i, texts_list, page, [153, 0, 76], True)
draw_bbox_without_number(i, interequations_list, page, [160, 160, 160], True)

# Save the PDF
pdf_docs.save(f"{out_path}/layout.pdf")


def draw_span_bbox(pdf_info, pdf_bytes, out_path):
text_list = []
inline_equation_list = []
Expand All @@ -70,34 +160,38 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
page_interline_equation_list = []
page_image_list = []
page_table_list = []
for block in page['para_blocks']:
if block['type'] in [BlockType.Text, BlockType.Title, BlockType.InterlineEquation]:
for line in block['lines']:
for span in line['spans']:
if span['type'] == ContentType.Text:
page_text_list.append(span['bbox'])
elif span['type'] == ContentType.InlineEquation:
page_inline_equation_list.append(span['bbox'])
elif span['type'] == ContentType.InterlineEquation:
page_interline_equation_list.append(span['bbox'])
elif span['type'] == ContentType.Image:
page_image_list.append(span['bbox'])
elif span['type'] == ContentType.Table:
page_table_list.append(span['bbox'])
elif block['type'] in [BlockType.Image, BlockType.Table]:
for block in page["para_blocks"]:
if block["type"] in [
BlockType.Text,
BlockType.Title,
BlockType.InterlineEquation,
]:
for line in block["lines"]:
for span in line["spans"]:
if span["type"] == ContentType.Text:
page_text_list.append(span["bbox"])
elif span["type"] == ContentType.InlineEquation:
page_inline_equation_list.append(span["bbox"])
elif span["type"] == ContentType.InterlineEquation:
page_interline_equation_list.append(span["bbox"])
elif span["type"] == ContentType.Image:
page_image_list.append(span["bbox"])
elif span["type"] == ContentType.Table:
page_table_list.append(span["bbox"])
elif block["type"] in [BlockType.Image, BlockType.Table]:
for sub_block in block["blocks"]:
for line in sub_block['lines']:
for span in line['spans']:
if span['type'] == ContentType.Text:
page_text_list.append(span['bbox'])
elif span['type'] == ContentType.InlineEquation:
page_inline_equation_list.append(span['bbox'])
elif span['type'] == ContentType.InterlineEquation:
page_interline_equation_list.append(span['bbox'])
elif span['type'] == ContentType.Image:
page_image_list.append(span['bbox'])
elif span['type'] == ContentType.Table:
page_table_list.append(span['bbox'])
for line in sub_block["lines"]:
for span in line["spans"]:
if span["type"] == ContentType.Text:
page_text_list.append(span["bbox"])
elif span["type"] == ContentType.InlineEquation:
page_inline_equation_list.append(span["bbox"])
elif span["type"] == ContentType.InterlineEquation:
page_interline_equation_list.append(span["bbox"])
elif span["type"] == ContentType.Image:
page_image_list.append(span["bbox"])
elif span["type"] == ContentType.Table:
page_table_list.append(span["bbox"])
text_list.append(page_text_list)
inline_equation_list.append(page_inline_equation_list)
interline_equation_list.append(page_interline_equation_list)
Expand Down
Loading