Skip to content

Commit

Permalink
Merge pull request #1601 from myhloli/dev
Browse files (browse the repository at this point in the history)
refactor(pdf_parse): uncomment char bbox validation logic
  • Loading branch information
myhloli authored Jan 22, 2025
2 parents 98c0568 + c38060d commit c7a3a68
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
7 changes: 5 additions & 2 deletions magic_pdf/libs/boxbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,13 @@ def calculate_iou(bbox1, bbox2):
bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])

if any([bbox1_area == 0, bbox2_area == 0]):
return 0

# Compute the intersection over union by taking the intersection area
# and dividing it by the sum of both areas minus the intersection area
iou = intersection_area / float(bbox1_area + bbox2_area -
intersection_area)
iou = intersection_area / float(bbox1_area + bbox2_area - intersection_area)

return iou


Expand Down
7 changes: 4 additions & 3 deletions magic_pdf/pdf_parse_union_core_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,10 @@ def fill_char_in_spans(spans, all_chars):

for char in all_chars:
# 跳过非法bbox的char
x1, y1, x2, y2 = char['bbox']
if abs(x1 - x2) <= 0.01 or abs(y1 - y2) <= 0.01:
continue
# x1, y1, x2, y2 = char['bbox']
# if abs(x1 - x2) <= 0.01 or abs(y1 - y2) <= 0.01:
# continue

for span in spans:
if calculate_char_in_span(char['bbox'], span['bbox'], char['c']):
span['chars'].append(char)
Expand Down

0 comments on commit c7a3a68

Please sign in to comment.