Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/magicpdf/Magic-PDF
Browse files Browse the repository at this point in the history
  • Loading branch information
quyuan committed Apr 25, 2024
2 parents 4888341 + 37112ca commit 59c023f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
7 changes: 4 additions & 3 deletions magic_pdf/pre_proc/ocr_detect_all_bboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def remove_need_drop_blocks(all_bboxes, discarded_blocks):
if calculate_overlap_area_in_bbox1_area_ratio(block_bbox, discarded_block['bbox']) > 0.6:
need_remove.append(block)

-for block in need_remove:
-all_bboxes.remove(block)
-
+if len(need_remove) > 0:
+for block in need_remove:
+all_bboxes.remove(block)
return all_bboxes


Expand All @@ -92,6 +92,7 @@ def remove_overlaps_min_blocks(all_bboxes):
bbox_to_remove = next((block for block in all_bboxes if block[:4] == overlap_box), None)
if bbox_to_remove is not None:
need_remove.append(bbox_to_remove)

if len(need_remove) > 0:
for block in need_remove:
all_bboxes.remove(block)
Expand Down
8 changes: 5 additions & 3 deletions magic_pdf/pre_proc/ocr_span_list_modify.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ def remove_overlaps_min_spans(spans):
if bbox_to_remove is not None:
dropped_spans.append(bbox_to_remove)

-if len(dropped_spans > 0):
+if len(dropped_spans) > 0:
for dropped_span in dropped_spans:
spans.remove(dropped_span)
dropped_span['tag'] = DropTag.SPAN_OVERLAP

return spans, dropped_spans


Expand All @@ -35,8 +36,9 @@ def remove_spans_by_bboxes(spans, need_remove_spans_bboxes):
need_remove_spans.append(span)
break

-for span in need_remove_spans:
-spans.remove(span)
+if len(need_remove_spans) > 0:
+for span in need_remove_spans:
+spans.remove(span)

return spans

Expand Down

0 comments on commit 59c023f

Please sign in to comment.