Skip to content

Commit

Permalink
feat: update remove overlap
Browse files (browse the repository at this point in the history)
  • Loading branch information
许瑞 authored and 许瑞 committed Apr 28, 2024
1 parent 96d17cb commit 6a3d1f2
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
3 changes: 0 additions & 3 deletions magic_pdf/model/magic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,6 @@ def __get_blocks_by_type(
blocks.append(block)
return blocks

def get_model_list(self, page_no):
return self.__model_list[page_no]


if __name__ == "__main__":
drw = DiskReaderWriter(r"D:/project/20231108code-clean")
Expand Down
16 changes: 14 additions & 2 deletions magic_pdf/pre_proc/remove_bbox_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,21 @@

def _remove_overlap_between_bbox(spans):
res = []
for v in spans:

keeps = [True] * len(spans)
for i in range(len(spans)):
for j in range(len(spans)):
if i == j:
continue
if _is_in(spans[i]["bbox"], spans[j]["bbox"]):
keeps[i] = False

for idx, v in enumerate(spans):
if not keeps[idx]:
continue

for i in range(len(res)):
if _is_in(res[i]["bbox"], v["bbox"]) or _is_in(v["bbox"], res[i]["bbox"]):
if _is_in(v["bbox"], res[i]["bbox"]):
continue
if _is_in_or_part_overlap(res[i]["bbox"], v["bbox"]):
ix0, iy0, ix1, iy1 = res[i]["bbox"]
Expand Down

0 comments on commit 6a3d1f2

Please sign in to comment.