Skip to content

Commit

Permalink
修复了para_split内容丢失
Browse files Browse the repository at this point in the history
  • Loading branch information
liukaiwen committed Apr 23, 2024
1 parent 1d16c31 commit f4a7e0d
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions magic_pdf/para/para_split_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,11 @@ def __group_line_by_layout(blocks, layout_bboxes, lang="en"):
for lyout in layout_bboxes:
lines = [line for block in blocks if block["type"] == BlockType.Text and is_in_layout(block['bbox'], lyout['layout_bbox']) for line in
block['lines']]
- blocks = [block for block in blocks if is_in_layout(block['bbox'], lyout['layout_bbox'])]
+ blocks_in_layout = [block for block in blocks if is_in_layout(block['bbox'], lyout['layout_bbox'])]


lines_group.append(lines)
- blocks_group.append(blocks)
+ blocks_group.append(blocks_in_layout)
return lines_group, blocks_group


Expand Down

0 comments on commit f4a7e0d

Please sign in to comment.