From f4a7e0d74dec9ed3573c630eb3db792d3e90cbd0 Mon Sep 17 00:00:00 2001 From: liukaiwen Date: Tue, 23 Apr 2024 17:30:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86para=5Fsplit?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E4=B8=A2=E5=A4=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- magic_pdf/para/para_split_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/magic_pdf/para/para_split_v2.py b/magic_pdf/para/para_split_v2.py index 748eb31c..353dfb32 100644 --- a/magic_pdf/para/para_split_v2.py +++ b/magic_pdf/para/para_split_v2.py @@ -246,11 +246,11 @@ def __group_line_by_layout(blocks, layout_bboxes, lang="en"): for lyout in layout_bboxes: lines = [line for block in blocks if block["type"] == BlockType.Text and is_in_layout(block['bbox'], lyout['layout_bbox']) for line in block['lines']] - blocks = [block for block in blocks if is_in_layout(block['bbox'], lyout['layout_bbox'])] + blocks_in_layout = [block for block in blocks if is_in_layout(block['bbox'], lyout['layout_bbox'])] lines_group.append(lines) - blocks_group.append(blocks) + blocks_group.append(blocks_in_layout) return lines_group, blocks_group