From d986e39313117bb6e481d68e2ef979f58c7cc843 Mon Sep 17 00:00:00 2001
From: myhloli
Date: Fri, 17 Jan 2025 22:49:42 +0800
Subject: [PATCH] feat(llm_aided): add reasonability check and fine-tuning guidelines

- Added instructions for checking the reasonability of heading levels
- Included guidelines for making fine adjustments based on context and logic
- Emphasized the importance of aligning the final result with the document's actual structure
---
 magic_pdf/post_proc/llm_aided.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/magic_pdf/post_proc/llm_aided.py b/magic_pdf/post_proc/llm_aided.py
index c2eb3c86..5149cb07 100644
--- a/magic_pdf/post_proc/llm_aided.py
+++ b/magic_pdf/post_proc/llm_aided.py
@@ -115,6 +115,11 @@ def llm_aided_title(pdf_info_dict, title_aided_config):
 - 标题层级最多为4级,不要添加过多的层级
 - 优化后的标题只保留代表该标题的层级的整数,不要保留其他信息
 
+5. 合理性检查与微调:
+ - 在完成初步分级后,仔细检查分级结果的合理性
+ - 根据上下文关系和逻辑顺序,对不合理的分级进行微调
+ - 确保最终的分级结果符合文档的实际结构和逻辑
+
 IMPORTANT:
 请直接返回优化过的由标题层级组成的json,格式如下:
 {{"0":1,"1":2,"2":2,"3":3}}