From bd1834284edd6297e458f960d12cfd520206c914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E5=B0=8F=E8=92=99?= Date: Mon, 3 Jun 2024 18:51:38 +0800 Subject: [PATCH] add version_name to middle json --- magic_pdf/libs/commons.py | 15 +++++++++++++++ magic_pdf/user_api.py | 8 +++++++- setup.py | 18 ++++-------------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/magic_pdf/libs/commons.py b/magic_pdf/libs/commons.py index 5cf580eb..ffe69e86 100644 --- a/magic_pdf/libs/commons.py +++ b/magic_pdf/libs/commons.py @@ -1,6 +1,7 @@ import datetime import json import os, re, configparser +import subprocess import time import boto3 @@ -11,6 +12,20 @@ import fitz # 1.23.9中已经切换到rebase # import fitz_old as fitz # 使用1.23.9之前的pymupdf库 + +def get_version(): + command = ["git", "describe", "--tags"] + try: + version = subprocess.check_output(command).decode().strip() + version_parts = version.split("-") + if len(version_parts) > 1 and version_parts[0].startswith("magic_pdf"): + return version_parts[1] + else: + raise ValueError(f"Invalid version tag {version}. 
Expected format is magic_pdf-<version>-released.") + except Exception as e: + print(e) + return "0.0.0" + def get_delta_time(input_time): return round(time.time() - input_time, 2) diff --git a/magic_pdf/user_api.py b/magic_pdf/user_api.py index d9bce627..8f58b314 100644 --- a/magic_pdf/user_api.py +++ b/magic_pdf/user_api.py @@ -16,11 +16,11 @@ from loguru import logger +from magic_pdf.libs.commons import get_version from magic_pdf.rw import AbsReaderWriter from magic_pdf.pdf_parse_by_ocr_v2 import parse_pdf_by_ocr from magic_pdf.pdf_parse_by_txt_v2 import parse_pdf_by_txt - PARSE_TYPE_TXT = "txt" PARSE_TYPE_OCR = "ocr" @@ -39,6 +39,8 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT + pdf_info_dict["_version_name"] = get_version() + return pdf_info_dict @@ -57,6 +59,8 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit pdf_info_dict["_parse_type"] = PARSE_TYPE_OCR + pdf_info_dict["_version_name"] = get_version() + return pdf_info_dict @@ -118,4 +122,6 @@ def calculate_not_printable_rate(text): else: pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT + pdf_info_dict["_version_name"] = get_version() + return pdf_info_dict diff --git a/setup.py b/setup.py index db40fb5a..721d2a52 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,8 @@ from setuptools import setup, find_packages -import subprocess + +from magic_pdf.libs.commons import get_version + + def parse_requirements(filename): with open(filename) as f: lines = f.read().splitlines() @@ -15,23 +18,10 @@ def parse_requirements(filename): return requires -def get_version(): - command = ["git", "describe", "--tags"] - try: - version = subprocess.check_output(command).decode().strip() - version_parts = version.split("-") - if len(version_parts) > 1 and version_parts[0].startswith("magic_pdf"): - return version_parts[1] - else: - raise ValueError(f"Invalid version tag {version}. 
Expected format is magic_pdf-<version>-released.") - except Exception as e: - print(e) - return "0.0.0" setup( name="magic_pdf", # 项目名 - # version="0.1.3", # 版本号 version=get_version(), # 自动从tag中获取版本号 packages=find_packages(), # 包含所有的包 install_requires=parse_requirements('requirements.txt'), # 项目依赖的第三方库