Skip to content

Commit

Permalink
add version_name to middle json
Browse files Browse the repository at this point in the history
  • Loading branch information
myhloli committed Jun 3, 2024
1 parent 496045f commit bd18342
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
15 changes: 15 additions & 0 deletions magic_pdf/libs/commons.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import json
import os, re, configparser
import subprocess
import time

import boto3
Expand All @@ -11,6 +12,20 @@
import fitz # 1.23.9中已经切换到rebase
# import fitz_old as fitz # 使用1.23.9之前的pymupdf库


def get_version():
    """Return the package version derived from the nearest git tag.

    Runs ``git describe --tags`` in the current working directory and
    expects output shaped like ``magic_pdf-<version>-released``. The
    ``<version>`` segment (the text between the first and second ``-``)
    is returned.

    This is a best-effort lookup: any failure is printed and the
    placeholder version ``"0.0.0"`` is returned instead of raising.

    Returns:
        str: The version segment of the tag, or ``"0.0.0"`` when git cannot
        be run, the command exits non-zero, the output cannot be decoded,
        or the tag does not match the expected format.
    """
    command = ["git", "describe", "--tags"]
    try:
        version = subprocess.check_output(command).decode().strip()
        version_parts = version.split("-")
        has_expected_prefix = (
            len(version_parts) > 1 and version_parts[0].startswith("magic_pdf")
        )
        # An empty version segment (e.g. "magic_pdf--released") is treated as
        # malformed rather than being returned as an empty version string.
        if has_expected_prefix and version_parts[1]:
            return version_parts[1]
        raise ValueError(f"Invalid version tag {version}. Expected format is magic_pdf-<version>-released.")
    except (subprocess.CalledProcessError, OSError, ValueError) as e:
        # CalledProcessError: git exited non-zero (no repo / no tags).
        # OSError: the git executable is missing or cannot be launched.
        # ValueError: malformed tag, or undecodable output (UnicodeDecodeError).
        print(e)
        return "0.0.0"

def get_delta_time(input_time):
    """Return the seconds elapsed since ``input_time``, rounded to 2 decimals.

    Args:
        input_time: A timestamp on the same clock as ``time.time()``.

    Returns:
        float: ``time.time() - input_time`` rounded to two decimal places.
    """
    elapsed_seconds = time.time() - input_time
    return round(elapsed_seconds, 2)

Expand Down
8 changes: 7 additions & 1 deletion magic_pdf/user_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@

from loguru import logger

from magic_pdf.libs.commons import get_version
from magic_pdf.rw import AbsReaderWriter
from magic_pdf.pdf_parse_by_ocr_v2 import parse_pdf_by_ocr
from magic_pdf.pdf_parse_by_txt_v2 import parse_pdf_by_txt


# Markers written to pdf_info_dict["_parse_type"] to record which parsing
# mode (text extraction vs. OCR) produced the result.
PARSE_TYPE_TXT = "txt"
PARSE_TYPE_OCR = "ocr"

Expand All @@ -39,6 +39,8 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit

pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT

pdf_info_dict["_version_name"] = get_version()

return pdf_info_dict


Expand All @@ -57,6 +59,8 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit

pdf_info_dict["_parse_type"] = PARSE_TYPE_OCR

pdf_info_dict["_version_name"] = get_version()

return pdf_info_dict


Expand Down Expand Up @@ -118,4 +122,6 @@ def calculate_not_printable_rate(text):
else:
pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT

pdf_info_dict["_version_name"] = get_version()

return pdf_info_dict
18 changes: 4 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from setuptools import setup, find_packages
import subprocess

from magic_pdf.libs.commons import get_version


def parse_requirements(filename):
with open(filename) as f:
lines = f.read().splitlines()
Expand All @@ -15,23 +18,10 @@ def parse_requirements(filename):

return requires

def get_version():
    """Return the release version taken from the nearest git tag.

    Invokes ``git describe --tags`` and expects a tag of the form
    ``magic_pdf-<version>-released``; the ``<version>`` segment is returned.

    The lookup never raises: on failure the error is printed and the
    fallback version ``"0.0.0"`` is returned.

    Returns:
        str: The version segment of the tag, or ``"0.0.0"`` if git is
        unavailable, the command fails, the output cannot be decoded, or
        the tag is not in the expected format.
    """
    command = ["git", "describe", "--tags"]
    try:
        version = subprocess.check_output(command).decode().strip()
        version_parts = version.split("-")
        has_expected_prefix = (
            len(version_parts) > 1 and version_parts[0].startswith("magic_pdf")
        )
        # Reject an empty version segment (e.g. "magic_pdf--released") instead
        # of handing an empty string to the caller.
        if has_expected_prefix and version_parts[1]:
            return version_parts[1]
        raise ValueError(f"Invalid version tag {version}. Expected format is magic_pdf-<version>-released.")
    except (subprocess.CalledProcessError, OSError, ValueError) as e:
        # Only the failures this lookup can legitimately produce are absorbed:
        # non-zero git exit, git not launchable, or a malformed/undecodable tag.
        print(e)
        return "0.0.0"


setup(
name="magic_pdf", # 项目名
# version="0.1.3", # 版本号
version=get_version(), # 自动从tag中获取版本号
packages=find_packages(), # 包含所有的包
install_requires=parse_requirements('requirements.txt'), # 项目依赖的第三方库
Expand Down

0 comments on commit bd18342

Please sign in to comment.