MarkPDFdown · jorben · May 24, 2025 · May 24, 2025
diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml
@@ -25,4 +25,4 @@ jobs:
           changelog_filename: CHANGELOG.md
           committer_username: 'github-actions[bot]'
           committer_email: 'github-actions[bot]@users.noreply.github.com'
-          release_version: ${{ github.event.inputs.release_version }}
+          release_version: ${{ github.event.inputs.release_version }}
diff --git a/.gitignore b/.gitignore
@@ -175,4 +175,4 @@ cython_debug/
 .pypirc
 
 # MacOS ds_store files
-.DS_Store
+.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,17 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.11
+    hooks:
+      - id: ruff
+        args: [ --fix ]
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0 # 2025-05-24
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-toml
+      - id: check-merge-conflict
+      - id: check-added-large-files
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.9.22
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,5 @@
 from python:3.9-slim
 WORKDIR /app
 COPY . /app
-RUN pip install -r requirements.txt
-CMD ["python", "main.py"]
+RUN pip install -e .
+CMD ["python", "main.py"]
diff --git a/Makefile b/Makefile
@@ -0,0 +1,23 @@
+.PHONY: install format lint fix test check update
+
+install:
+	uv sync
+
+format:
+	uv run ruff format .
+
+lint:
+	uv run ruff check .
+
+fix:
+	uv run ruff check . --fix
+
+test:
+	uv run pytest
+
+check:
+	uv run pre-commit run --all-files
+
+update:
+	uv lock --upgrade
+	uv run pre-commit autoupdate
diff --git a/README.md b/README.md
@@ -30,6 +30,23 @@ MarkPDFDown is designed to simplify the process of converting PDF documents into
 
 ## Installation
 
+### Using uv (Recommended)
+
+```bash
+# Install uv if you haven't already
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Clone the repository
+git clone https://github.com/MarkPDFdown/markpdfdown.git
+cd markpdfdown
+
+# Install dependencies and create virtual environment
+uv sync
+
+```
+
+### Using conda
+
 ```bash
 conda create -n markpdfdown python=3.9
 conda activate markpdfdown
@@ -39,17 +56,16 @@ git clone https://github.com/MarkPDFdown/markpdfdown.git
 cd markpdfdown
 
 # Install dependencies
-pip install -r requirements.txt
-
+pip install -e .
 ```
 ## Usage
 ```bash
 # Set up your OpenAI API key
-export OPENAI_API_KEY=<your-api-key>
+export OPENAI_API_KEY="your-api-key"
 # Optionally, set up your OpenAI API base
-export OPENAI_API_BASE=<your-api-base>
+export OPENAI_API_BASE="your-api-base"
 # Optionally, set up your OpenAI API model
-export OPENAI_DEFAULT_MODEL=<your-model>
+export OPENAI_DEFAULT_MODEL="your-model"
 
 # pdf to markdown
 python main.py < tests/input.pdf > output.md
@@ -64,22 +80,75 @@ python main.py page_start page_end < tests/input.pdf > output.md
 
 ## Docker Usage
 ```bash
-docker run -i -e OPENAI_API_KEY=<your-api-key> -e OPENAI_API_BASE=<your-api-base> -e OPENAI_DEFAULT_MODEL=<your-model> jorbenzhu/markpdfdown < input.pdf > output.md
+docker run -i -e OPENAI_API_KEY=your-api-key -e OPENAI_API_BASE=your-api-base -e OPENAI_DEFAULT_MODEL=your-model jorbenzhu/markpdfdown < input.pdf > output.md
+```
+
+## Development Setup
+
+### Code Quality Tools
+
+This project uses `ruff` for linting and formatting, and `pre-commit` for automated code quality checks.
+
+#### Install development dependencies
+
+```bash
+# If using uv
+uv sync --group dev
+
+# If using pip
+pip install -e ".[dev]"
+```
+
+#### Set up pre-commit hooks
+
+```bash
+# Install pre-commit hooks
+pre-commit install
+
+# Run pre-commit on all files (optional)
+pre-commit run --all-files
+```
+
+#### Code formatting and linting
+
+```bash
+# Format code with ruff
+ruff format
+
+# Run linting checks
+ruff check
+
+# Fix auto-fixable issues
+ruff check --fix
 ```
 
 ## Requirements
 - Python 3.9+
-- Dependencies listed in `requirements.txt`
+- [uv](https://astral.sh/uv/) (recommended for package management) or conda/pip
+- Dependencies specified in `pyproject.toml`
 - Access to the specified multimodal AI model
 
 ## Contributing
 Contributions are welcome! Please feel free to submit a Pull Request.
 
 1. Fork the repository
 2. Create your feature branch ( `git checkout -b feature/amazing-feature` )
-3. Commit your changes ( `git commit -m 'feat: Add some amazing feature'` )
-4. Push to the branch ( `git push origin feature/amazing-feature` )
-5. Open a Pull Request
+3. Set up the development environment:
+   ```bash
+   uv sync --group dev
+   pre-commit install
+   ```
+4. Make your changes and ensure code quality:
+   ```bash
+   ruff format
+   ruff check --fix
+   pre-commit run --all-files
+   ```
+5. Commit your changes ( `git commit -m 'feat: Add some amazing feature'` )
+6. Push to the branch ( `git push origin feature/amazing-feature` )
+7. Open a Pull Request
+
+Please ensure your code follows the project's coding standards by running the linting and formatting tools before submitting.
 
 ## License
 This project is licensed under the Apache License 2.0. See the LICENSE file for details.

diff --git a/README_zh.md b/README_zh.md
@@ -30,6 +30,22 @@ MarkPDFDown 是一款智能PDF转换Markdown工具，通过先进的多模态AI
 
 ## 安装指南
 
+### 使用 uv（推荐）
+
+```bash
+# 安装 uv（如果尚未安装）
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# 克隆仓库
+git clone https://github.com/MarkPDFdown/markpdfdown.git
+cd markpdfdown
+
+# 安装依赖并创建虚拟环境
+uv sync
+```
+
+### 使用 conda
+
 ```bash
 conda create -n markpdfdown python=3.9
 conda activate markpdfdown
@@ -39,7 +55,7 @@ git clone https://github.com/MarkPDFdown/markpdfdown.git
 cd markpdfdown
 
 # 安装依赖
-pip install -r requirements.txt
+pip install -e .
 ```
 
 ## 使用指南
@@ -69,19 +85,72 @@ python main.py 起始页码 结束页码 < input.pdf > output.md
 docker run -i -e OPENAI_API_KEY=<你的API密钥> -e OPENAI_API_BASE=<你的API端点> -e OPENAI_DEFAULT_MODEL=<你的模型> jorbenzhu/markpdfdown < input.pdf > output.md
 ```
 
+## 开发环境设置
+
+### 代码质量工具
+
+本项目使用 `ruff` 进行代码检查和格式化，使用 `pre-commit` 进行自动化代码质量检查。
+
+#### 安装开发依赖
+
+```bash
+# 如果使用 uv
+uv sync --group dev
+
+# 如果使用 pip
+pip install -e ".[dev]"
+```
+
+#### 设置 pre-commit 钩子
+
+```bash
+# 安装 pre-commit 钩子
+pre-commit install
+
+# 在所有文件上运行 pre-commit（可选）
+pre-commit run --all-files
+```
+
+#### 代码格式化和检查
+
+```bash
+# 使用 ruff 格式化代码
+ruff format
+
+# 运行代码检查
+ruff check
+
+# 修复可自动修复的问题
+ruff check --fix
+```
+
 ## 依赖环境
 - Python 3.9+
-- 依赖库详见 `requirements.txt`
+- [uv](https://astral.sh/uv/)（推荐的包管理工具）或 conda/pip
+- 项目依赖详见 `pyproject.toml`
 - 可访问的多模态AI模型服务
 
 ## 贡献指南
 欢迎贡献代码！请按以下流程提交PR：
 
 1. Fork 本仓库
 2. 新建功能分支（ `git checkout -b feature/somefeat` ）
-3. 提交修改（ `git commit -m 'feat: 添加XX新功能'` ）
-4. 推送分支（ `git push origin feature/somefeat` ）
-5. 提交Pull Request
+3. 设置开发环境：
+   ```bash
+   uv sync --group dev
+   pre-commit install
+   ```
+4. 进行修改并确保代码质量：
+   ```bash
+   ruff format
+   ruff check --fix
+   pre-commit run --all-files
+   ```
+5. 提交修改（ `git commit -m 'feat: 添加XX新功能'` ）
+6. 推送分支（ `git push origin feature/somefeat` ）
+7. 提交Pull Request
+
+请确保在提交前运行代码检查和格式化工具，以符合项目的代码规范。
 
 ## 开源协议
 本项目采用 Apache License 2.0 开源协议，详见 LICENSE 文件。
@@ -97,4 +166,4 @@ docker run -i -e OPENAI_API_KEY=<你的API密钥> -e OPENAI_API_BASE=<你的API
 [Size]: https://img.shields.io/docker/image-size/jorbenzhu/markpdfdown/latest?color=066da5&label=size
 [Pulls]: https://img.shields.io/docker/pulls/jorbenzhu/markpdfdown.svg?style=flat&label=pulls&logo=docker
 [Tag]: https://img.shields.io/github/release/markpdfdown/markpdfdown.svg
-[License]: https://img.shields.io/github/license/markpdfdown/markpdfdown
+[License]: https://img.shields.io/github/license/markpdfdown/markpdfdown
diff --git a/core/FileWorker.py b/core/FileWorker.py
@@ -1,58 +1,62 @@
-import os
 import logging
-from typing import List
+import os
 
 logger = logging.getLogger(__name__)
 
+
 class FileWorker:
     """
     Base class Worker, define common interface
     """
+
     def __init__(self, input_path: str):
         """
         Initialize Worker
-        
+
         Args:
             input_path (str): Input file path
         """
         self.input_path = input_path
-        
-    def convert_to_images(self, output_dir: str = ".", **kwargs) -> List[str]:
+
+    def convert_to_images(self, output_dir: str = ".", **kwargs) -> list[str]:
         """
         Convert input file to images
-        
+
         Args:
             output_dir (str): Output directory
             **kwargs: Other parameters
-            
+
         Returns:
             List[str]: List of generated image paths
         """
         raise NotImplementedError("Subclasses must implement this method")
 
+
 def create_worker(input_path: str, start_page: int = 1, end_page: int = 0):
     """
     Create corresponding Worker instance based on file extension
-    
+
     Args:
         input_path (str): Input file path
         start_page (int): Starting page number
         end_page (int): Ending page number
-        
+
     Returns:
         Worker: Worker instance
     """
     _, ext = os.path.splitext(input_path)
     ext = ext.lower()
-    
+
     worker = None
-    if ext == '.pdf':
+    if ext == ".pdf":
         from .PDFWorker import PDFWorker
+
         worker = PDFWorker(input_path, start_page, end_page)
-    elif ext == '.jpg' or ext == '.jpeg' or ext == '.png' or ext == '.bmp':
+    elif ext == ".jpg" or ext == ".jpeg" or ext == ".png" or ext == ".bmp":
         from .ImageWorker import ImageWorker
+
         worker = ImageWorker(input_path)
     else:
         raise ValueError(f"Unsupported file type: {ext}")
-        
-    return worker 
+
+    return worker