Skip to content

Commit 1eab0db

Browse files
committed
feat: Added support for converting images (JPG/PNG/BMP) to Markdown #6
1 parent d4e6398 commit 1eab0db

File tree

6 files changed

+49
-9
lines changed

6 files changed

+49
-9
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ MarkPDFDown is designed to simplify the process of converting PDF documents into
2020
## Features
2121

2222
- **PDF to Markdown Conversion**: Transform any PDF document into well-formatted Markdown
23+
- **Image to Markdown Conversion**: Transform JPG/PNG/BMP images into well-formatted Markdown
2324
- **Multimodal Understanding**: Leverages AI to comprehend document structure and content
2425
- **Format Preservation**: Maintains headings, lists, tables, and other formatting elements
2526
- **Customizable Model**: Configure the model to suit your needs
@@ -56,6 +57,9 @@ python main.py < tests/input.pdf > output.md
5657
## Advanced Usage
5758
```bash
5859
python main.py page_start page_end < tests/input.pdf > output.md
60+
61+
# image to markdown
62+
python main.py < tests/input_image.png > output.md
5963
```
6064

6165
## Docker Usage

README_zh.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ MarkPDFDown 是一款智能PDF转换Markdown工具,通过先进的多模态AI
2020
## 功能特性
2121

2222
- **PDF转Markdown**:支持任意PDF文档的格式转换
23+
- **图片转Markdown**:支持JPG/PNG/BMP图片内容转Markdown
2324
- **多模态理解**:利用AI理解文档结构和内容
2425
- **格式保留**:完整保留标题、列表、表格等排版元素
2526
- **模型定制**:支持自定义AI模型参数配置
@@ -52,6 +53,9 @@ export OPENAI_DEFAULT_MODEL=<你的模型>
5253

5354
# 运行转换程序
5455
python main.py < tests/input.pdf > output.md
56+
57+
# 图片转换
58+
python main.py < tests/input_image.png > output.md
5559
```
5660

5761
## 高级用法

core/FileWorker.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,9 @@ def create_worker(input_path: str, start_page: int = 1, end_page: int = 0):
4949
if ext == '.pdf':
5050
from .PDFWorker import PDFWorker
5151
worker = PDFWorker(input_path, start_page, end_page)
52-
# can add other types of Worker here
53-
# elif ext == '.docx':
54-
# from .DocxWorker import DocxWorker
55-
# worker = DocxWorker(input_path, start_page, end_page)
52+
elif ext == '.jpg' or ext == '.jpeg' or ext == '.png' or ext == '.bmp':
53+
from .ImageWorker import ImageWorker
54+
worker = ImageWorker(input_path)
5655
else:
5756
raise ValueError(f"Unsupported file type: {ext}")
5857

core/ImageWorker.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import os
2+
import logging
3+
from typing import List
4+
from .FileWorker import FileWorker
5+
6+
logger = logging.getLogger(__name__)
7+
8+
class ImageWorker(FileWorker):
9+
"""
10+
Worker class for processing image files
11+
"""
12+
def __init__(self, input_path: str):
13+
super().__init__(input_path)
14+
self.output_dir = os.path.dirname(input_path)
15+
logger.info("Processing image file %s", input_path)
16+
17+
18+
19+
def convert_to_images(self) -> List[str]:
20+
"""
21+
No conversion is needed for an image input; the input image path is returned as-is
22+
23+
Returns:
24+
List[str]: Single-element list containing the input image path
25+
"""
26+
return [self.input_path]

main.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ def convert_image_to_markdown(image_path):
9898

9999
# Try to get extension from file name
100100
input_filename = os.path.basename(sys.stdin.buffer.name)
101-
logger.info("Input file: %s", input_filename)
102101
input_ext = os.path.splitext(input_filename)[1]
103102

104103
# If there is no extension or the file comes from standard input, try to determine the type by file content
@@ -107,10 +106,18 @@ def convert_image_to_markdown(image_path):
107106
if input_data.startswith(b'%PDF-'):
108107
input_ext = '.pdf'
109108
logger.info("Recognized as PDF file by file content")
110-
# You can add more file type detection
111-
# elif input_data.startswith(b'PK\x03\x04'): # DOCX, XLSX, etc. ZIP format files
112-
# input_ext = '.docx' # Default set to docx
113-
# logger.info("Recognized as Office document file by file content")
109+
# JPEG files start with FF D8 FF; the 4th byte varies (DB, E0 for JFIF, E1 for Exif). NOTE: the check below matches only the FF D8 FF DB variant
110+
elif input_data.startswith(b'\xFF\xD8\xFF\xDB'):
111+
input_ext = '.jpg'
112+
logger.info("Recognized as JPEG file by file content")
113+
# PNG file magic number/signature is 89 50 4E 47
114+
elif input_data.startswith(b'\x89\x50\x4E\x47'):
115+
input_ext = '.png'
116+
logger.info("Recognized as PNG file by file content")
117+
# BMP file magic number/signature is 42 4D
118+
elif input_data.startswith(b'\x42\x4D'):
119+
input_ext = '.bmp'
120+
logger.info("Recognized as BMP file by file content")
114121
else:
115122
logger.error("Unsupported file type")
116123
exit(1)

tests/input_image.png

625 KB
Loading

0 commit comments

Comments
 (0)