Skip to content

Commit

Permalink
update: changed "method" parameter for PdfConverter to "pdf_engine" f…
Browse files Browse the repository at this point in the history
…or better user instruction. Add examples for PdfConverter.convert() calling.
  • Loading branch information
tungsten106 committed Dec 24, 2024
1 parent 797e0d4 commit ba5df9b
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,18 +677,23 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:

class PdfConverter(DocumentConverter):
"""
Converts PDFs to Markdown. Most style information is ignored, so the results are essentially plain-text.
Converts PDFs to Markdown. Most style information is ignored, so the results are essentially plain-text.
"""

def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
"""
Example:
>>> source = "https://arxiv.org/pdf/2308.08155v2.pdf"
>>> markitdown.convert(source, pdf_engine="pymupdf4llm")
"""
# Bail if not a PDF
extension = kwargs.get("file_extension", "")
if extension.lower() != ".pdf":
return None
method = kwargs.get("method", "pdfminer")
if method == "pdfminer":
pdf_engine = kwargs.get("pdf_engine", "pdfminer")
if pdf_engine == "pdfminer":
text_content = pdfminer.high_level.extract_text(local_path)
elif method == "pymupdf4llm":
elif pdf_engine == "pymupdf4llm":
text_content = pymupdf4llm.to_markdown(local_path, show_progress=False)
else:
return None # unknown method
Expand Down

0 comments on commit ba5df9b

Please sign in to comment.