Skip to content

Commit

Permalink
Pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
MauroDruwel committed Jan 4, 2025
1 parent 3a6f023 commit 1b0d149
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
12 changes: 7 additions & 5 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,18 +705,18 @@ def sanitize_filename(self, name: str) -> str:
"""Sanitizes a string to make it a valid file name across different operating systems."""
# Replace each run of whitespace with a single underscore
name = re.sub(r"\s+", "_", name.strip())

# Replace invalid characters with underscores
name = re.sub(r'[\\/*?:"<>|]', "_", name)

# Remove leading and trailing dots and spaces
name = name.strip(" .")

# Limit the filename to a reasonable length (251 characters)
max_length = 251
if len(name) > max_length:
name = name[:max_length]

return name

def truncate_filename(self, name: str, max_length: int, extension: str = "") -> str:
Expand All @@ -736,7 +736,9 @@ def unique_filename(self, base_path: str, max_length: int = 251) -> str:
while os.path.exists(unique_path):
suffix = f"_{counter}"
# Ensure base is short enough to add the suffix
truncated_base = self.truncate_filename(base, max_length - len(suffix) - len(ext))
truncated_base = self.truncate_filename(
base, max_length - len(suffix) - len(ext)
)
unique_path = f"{truncated_base}{suffix}{ext}"
counter += 1

Expand Down
2 changes: 1 addition & 1 deletion tests/test_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
"# Abstract",
"# Introduction",
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
"GitHub_-_microsoft_markitdown__Python_tool_for_converting_files_and_office_documents_to_Markdown.png"
"GitHub_-_microsoft_markitdown__Python_tool_for_converting_files_and_office_documents_to_Markdown.png",
]

PPTX_TEST_STRINGS = [
Expand Down

0 comments on commit 1b0d149

Please sign in to comment.