-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 3 changed files with 148 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
Content-Type: multipart/mixed; boundary="===============8484938434343225034==" | ||
MIME-Version: 1.0 | ||
Subject: Test Email Document | ||
From: John Doe <[email protected]> | ||
To: Jane Smith <[email protected]> | ||
Date: Wed, 18 Dec 2024 10:00:00 +0000 | ||
CC: [email protected] | ||
|
||
--===============8484938434343225034== | ||
Content-Type: text/plain; charset="us-ascii" | ||
MIME-Version: 1.0 | ||
Content-Transfer-Encoding: 7bit | ||
This is a test email with multiple parts. | ||
It contains: | ||
- Plain text content | ||
- An attachment | ||
- Various headers | ||
Best regards, | ||
John Doe | ||
--===============8484938434343225034== | ||
Content-Type: application/txt | ||
MIME-Version: 1.0 | ||
Content-Transfer-Encoding: base64 | ||
Content-Disposition: attachment; filename="test.txt" | ||
VGhpcyBpcyB0ZXN0IGF0dGFjaG1lbnQgY29udGVudA== | ||
--===============8484938434343225034==-- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -126,6 +126,20 @@ | |
"髙橋淳,35,名古屋", | ||
] | ||
|
||
EML_TEST_STRINGS = [ | ||
"## Email Headers", | ||
"**From:** John Doe <[email protected]>", | ||
"**To:** Jane Smith <[email protected]>", | ||
"**Subject:** Test Email Document", | ||
"**CC:** [email protected]", | ||
"## Email Content", | ||
"This is a test email with multiple parts", | ||
"- Plain text content", | ||
"- An attachment", | ||
"## Attachments", | ||
"- test.txt (application/txt, 31 bytes)", | ||
] | ||
|
||
LLM_TEST_STRINGS = [ | ||
"5bda1dd6", | ||
] | ||
|
@@ -197,6 +211,13 @@ def test_markitdown_local() -> None: | |
text_content = result.text_content.replace("\\", "") | ||
assert test_string in text_content | ||
|
||
# Test EML processing | ||
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.eml")) | ||
assert result.title == "Test Email Document" | ||
for test_string in EML_TEST_STRINGS: | ||
text_content = result.text_content.replace("\\", "") | ||
assert test_string in text_content | ||
|
||
# Test HTML processing | ||
result = markitdown.convert( | ||
os.path.join(TEST_FILES_DIR, "test_blog.html"), url=BLOG_TEST_URL | ||
|