Skip to content

Commit d543259

Browse files
committed
Cleaned up test of parse_pdf_to_pages
1 parent 0751f34 commit d543259

File tree

1 file changed

+7
-10
lines changed

1 file changed

+7
-10
lines changed

tests/test_paperqa.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -989,18 +989,15 @@ async def test_pdf_reader_w_no_chunks(stub_data_dir: Path) -> None:
989989
assert docs.texts[0].embedding is None, "Should have deferred the embedding"
990990

991991

992-
def test_pdf_reader_get_text(stub_data_dir: Path) -> None:
992+
def test_parse_pdf_to_pages(stub_data_dir: Path) -> None:
993993
filepath = stub_data_dir / "pasa.pdf"
994-
parsedText = parse_pdf_to_pages(filepath)
995-
assert parsedText is not None, "Parsed text should not be None"
996-
assert parsedText.content is not None
997-
assert len(parsedText.content) > 0, "Parsed text should not be empty"
998-
assert "1" in parsedText.content, "Parsed text should contain page 1"
999-
firsttext = parsedText.content["1"]
994+
parsed_text = parse_pdf_to_pages(filepath)
995+
assert isinstance(parsed_text.content, dict)
996+
assert "1" in parsed_text.content, "Parsed text should contain page 1"
1000997
assert (
1001-
"Abstract\n\nWe introduce PaSa, an advanced Paper Search\nagent powered by large language models."
1002-
in firsttext
1003-
)
998+
"Abstract\n\nWe introduce PaSa, an advanced Paper Search"
999+
"\nagent powered by large language models."
1000+
) in parsed_text.content["1"]
10041001

10051002

10061003
@pytest.mark.vcr

0 commit comments

Comments
 (0)