Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ jobs:
run: echo "UV_PYTHON=${{ matrix.python-version }}" >> "$GITHUB_ENV"
- run: uv sync
- run: uv run pylint src packages
- run: uv run refurb .
- uses: suzuki-shunsuke/github-action-renovate-config-validator@v1.1.1
test:
runs-on: ubuntu-latest
Expand Down
2 changes: 0 additions & 2 deletions packages/paper-qa-pymupdf/src/paperqa_pymupdf/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pymupdf
from paperqa.types import ParsedMedia, ParsedMetadata, ParsedText
from paperqa.utils import ImpossibleParsingError
from paperqa.version import __version__ as pqa_version


def setup_pymupdf_python_logging() -> None:
Expand Down Expand Up @@ -171,7 +170,6 @@ def parse_pdf_to_pages(

metadata = ParsedMetadata(
parsing_libraries=[f"{pymupdf.__name__} ({pymupdf.__version__})"],
paperqa_version=pqa_version,
total_parsed_text_length=total_length,
count_parsed_media=count_media,
parse_type="pdf",
Expand Down
6 changes: 3 additions & 3 deletions packages/paper-qa-pymupdf/tests/test_paperqa_pymupdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ async def test_parse_pdf_to_pages() -> None:
fig_1_text.text = "stub" # Replace text to confirm multimodality works
docs = Docs()
assert await docs.aadd_texts(texts=[fig_1_text], doc=doc)
for query, substrings_min_counts in [
for query, substrings_min_counts in (
("What actions can the Crawler take?", [(("search", "expand", "stop"), 2)]),
("What actions can the Selector take?", [(("select", "drop"), 2)]),
(
"How many User Query are there, and what do they do?",
[(("two", "2"), 2), (("crawler", "selector"), 2)],
),
]:
):
session = await docs.aquery(query=query)
assert session.contexts, "Expected contexts to be generated"
assert all(
Expand All @@ -107,7 +107,7 @@ async def test_parse_pdf_to_pages() -> None:
assert page_text
assert full_page_image.index == 0, "Full page image should have index 0"
assert isinstance(full_page_image.data, bytes)
assert len(full_page_image.data) > 0, "Full page image should have data"
assert full_page_image.data, "Full page image should have data"
# Check useful attributes are present and are JSON serializable
json.dumps(p2_image.info)
for attr in ("width", "height"):
Expand Down
2 changes: 0 additions & 2 deletions packages/paper-qa-pypdf/src/paperqa_pypdf/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pypdf
from paperqa.types import ParsedMedia, ParsedMetadata, ParsedText
from paperqa.utils import ImpossibleParsingError
from paperqa.version import __version__ as pqa_version

try:
import pypdfium2 as pdfium
Expand Down Expand Up @@ -96,7 +95,6 @@ def parse_pdf_to_pages(
else pypdf_version_str
)
],
paperqa_version=pqa_version,
total_parsed_text_length=total_length,
count_parsed_media=count_media,
parse_type="pdf",
Expand Down
4 changes: 2 additions & 2 deletions packages/paper-qa-pypdf/tests/test_paperqa_pypdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ async def test_parse_pdf_to_pages() -> None:
fig_1_text.text = "stub" # Replace text to confirm multimodality works
docs = Docs()
assert await docs.aadd_texts(texts=[fig_1_text], doc=doc)
for query, substrings_min_counts in [
for query, substrings_min_counts in (
("What actions can the Crawler take?", [(("search", "expand", "stop"), 2)]),
("What actions can the Selector take?", [(("select", "drop"), 2)]),
(
"How many User Query are there, and what do they do?",
[(("two", "2"), 2), (("crawler", "selector"), 2)],
),
]:
):
session = await docs.aquery(query=query)
assert session.contexts, "Expected contexts to be generated"
assert all(
Expand Down
8 changes: 2 additions & 6 deletions src/paperqa/agents/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,8 @@ async def gather_evidence(self, question: str, state: EnvironmentState) -> str:
)

top_contexts = "\n\n".join(
[
f"- {sc.context}"
for n, sc in enumerate(
sorted_contexts[: self.settings.agent.agent_evidence_n]
)
]
f"- {sc.context}"
for sc in sorted_contexts[: self.settings.agent.agent_evidence_n]
)

best_evidence = (
Expand Down
4 changes: 1 addition & 3 deletions src/paperqa/clients/journal_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,7 @@ async def main() -> None:
)
records = await process_csv(downloaded_path)

with open( # noqa: ASYNC230
DEFAULT_JOURNAL_QUALITY_CSV_PATH, "w", encoding="utf-8"
) as csvfile:
with DEFAULT_JOURNAL_QUALITY_CSV_PATH.open("w", encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["clean_name", "quality"])
for name, quality in records:
Expand Down
2 changes: 1 addition & 1 deletion src/paperqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def string_to_bytes(value: str) -> bytes:
"""Convert a base64-encoded string to bytes."""
# 1. Convert base64 string to base64 bytes
# 2. Convert base64 bytes to original bytes
return base64.b64decode(value.encode("utf-8"))
return base64.b64decode(value.encode("utf-8")) # noqa: FURB120


def validate_image(path: StrOrBytesPath | IO[bytes]) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,7 +1470,7 @@ async def test_read_doc_images_metadata(stub_data_dir: Path) -> None:
assert isinstance(parsed_image, ParsedMedia)
assert parsed_image.index == 0
assert isinstance(parsed_image.data, bytes)
assert len(parsed_image.data) > 0
assert parsed_image.data
assert not parsed_image.text, "Expected no text content for a standalone image"
assert parsed_image.info["suffix"] == ".png"
image_id = parsed_image.to_id()
Expand Down