Skip to content

Commit 72ef415

Browse files
authored
More flexible types (#139)
* Added future possible types
* Added back defaults to read_doc too
* Exported more types
* Added more typing hints
1 parent be22061 commit 72ef415

File tree

6 files changed

+17
-14
lines changed

6 files changed

+17
-14
lines changed

paperqa/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .docs import Answer, Docs, PromptCollection
1+
from .docs import Answer, Docs, PromptCollection, Doc, Text
22
from .version import __version__
33

4-
__all__ = ["Docs", "Answer", "PromptCollection", "__version__"]
4+
__all__ = ["Docs", "Answer", "PromptCollection", "__version__", "Doc", "Text"]

paperqa/docs.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def add_texts(
202202
):
203203
"""Add chunked texts to the collection. This is useful if you have already chunked the texts yourself."""
204204
if doc.dockey in self.docs:
205-
raise ValueError("Document already in collection.")
205+
raise ValueError(f"Document {doc.dockey} already in collection.")
206206
if len(texts) == 0:
207207
raise ValueError("No texts to add.")
208208
if doc.docname in self.docnames:
@@ -261,9 +261,7 @@ async def adoc_match(
261261
query, k=k + len(self.deleted_dockeys)
262262
)
263263
matched_docs = [self.docs[m.metadata["dockey"]] for m in matches]
264-
chain = make_chain(
265-
self.prompts.select, cast(BaseLanguageModel, self.summary_llm)
266-
)
264+
chain = make_chain(self.prompts.select, cast(BaseLanguageModel, self.llm))
267265
papers = [f"{d.docname}: {d.citation}" for d in matched_docs]
268266
result = await chain.arun( # type: ignore
269267
question=query, papers="\n".join(papers), callbacks=get_callbacks("filter")
@@ -507,7 +505,6 @@ async def aquery(
507505
else:
508506
callbacks = get_callbacks("answer")
509507
qa_chain = make_chain(self.prompts.qa, self.llm)
510-
print(self.prompts.qa)
511508
answer_text = await qa_chain.arun(
512509
context=answer.context,
513510
answer_length=answer.answer_length,

paperqa/prompts.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
'reply "I cannot answer". '
2626
"For each part of your answer, indicate which sources most support it "
2727
"via valid citation markers at the end of sentences, like (Example2012). "
28-
"Answer in an unbiased, comp rehensive, and scholarly tone. "
28+
"Answer in an unbiased, comprehensive, and scholarly tone. "
2929
"If the question is subjective, provide an opinionated answer in the concluding 1-2 sentences. \n\n"
3030
"{context}\n"
3131
"Question: {question}\n"
@@ -34,11 +34,12 @@
3434

3535
select_paper_prompt = PromptTemplate(
3636
input_variables=["question", "papers"],
37-
template="Select papers to help answer the question below. "
37+
template="Select papers that may help answer the question below. "
3838
"Papers are listed as $KEY: $PAPER_INFO. "
3939
"Return a list of keys, separated by commas. "
4040
'Return "None", if no papers are applicable. '
41-
"Choose papers that are relevant, from reputable sources, and timely. \n\n"
41+
"Choose papers that are relevant, from reputable sources, and timely "
42+
"(if the question requires timely information). \n\n"
4243
"Question: {question}\n\n"
4344
"{papers}\n\n"
4445
"Selected keys:",

paperqa/readers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ def parse_code_txt(path: Path, doc: Doc, chunk_chars: int, overlap: int) -> List
128128
def read_doc(
129129
path: Path,
130130
doc: Doc,
131-
chunk_chars: int,
132-
overlap: int,
131+
chunk_chars: int = 3000,
132+
overlap: int = 100,
133133
force_pypdf: bool = False,
134134
) -> List[Text]:
135135
"""Parse a document into chunks."""

paperqa/types.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from typing import Any, Callable, List, Optional, Set, Union
2+
from typing import Any, Callable, Dict, List, Optional, Set, Union
33

44
from langchain.callbacks.base import BaseCallbackHandler
55
from langchain.callbacks.manager import (
@@ -106,6 +106,11 @@ class Answer(BaseModel):
106106
dockey_filter: Optional[Set[DocKey]] = None
107107
summary_length: str = "about 100 words"
108108
answer_length: str = "about 100 words"
109+
# these two below are for convenience
110+
# and are not set. But you can set them
111+
# if you want to use them.
112+
cost: Optional[float] = None
113+
token_counts: Optional[Dict[str, List[int]]] = None
109114

110115
def __str__(self) -> str:
111116
"""Return the answer as a string."""

paperqa/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "3.0.0.dev1"
1+
__version__ = "3.0.0.dev2"

0 commit comments

Comments (0)