Skip to content

Commit 8a1d632

Browse files
committed
Replaced False returns with ValueErrors
1 parent d0866d2 commit 8a1d632

File tree

3 files changed

+22
-9
lines changed

3 files changed

+22
-9
lines changed

paperqa/docs.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,25 @@ def __init__(self, chunk_size_limit: int = 3000) -> None:
4141
self.chunk_size_limit = chunk_size_limit
4242
self.keys = set()
4343

44-
def add(self, path: str, citation: str, key: Optional[str] = None) -> bool:
44+
def add(
45+
self,
46+
path: str,
47+
citation: str,
48+
key: Optional[str] = None,
49+
disable_check: bool = False,
50+
) -> None:
4551
"""Add a document to the collection."""
4652
if path in self.docs:
47-
return False
53+
raise ValueError(f"Document {path} already in collection.")
4854
if key is None:
4955
# get first name and year from citation
5056
try:
5157
author = re.search(r"([A-Z][a-z]+)", citation).group(1)
5258
except AttributeError:
5359
# panicking - no word??
54-
return False
60+
raise ValueError(
61+
f"Could not parse author from citation {citation}. Consider just passing key explicitly"
62+
)
5563
try:
5664
year = re.search(r"(\d{4})", citation).group(1)
5765
except AttributeError:
@@ -70,18 +78,20 @@ def add(self, path: str, citation: str, key: Optional[str] = None) -> bool:
7078
data = {"citation": citation, "key": key}
7179
d = gpt_index.SimpleDirectoryReader(input_files=[path]).load_data()
7280
# loose check to see if document was loaded
73-
if not maybe_is_text(d[0].text):
74-
return False
81+
if not disable_check and not maybe_is_text(d[0].text):
82+
raise ValueError(
83+
f"This does not look like a text document: {path}. Path disable_check to ignore this error."
84+
)
7585
with HiddenPrints():
7686
try:
7787
i = gpt_index.GPTSimpleVectorIndex(
7888
d, chunk_size_limit=self.chunk_size_limit
7989
)
8090
except UnicodeEncodeError:
81-
return False
91+
# want to make this a valueerror so we can catch it
92+
raise ValueError(f"Failed to load document {path}.")
8293
data["index"] = i
8394
self.docs[path] = data
84-
return True
8595

8696
# to pickle, we have to save the index as a file
8797
def __getstate__(self):

paperqa/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.3"
1+
__version__ = "0.0.4"

tests/test_paperqa.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ def test_repeat_keys():
9999
f.write(r.text)
100100
docs = paperqa.Docs()
101101
docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now")
102-
docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now")
102+
try:
103+
docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now")
104+
except ValueError:
105+
pass
103106
assert len(docs.docs) == 1
104107

105108
# now with different paths

0 commit comments

Comments
 (0)