Skip to content

Commit 7fbdec8

Browse files
authored
Update docs.py (#28)
Improve the add-docs method to avoid making a citation API call for documents we already have (i.e., duplicates).
1 parent 4c5504d commit 7fbdec8

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

paperqa/docs.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,12 @@ def add(
110110
chunk_chars: Optional[int] = 3000,
111111
) -> None:
112112
"""Add a document to the collection."""
113-
113+
114+
# first check to see if we already have this document
115+
# this way we don't make an API call to create a citation for a file we already have
116+
if path in self.docs:
117+
raise ValueError(f"Document {path} already in collection.")
118+
114119
if citation is None:
115120
# peek at the first chunk
116121
texts, _ = read_doc(path, "", "", chunk_chars=chunk_chars)
@@ -119,8 +124,7 @@ def add(
119124
if len(citation) < 3 or "Unknown" in citation or "insufficient" in citation:
120125
citation = f"Unknown, {os.path.basename(path)}, {datetime.now().year}"
121126

122-
if path in self.docs:
123-
raise ValueError(f"Document {path} already in collection.")
127+
124128
if key is None:
125129
# get first name and year from citation
126130
try:

0 commit comments

Comments
 (0)