
Commit 5926831

Added memory to query (#140)
* Completed memory implementation
* Fixed some missing types

1 parent 899f145 commit 5926831

File tree

8 files changed: +146 -29 lines changed


README.md
Lines changed: 6 additions & 5 deletions

@@ -187,11 +187,12 @@ Version 3 includes many changes to type the code, make it more focused/modular,
 
 The following new features are in v3:
 
-1. `add_url` and `add_file` are now supported for adding from URLs and file objects
-2. Prompts can be customized, and now can be executed pre and post query
-3. Consistent use of `dockey` and `docname` for unique and natural language names enable better tracking with external databases
-4. Texts and embeddings are no longer required to be part of `Docs` object, so you can use external databases or other strategies to manage them
-5. Various simplifications, bug fixes, and performance improvements
+1. Memory is now possible in `query` by setting `Docs(memory=True)` - this means follow-up questions will have a record of the previous question and answer.
+2. `add_url` and `add_file` are now supported for adding from URLs and file objects
+3. Prompts can be customized, and now can be executed pre and post query
+4. Consistent use of `dockey` and `docname` for unique and natural language names enable better tracking with external databases
+5. Texts and embeddings are no longer required to be part of `Docs` object, so you can use external databases or other strategies to manage them
+6. Various simplifications, bug fixes, and performance improvements
 
 ### Naming
 
paperqa/chains.py
Lines changed: 39 additions & 3 deletions

@@ -8,12 +8,20 @@
 )
 from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
-from langchain.prompts import StringPromptTemplate
+from langchain.memory.chat_memory import BaseChatMemory
+from langchain.prompts import BasePromptTemplate, PromptTemplate, StringPromptTemplate
 from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
 from langchain.schema import LLMResult, SystemMessage
 
 from .types import CBManager
 
+memory_prompt = PromptTemplate(
+    input_variables=["memory", "start"],
+    template="Previous answers that may be helpful:\n\n{memory}\n\n"
+    "----------------------------------------\n\n"
+    "{start}",
+)
+
 
 class FallbackLLMChain(LLMChain):
     """Chain that falls back to synchronous generation if the async generation fails."""

@@ -32,16 +40,44 @@ async def agenerate(
         return self.generate(input_list)
 
 
+# TODO: If upstream is fixed remove this
+
+
+class ExtendedHumanMessagePromptTemplate(HumanMessagePromptTemplate):
+    prompt: BasePromptTemplate
+
+
 def make_chain(
-    prompt: StringPromptTemplate, llm: BaseLanguageModel, skip_system: bool = False
+    prompt: StringPromptTemplate,
+    llm: BaseLanguageModel,
+    skip_system: bool = False,
+    memory: Optional[BaseChatMemory] = None,
 ) -> FallbackLLMChain:
+    if memory and len(memory.load_memory_variables({})["memory"]) > 0:
+        # we copy the prompt so we don't modify the original
+        # TODO: Figure out pipeline prompts to avoid this
+        # the problem with pipeline prompts is that
+        # the memory is a constant (or partial), not a prompt
+        # and I cannot seem to make an empty prompt (or str)
+        # work as an input to pipeline prompt
+        assert isinstance(
+            prompt, PromptTemplate
+        ), "Memory only works with prompt templates - see comment above"
+        assert "memory" in memory.load_memory_variables({})
+        new_prompt = PromptTemplate(
+            input_variables=prompt.input_variables,
+            template=memory_prompt.format(
+                start=prompt.template, **memory.load_memory_variables({})
+            ),
+        )
+        prompt = new_prompt
     if type(llm) == ChatOpenAI:
         system_message_prompt = SystemMessage(
             content="Answer in an unbiased, concise, scholarly tone. "
             "You may refuse to answer if there is insufficient information. "
             "If there are ambiguous terms or acronyms, first define them. ",
         )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=prompt)
+        human_message_prompt = ExtendedHumanMessagePromptTemplate(prompt=prompt)
        if skip_system:
            chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
        else:
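As context, a small standalone sketch of what the memory branch of `make_chain` builds, reusing the `memory_prompt` defined in this diff (the stand-in prompt and stored Q/A string are illustrative):

```python
from langchain.prompts import PromptTemplate

memory_prompt = PromptTemplate(
    input_variables=["memory", "start"],
    template="Previous answers that may be helpful:\n\n{memory}\n\n"
    "----------------------------------------\n\n"
    "{start}",
)

# A stand-in for one of the collection's prompts (e.g. the qa prompt):
qa_prompt = PromptTemplate(
    input_variables=["question"], template="Answer this: {question}"
)

# make_chain formats memory_prompt with the stored record as `memory` and the
# original template *string* as `start`. str.format only substitutes {memory}
# and {start}, so the {question} placeholder survives into the new template
# and input_variables stay unchanged.
wrapped = PromptTemplate(
    input_variables=qa_prompt.input_variables,
    template=memory_prompt.format(
        memory="Question: When was the cease-fire?\nAnswer: In 1939.",
        start=qa_prompt.template,
    ),
)
print(wrapped.format(question="What happened after that?"))
```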

paperqa/docs.py
Lines changed: 65 additions & 15 deletions

@@ -12,6 +12,8 @@
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings.base import Embeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.memory import ConversationTokenBufferMemory
+from langchain.memory.chat_memory import BaseChatMemory
 from langchain.vectorstores import FAISS, VectorStore
 from pydantic import BaseModel, validator

@@ -48,19 +50,39 @@ class Docs(BaseModel, arbitrary_types_allowed=True, smart_union=True):
     max_concurrent: int = 5
     deleted_dockeys: Set[DocKey] = set()
     prompts: PromptCollection = PromptCollection()
+    memory: bool = False
+    memory_model: Optional[BaseChatMemory] = None
 
     # TODO: Not sure how to get this to work
     # while also passing mypy checks
     @validator("llm", "summary_llm")
     def check_llm(cls, v: Union[BaseLanguageModel, str]) -> BaseLanguageModel:
         if type(v) is str:
             return ChatOpenAI(temperature=0.1, model=v, client=None)
-        return v
+        return cast(BaseLanguageModel, v)
 
     @validator("summary_llm", always=True)
     def copy_llm_if_not_set(cls, v, values):
         return v or values["llm"]
 
+    @validator("memory_model", always=True)
+    def check_memory_model(cls, v, values):
+        if values["memory"]:
+            if v is None:
+                return ConversationTokenBufferMemory(
+                    llm=values["summary_llm"],
+                    max_token_limit=512,
+                    memory_key="memory",
+                    human_prefix="Question",
+                    ai_prefix="Answer",
+                    input_key="Question",
+                    output_key="Answer",
+                )
+            if v.memory_variables()[0] != "memory":
+                raise ValueError("Memory model must have memory_variables=['memory']")
+            return values["memory_model"]
+        return None
+
     def update_llm(
         self,
         llm: Union[BaseLanguageModel, str],

@@ -76,7 +98,7 @@ def update_llm(
             summary_llm = llm
         self.summary_llm = cast(BaseLanguageModel, summary_llm)
 
-    def get_unique_name(self, docname: str) -> str:
+    def _get_unique_name(self, docname: str) -> str:
         """Create a unique name given proposed name"""
         suffix = ""
         while docname + suffix in self.docnames:

@@ -182,12 +204,14 @@ def add(
         if match is not None:
             year = match.group(1)  # type: ignore
         docname = f"{author}{year}"
-        docname = self.get_unique_name(docname)
+        docname = self._get_unique_name(docname)
         doc = Doc(docname=docname, citation=citation, dockey=dockey)
         texts = read_doc(path, doc, chunk_chars=chunk_chars, overlap=100)
         # loose check to see if document was loaded
-        if len(texts[0].text) < 10 or (
-            not disable_check and not maybe_is_text(texts[0].text)
+        if (
+            len(texts) == 0
+            or len(texts[0].text) < 10
+            or (not disable_check and not maybe_is_text(texts[0].text))
         ):
             raise ValueError(
                 f"This does not look like a text document: {path}. Path disable_check to ignore this error."

@@ -206,7 +230,7 @@ def add_texts(
         if len(texts) == 0:
             raise ValueError("No texts to add.")
         if doc.docname in self.docnames:
-            new_docname = self.get_unique_name(doc.docname)
+            new_docname = self._get_unique_name(doc.docname)
             for t in texts:
                 t.name = t.name.replace(doc.docname, new_docname)
             doc.docname = new_docname

@@ -261,7 +285,9 @@ async def adoc_match(
             query, k=k + len(self.deleted_dockeys)
         )
         matched_docs = [self.docs[m.metadata["dockey"]] for m in matches]
-        chain = make_chain(self.prompts.select, cast(BaseLanguageModel, self.llm))
+        chain = make_chain(
+            self.prompts.select, cast(BaseLanguageModel, self.llm), skip_system=True
+        )
         papers = [f"{d.docname}: {d.citation}" for d in matched_docs]
         result = await chain.arun(  # type: ignore
             question=query, papers="\n".join(papers), callbacks=get_callbacks("filter")

@@ -298,6 +324,11 @@ def _build_texts_index(self):
             metadatas=metadatas,
         )
 
+    def clear_memory(self):
+        """Clear the memory of the model."""
+        if self.memory_model is not None:
+            self.memory_model.clear()
+
     def get_evidence(
         self,
         answer: Answer,

@@ -375,7 +406,9 @@ async def aget_evidence(
 
         async def process(match):
             callbacks = get_callbacks("evidence:" + match.metadata["name"])
-            summary_chain = make_chain(self.prompts.summary, self.summary_llm)
+            summary_chain = make_chain(
+                self.prompts.summary, self.summary_llm, memory=self.memory_model
+            )
             # This is dangerous because it
             # could mask errors that are important- like auth errors
             # I also cannot know what the exception

@@ -391,7 +424,7 @@ async def process(match):
                     callbacks=callbacks,
                 )
             except Exception as e:
-                if guess_is_4xx(e):
+                if guess_is_4xx(str(e)):
                     return None
                 raise e
             if "not applicable" in context.lower():

@@ -476,9 +509,9 @@ async def aquery(
         if answer is None:
             answer = Answer(question=query, answer_length=length_prompt)
         if len(answer.contexts) == 0:
-            # this is heuristic - max_sources and len(docs) are not
+            # this is heuristic - k and len(docs) are not
             # comparable - one is chunks and one is docs
-            if key_filter or (key_filter is None and len(self.docs) > max_sources):
+            if key_filter or (key_filter is None and len(self.docs) > k):
                 keys = await self.adoc_match(
                     answer.question, get_callbacks=get_callbacks
                 )

@@ -492,19 +525,27 @@ async def aquery(
                 get_callbacks=get_callbacks,
             )
         if self.prompts.pre is not None:
-            chain = make_chain(self.prompts.pre, self.llm)
+            chain = make_chain(
+                self.prompts.pre,
+                cast(BaseLanguageModel, self.llm),
+                memory=self.memory_model,
+            )
             pre = await chain.arun(
                 question=answer.question, callbacks=get_callbacks("pre")
             )
             answer.context = pre + "\n\n" + answer.context
         bib = dict()
-        if len(answer.context) < 10:
+        if len(answer.context) < 10 and not self.memory:
             answer_text = (
                 "I cannot answer this question due to insufficient information."
             )
         else:
             callbacks = get_callbacks("answer")
-            qa_chain = make_chain(self.prompts.qa, self.llm)
+            qa_chain = make_chain(
+                self.prompts.qa,
+                cast(BaseLanguageModel, self.llm),
+                memory=self.memory_model,
+            )
             answer_text = await qa_chain.arun(
                 context=answer.context,
                 answer_length=answer.answer_length,

@@ -531,11 +572,20 @@ async def aquery(
         answer.references = bib_str
 
         if self.prompts.post is not None:
-            chain = make_chain(self.prompts.post, self.llm)
+            chain = make_chain(
+                self.prompts.post,
+                cast(BaseLanguageModel, self.llm),
+                memory=self.memory_model,
+            )
             post = await chain.arun(**answer.dict(), callbacks=get_callbacks("post"))
             answer.answer = post
             answer.formatted_answer = f"Question: {query}\n\n{post}\n"
             if len(bib) > 0:
                 answer.formatted_answer += f"\nReferences\n\n{bib_str}\n"
+        if self.memory_model is not None:
+            answer.memory = self.memory_model.load_memory_variables(inputs={})["memory"]
+            self.memory_model.save_context(
+                {"Question": answer.question}, {"Answer": answer.answer}
+            )
 
         return answer
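As context, a standalone sketch of the default memory configured by `check_memory_model` above, together with the save/load calls from the end of `aquery` (the model name and Q/A strings are illustrative; token counting happens locally via tiktoken, no API call is made):

```python
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationTokenBufferMemory

memory = ConversationTokenBufferMemory(
    llm=ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo", client=None),
    max_token_limit=512,  # oldest Q/A pairs are pruned past this token budget
    memory_key="memory",  # so load_memory_variables({}) returns {"memory": ...}
    human_prefix="Question",
    ai_prefix="Answer",
    input_key="Question",
    output_key="Answer",
)

# Mirrors the end of aquery: record the finished Q/A pair...
memory.save_context(
    {"Question": "When was the cease-fire?"}, {"Answer": "In 1939."}
)

# ...and this is the string a later query gets prepended via make_chain,
# e.g. "Question: When was the cease-fire?\nAnswer: In 1939."
print(memory.load_memory_variables({})["memory"])
```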

paperqa/prompts.py
Lines changed: 3 additions & 3 deletions

@@ -6,13 +6,13 @@
     input_variables=["text", "citation", "question", "summary_length"],
     template="Summarize the text below to help answer a question. "
     "Do not directly answer the question, instead summarize "
-    "to give evidence to help answer the question. Include direct quotes. "
+    "to give evidence to help answer the question. "
     'Reply "Not applicable" if text is irrelevant. '
     "Use {summary_length}. At the end of your response, provide a score from 1-10 on a newline "
     "indicating relevance to question. Do not explain your score. "
     "\n\n"
-    "{text}\n"
-    "Extracted from {citation}\n"
+    "{text}\n\n"
+    "Excerpt from {citation}\n"
     "Question: {question}\n"
     "Relevant Information Summary:",
 )

paperqa/types.py
Lines changed: 1 addition & 0 deletions

@@ -106,6 +106,7 @@ class Answer(BaseModel):
     dockey_filter: Optional[Set[DocKey]] = None
     summary_length: str = "about 100 words"
     answer_length: str = "about 100 words"
+    memory: Optional[str] = None
     # these two below are for convenience
     # and are not set. But you can set them
     # if you want to use them.
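A quick illustration of the new field (assuming `Answer`'s other fields keep their defaults):

```python
from paperqa.types import Answer

answer = Answer(question="When was the cease-fire?")
# memory stays None until a memory-enabled Docs.query populates it
assert answer.memory is None
```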

paperqa/version.py
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = "3.0.0.dev2"
+__version__ = "3.0.0.dev3"

setup.py
Lines changed: 2 additions & 2 deletions

@@ -18,12 +18,12 @@
     packages=["paperqa", "paperqa.contrib"],
     install_requires=[
         "pypdf",
-        "langchain>=0.0.195",
+        "langchain>=0.0.198",
         "openai >= 0.27.8",
         "faiss-cpu",
         "PyCryptodome",
         "html2text",
-        "tiktoken",
+        "tiktoken>=0.4.0",
     ],
     test_suite="tests",
     long_description=long_description,

tests/test_paperqa.py
Lines changed: 29 additions & 0 deletions

@@ -529,3 +529,32 @@ def test_post_prompt():
         f.write(r.text)
     docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now")
     docs.query("What country is Bates from?")
+
+
+def test_memory():
+    docs = Docs(memory=True, k=3, max_sources=1, llm="gpt-3.5-turbo", key_filter=False)
+    docs.add_url(
+        "https://en.wikipedia.org/wiki/Red_Army",
+        citation="WikiMedia Foundation, 2023, Accessed now",
+        dockey="test",
+    )
+    answer1 = docs.query("When did the Soviet Union and Japan agree to a cease-fire?")
+    print(answer1.answer)
+    assert answer1.memory is not None
+    assert "1939" in answer1.answer
+    assert "Answer" in docs.memory_model.load_memory_variables({})["memory"]
+    answer2 = docs.query("When was the conflict resolved?")
+    assert "1941" in answer2.answer or "1945" in answer2.answer
+    assert answer2.memory is not None
+    assert "Answer" in docs.memory_model.load_memory_variables({})["memory"]
+    print(answer2.answer)
+
+    docs.clear_memory()
+
+    answer3 = docs.query("When was the conflict resolved?")
+    assert answer3.memory is not None
+    assert (
+        "I cannot answer" in answer3.answer
+        or "insufficient" in answer3.answer
+        or "does not provide" in answer3.answer
+    )