Skip to content

Commit eeece9f

Browse files
committed
Added support for the latest LangChain chat API
1 parent 8b47b69 commit eeece9f

File tree

3 files changed

+51
-35
lines changed

3 files changed

+51
-35
lines changed

paperqa/docs.py

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import List, Optional, Tuple, Dict, Callable, Any
1+
from typing import List, Optional, Tuple, Dict, Callable, Any, Union
22
from functools import reduce
33
import os
44
import os
@@ -10,13 +10,13 @@
1010
qa_prompt,
1111
search_prompt,
1212
citation_prompt,
13-
chat_pref,
13+
make_chain,
1414
)
1515
from dataclasses import dataclass
1616
from .readers import read_doc
1717
from langchain.vectorstores import FAISS
1818
from langchain.embeddings.openai import OpenAIEmbeddings
19-
from langchain.llms import OpenAI, OpenAIChat
19+
from langchain.chat_models import ChatOpenAI
2020
from langchain.llms.base import LLM
2121
from langchain.chains import LLMChain
2222
from langchain.callbacks import get_openai_callback
@@ -64,7 +64,7 @@ def __init__(
6464
summary_llm: Optional[LLM] = None,
6565
name: str = "default",
6666
index_path: Optional[Path] = None,
67-
model_name: str = 'gpt-3.5-turbo'
67+
model_name: str = "gpt-3.5-turbo",
6868
) -> None:
6969
"""Initialize the collection of documents.
7070
@@ -82,26 +82,32 @@ def __init__(
8282
self.chunk_size_limit = chunk_size_limit
8383
self.keys = set()
8484
self._faiss_index = None
85-
if llm is None:
86-
llm = OpenAIChat(temperature=0.1, max_tokens=512, prefix_messages=chat_pref, model_name=model_name)
87-
if summary_llm is None:
88-
summary_llm = llm
8985
self.update_llm(llm, summary_llm)
9086
if index_path is None:
9187
index_path = Path.home() / ".paperqa" / name
9288
self.index_path = index_path
9389
self.name = name
9490

95-
def update_llm(self, llm: LLM, summary_llm: Optional[LLM] = None) -> None:
91+
def update_llm(
92+
self,
93+
llm: Optional[Union[LLM, str]] = None,
94+
summary_llm: Optional[Union[LLM, str]] = None,
95+
) -> None:
9696
"""Update the LLM for answering questions."""
97+
if llm is None:
98+
llm = "gpt-3.5-turbo"
99+
if type(llm) is str:
100+
llm = ChatOpenAI(temperature=0.1, model=llm)
101+
if type(summary_llm) is str:
102+
summary_llm = ChatOpenAI(temperature=0.1, model=summary_llm)
97103
self.llm = llm
98104
if summary_llm is None:
99105
summary_llm = llm
100106
self.summary_llm = summary_llm
101-
self.summary_chain = LLMChain(prompt=summary_prompt, llm=summary_llm)
102-
self.qa_chain = LLMChain(prompt=qa_prompt, llm=llm)
103-
self.search_chain = LLMChain(prompt=search_prompt, llm=llm)
104-
self.cite_chain = LLMChain(prompt=citation_prompt, llm=llm)
107+
self.summary_chain = make_chain(prompt=summary_prompt, llm=summary_llm)
108+
self.qa_chain = make_chain(prompt=qa_prompt, llm=llm)
109+
self.search_chain = make_chain(prompt=search_prompt, llm=summary_llm)
110+
self.cite_chain = make_chain(prompt=citation_prompt, llm=summary_llm)
105111

106112
def add(
107113
self,
@@ -112,12 +118,12 @@ def add(
112118
chunk_chars: Optional[int] = 3000,
113119
) -> None:
114120
"""Add a document to the collection."""
115-
116-
# first check to see if we already have this document
121+
122+
# first check to see if we already have this document
117123
# this way we don't make api call to create citation on file we already have
118124
if path in self.docs:
119125
raise ValueError(f"Document {path} already in collection.")
120-
126+
121127
if citation is None:
122128
# peak first chunk
123129
texts, _ = read_doc(path, "", "", chunk_chars=chunk_chars)
@@ -126,7 +132,6 @@ def add(
126132
if len(citation) < 3 or "Unknown" in citation or "insufficient" in citation:
127133
citation = f"Unknown, {os.path.basename(path)}, {datetime.now().year}"
128134

129-
130135
if key is None:
131136
# get first name and year from citation
132137
try:
@@ -212,9 +217,7 @@ def __setstate__(self, state):
212217
except:
213218
# they use some special exception type, but I don't want to import it
214219
self._faiss_index = None
215-
self.update_llm(
216-
OpenAIChat(temperature=0.1, max_tokens=512, prefix_messages=chat_pref)
217-
)
220+
self.update_llm("gpt-3.5-turbo")
218221

219222
def _build_faiss_index(self):
220223
if self._faiss_index is None:
@@ -252,7 +255,9 @@ def get_evidence(
252255
doc.metadata["key"],
253256
doc.metadata["citation"],
254257
self.summary_chain.run(
255-
question=answer.question, context_str=doc.page_content
258+
question=answer.question,
259+
context_str=doc.page_content,
260+
citation=doc.metadata["citation"],
256261
),
257262
doc.page_content,
258263
)

paperqa/qaprompts.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
import langchain.prompts as prompts
22
from datetime import datetime
3+
from langchain.chains import LLMChain
4+
from langchain.chat_models import ChatOpenAI
5+
from langchain.schema import HumanMessage, SystemMessage
6+
from langchain.prompts.chat import HumanMessagePromptTemplate, ChatPromptTemplate
7+
38

49
summary_prompt = prompts.PromptTemplate(
5-
input_variables=["question", "context_str"],
10+
input_variables=["question", "context_str", "citation"],
611
template="Summarize and provide direct quotes from the text below to help answer a question. "
7-
"Do not directly answer the question, instead provide a summary and quotes with the context of the question. "
12+
"Do not directly answer the question, instead summarize and "
13+
"quote to give evidence to help answer the question. "
814
"Do not use outside sources. "
915
'Reply with "Not applicable" if the text is unrelated to the question. '
1016
"Use 75 or less words."
1117
"\n\n"
1218
"{context_str}\n"
13-
"\n"
19+
"Extracted from {citation}\n"
1420
"Question: {question}\n"
1521
"Relevant Information Summary:",
1622
)
@@ -20,7 +26,7 @@
2026
input_variables=["question", "context_str", "length"],
2127
template="Write an answer ({length}) "
2228
"for the question below solely based on the provided context. "
23-
"If the context is irrelevant, "
29+
"If the context provides insufficient information, "
2430
'reply "I cannot answer". '
2531
"For each sentence in your answer, indicate which sources most support it "
2632
"via valid citation markers at the end of sentences, like (Example2012). "
@@ -35,8 +41,8 @@
3541
search_prompt = prompts.PromptTemplate(
3642
input_variables=["question"],
3743
template="We want to answer the following question: {question} \n"
38-
"Provide three different targeted keyword searches (one search per line) "
39-
"that will find papers that help answer the question. Do not use boolean operators. "
44+
"Provide three keyword searches (one search per line) "
45+
"that will find papers to help answer the question. Do not use boolean operators. "
4046
"Recent years are 2021, 2022, 2023.\n\n"
4147
"1.",
4248
)
@@ -55,10 +61,15 @@ def _get_datetime():
5561
partial_variables={"date": _get_datetime},
5662
)
5763

58-
chat_pref = [
59-
{
60-
"role": "system",
61-
"content": "You are a scholarly researcher that answers in an unbiased, scholarly tone. "
62-
"You sometimes refuse to answer if there is insufficient information.",
63-
}
64-
]
64+
65+
def make_chain(prompt, llm):
66+
if type(llm) == ChatOpenAI:
67+
system_message_prompt = SystemMessage(
68+
content="You are a scholarly researcher that answers in an unbiased, scholarly tone. "
69+
"You sometimes refuse to answer if there is insufficient information.",
70+
)
71+
human_message_prompt = HumanMessagePromptTemplate(prompt=prompt)
72+
prompt = ChatPromptTemplate.from_messages(
73+
[system_message_prompt, human_message_prompt]
74+
)
75+
return LLMChain(prompt=prompt, llm=llm)

paperqa/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.30"
1+
__version__ = "0.1.0"

0 commit comments

Comments (0)