Make Internet Search a system persona

onimsha · onimsha · commit 8a9b16fdd617 · 2024-09-13T07:13:03.000Z
Signed-off-by: Alex Co <alex.tuan@mindvalley.com>
diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py
@@ -96,7 +96,17 @@ def load_personas_from_yaml(
             # Set specific overrides for image generation persona
             if persona.get("image_generation"):
                 llm_model_version_override = "gpt-4o"
-
+            
+            # Attach InternetSearchTool (if present in the DB) to personas flagged with `internet_search`
+            if persona.get("internet_search"):
+                internet_search_tool = (
+                    db_session.query(ToolDBModel)
+                    .filter(ToolDBModel.name == "InternetSearchTool")
+                    .first()
+                )
+                if internet_search_tool:
+                    tool_ids.append(internet_search_tool.id)
+                    
             existing_persona = (
                 db_session.query(Persona)
                 .filter(Persona.name == persona["name"])
diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml
@@ -19,11 +19,11 @@ personas:
     # Default number of chunks to include as context, set to 0 to disable retrieval
     # Remove the field to set to the system default number of chunks/tokens to pass to Gen AI
     # Each chunk is 512 tokens long
-    num_chunks: 50
+    num_chunks: 20
     # Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine
     # if the chunk is useful or not towards the latest user query
     # This feature can be overriden for all personas via DISABLE_LLM_CHUNK_FILTER env variable
-    llm_relevance_filter: false
+    llm_relevance_filter: true
     # Enable/Disable usage of the LLM to extract query time filters including source type and time range filters
     llm_filter_extraction: true
     # Decay documents priority as they age, options are:
@@ -44,11 +44,11 @@ personas:
     document_sets: []
     icon_shape: 23013
     icon_color: "#6FB1FF"
-    display_priority: 1
+    display_priority: 0
     is_visible: true
 
   - id: 1
-    name: "General"
+    name: "General GPT"
     description: >
       Assistant with no access to documents. Chat with just the Large Language Model.
     prompts:
@@ -60,24 +60,25 @@ personas:
     document_sets: []
     icon_shape: 50910
     icon_color: "#FF6F6F"
-    display_priority: 0
+    display_priority: 1
     is_visible: true
 
   - id: 2
-    name: "Paraphrase"
+    name: "GPT Internet Search"
     description: >
-      Assistant that is heavily constrained and only provides exact quotes from Connected Sources.
+      Use this Assistant to search the Internet for you (via Bing) and get the answer
     prompts:
-      - "Paraphrase"
-    num_chunks: 10
+      - "InternetSearch"
+    num_chunks: 0
     llm_relevance_filter: true
     llm_filter_extraction: true
     recency_bias: "auto"
     document_sets: []
     icon_shape: 45519
     icon_color: "#6FFF8D"
     display_priority: 2
-    is_visible: false
+    is_visible: true
+    internet_search: true
 
 
   - id: 3
@@ -95,4 +96,4 @@ personas:
     icon_color: "#9B59B6"
     image_generation: true 
     display_priority: 3
-    is_visible: true
+    is_visible: false
diff --git a/backend/danswer/chat/prompts.yaml b/backend/danswer/chat/prompts.yaml
@@ -107,3 +107,18 @@ prompts:
       directly from the documents.
     datetime_aware: true
     include_citations: true
+
+  - name: "InternetSearch"
+    description: "Use this Assistant to search the Internet for you (via Bing) and get the answer"
+    system:  >
+      You are an intelligent AI agent designed to assist users by providing accurate and relevant information through internet searches. Your primary objectives are:
+      Information Retrieval: Search the internet to find reliable and up-to-date information based on user queries. Ensure that the sources you reference are credible and trustworthy.
+      Context Understanding: Analyze user questions to understand context and intent. Provide answers that are directly related to the user's needs, offering additional context when necessary.
+      Summarization: When presenting information, summarize findings clearly and concisely. Highlight key points and relevant details to enhance user understanding.
+      User Engagement: Maintain a friendly and engaging tone in your responses. Encourage users to ask follow-up questions or request further information.
+      Privacy and Safety: Respect user privacy and ensure that any personal information is handled securely. Avoid sharing sensitive or inappropriate content.
+      Continuous Learning: Adapt and improve your responses based on user interactions and feedback. Stay updated with the latest information and trends to provide the best assistance.
+    task: >
+      Search the internet for relevant information based on the user query. Provide a concise summary of the findings and include the sources of information.
+    datetime_aware: true
+    include_citations: true
diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py
@@ -146,7 +146,6 @@ def auto_add_search_tool_to_personas(db_session: Session) -> None:
     db_session.commit()
     logger.notice("Completed adding SearchTool to relevant Personas.")
 
-
 _built_in_tools_cache: dict[int, Type[Tool]] | None = None
 
 
diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
@@ -4,7 +4,7 @@ asyncpg==0.27.0
 atlassian-python-api==3.37.0
 beautifulsoup4==4.12.2
 boto3==1.34.84
-celery[redis]==5.3.4
+celery==5.3.4
 boto3==1.34.84
 chardet==5.2.0
 dask==2023.8.1