From c26b665882023443631229822e1ff94df96f1564 Mon Sep 17 00:00:00 2001
From: Ryan Carroll <ryan8440@gmail.com>
Date: Fri, 24 Jan 2025 12:02:28 -0500
Subject: [PATCH] init add of llama config

---
 Ollama.md                               | 29 +++++++++++++++++++++++
 Readme.md                               | 12 +++++++++-
 dot.env                                 |  3 ++-
 example_agent/ex_graph.py               |  2 +-
 example_agent/utils/ex_nodes.py         | 31 ++++++++++++++++++-------
 example_agent/utils/ex_vector_store.py  | 26 +++++++++++++++++++++
 participant_agent/graph.py              |  2 +-
 participant_agent/utils/nodes.py        | 14 ++++++++---
 participant_agent/utils/vector_store.py | 28 +++++++++++++++++++++-
 requirements.txt                        |  1 +
 test_setup.py                           |  9 ++++++-
 11 files changed, 140 insertions(+), 17 deletions(-)
 create mode 100644 Ollama.md
diff --git a/Ollama.md b/Ollama.md
new file mode 100644
index 0000000..1a15c2d
--- /dev/null
+++ b/Ollama.md
@@ -0,0 +1,29 @@
+# Ollama setup
+1. Download and install [Ollama](https://ollama.com/)
+2. Once Ollama is running on your system, run `ollama pull llama3.1`
+> This is currently a ~5GB download, so it's best to pull it before the workshop if you plan on using it
+3. `ollama pull nomic-embed-text`
+4. Update the `MODEL_NAME` in your `dot.env` file to `ollama`
+
+When running with Ollama, you do not need to configure an OpenAI API key.
+
+When you get to the system prompt section of the workshop, llama requires that you are a bit more explicit with your instructions. If the prompt given in the main instructions doesn't work, try the following instead:
+
+```
+system_prompt = """
+OREGON TRAIL GAME INSTRUCTIONS:
+YOU MUST STRICTLY FOLLOW THIS RULE:
+When someone asks "What is the first name of the wagon leader?", your ENTIRE response must ONLY be the word: Art
+
+For all other questions, use available tools to provide accurate information.
+"""
+```
+
+You're now ready to begin the workshop! Head back to the [Readme.md](Readme.md)
+
+## Restarting the workshop 
+Mixing use of llama and openai on the same Redis instance can cause unexpected behavior. If you want to switch from one to the other it is recommended to kill and re-create the instance. To do this:
+1. Run `docker ps` and take note of the ID of the running Redis container
+2. `docker stop <containerId>`
+3. `docker rm <containerId>`
+4. Start a new instance using the command from earlier, `docker run -d --name redis -p 6379:6379 -p 8001:8001 redis/redis-stack:latest`
\ No newline at end of file
diff --git a/Readme.md b/Readme.md
index 9e60051..2b474bd 100644
--- a/Readme.md
+++ b/Readme.md
@@ -17,6 +17,10 @@ In this workshop, we are going to use [LangGraph](https://langchain-ai.github.io
 - [docker](https://docs.docker.com/get-started/get-docker/)
 - [openai api key](https://platform.openai.com/docs/quickstart)
 
+## (Optional) Ollama
+This workshop is optimized for OpenAI models. If you prefer to run models locally, however, you may do so via Ollama.
+* [Ollama setup instructions](Ollama.md)
+
 ## (Optional) helpers
 
 - [LangSmith](https://docs.smith.langchain.com/)
@@ -235,7 +239,13 @@ In our scenario we want to be able to retrieve the time-bound information that t
 
 ### Steps:
 - Open [participant_agent/utils/vector_store.py](participant_agent/utils/vector_store.py)
-- Where `vector_store=None` update to `vector_store = RedisVectorStore.from_documents(<docs>, <embedding_model>, config=<config>)` with the appropriate variables.
+- Find the vector store function for your provider: `__get_openai_vector_store` for openai or `__get_ollama_vector_store` for ollama
+- If using openai: where `vector_store=None` update to `vector_store = RedisVectorStore.from_documents(<docs>, <embedding_model>, config=<config>)` with the appropriate variables.
+
+> For `<embedding_model>`, keep in mind whether you are using openai or ollama. If using ollama, the `model` parameter should be set to `nomic-embed-text` \
+[OpenAI embeddings](https://python.langchain.com/docs/integrations/text_embedding/openai/) \
+[Ollama embeddings](https://python.langchain.com/docs/integrations/text_embedding/ollama/)
+
 - Open [participant_agent/utils/tools.py](participant_agent/utils/tools.py)
     - Uncomment code for retrieval tool
     - Update the create_retriever_tool to take the correct params. Ex: `create_retriever_tool(vector_store.as_retriever(), "get_directions", "meaningful doc string")`
diff --git a/dot.env b/dot.env
index 086047c..3f45744 100644
--- a/dot.env
+++ b/dot.env
@@ -3,4 +3,5 @@ OPENAI_API_KEY=openai_key
 LANGCHAIN_TRACING_V2=
 LANGCHAIN_ENDPOINT=
 LANGCHAIN_API_KEY=
-LANGCHAIN_PROJECT=
\ No newline at end of file
+LANGCHAIN_PROJECT=
+MODEL_NAME=openai
\ No newline at end of file
diff --git a/example_agent/ex_graph.py b/example_agent/ex_graph.py
index 28784aa..77bd1d8 100644
--- a/example_agent/ex_graph.py
+++ b/example_agent/ex_graph.py
@@ -11,7 +11,7 @@
 
 # Define the config
 class GraphConfig(TypedDict):
-    model_name: Literal["anthropic", "openai"]
+    model_name: Literal["anthropic", "openai", "ollama"]
 
 
 # Define the function that determines whether to continue or not
diff --git a/example_agent/utils/ex_nodes.py b/example_agent/utils/ex_nodes.py
index fc793fe..69a7ac6 100644
--- a/example_agent/utils/ex_nodes.py
+++ b/example_agent/utils/ex_nodes.py
@@ -1,18 +1,26 @@
+import os
 from functools import lru_cache
 
+from dotenv import load_dotenv
 from langchain_core.messages import HumanMessage
 from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
 from langgraph.prebuilt import ToolNode
 
 from example_agent.utils.ex_tools import tools
 
 from .ex_state import AgentState, MultipleChoiceResponse
 
+load_dotenv()
+
+environ_model_name = os.environ.get("MODEL_NAME", "openai")  # unset -> keep the previous hard-coded "openai" default
 
 @lru_cache(maxsize=4)
 def _get_tool_model(model_name: str):
     if model_name == "openai":
         model = ChatOpenAI(temperature=0, model_name="gpt-4o")
+    elif model_name == "ollama":
+        model = ChatOllama(temperature=0, model="llama3.1", num_ctx=4096)
     else:
         raise ValueError(f"Unsupported model type: {model_name}")
 
@@ -24,6 +32,8 @@ def _get_tool_model(model_name: str):
 def _get_response_model(model_name: str):
     if model_name == "openai":
         model = ChatOpenAI(temperature=0, model_name="gpt-4o")
+    elif model_name == "ollama":
+        model = ChatOllama(temperature=0, model="llama3.1", num_ctx=4096)
     else:
         raise ValueError(f"Unsupported model type: {model_name}")
 
@@ -36,7 +46,7 @@ def multi_choice_structured(state: AgentState, config):
     # We call the model with structured output in order to return the same format to the user every time
     # state['messages'][-2] is the last ToolMessage in the convo, which we convert to a HumanMessage for the model to use
     # We could also pass the entire chat history, but this saves tokens since all we care to structure is the output of the tool
-    model_name = config.get("configurable", {}).get("model_name", "openai")
+    model_name = config.get("configurable", {}).get("model_name", environ_model_name)
 
     response = _get_response_model(model_name).invoke(
         [
@@ -62,12 +72,17 @@ def structure_response(state: AgentState, config):
         # if not multi-choice don't need to do anything
         return {"messages": []}
 
-
-system_prompt = """
-    You are an oregon trail playing tool calling AI agent. Use the tools available to you to answer the question you are presented. When in doubt use the tools to help you find the answer.
-    If anyone asks your first name is Art return just that string.
-"""
-
+if environ_model_name == "ollama":  # llama needs a more explicit instruction than the OpenAI models
+    system_prompt = """
+    OREGON TRAIL GAME INSTRUCTIONS:
+    YOU MUST STRICTLY FOLLOW THIS RULE:
+    When someone asks "What is the first name of the wagon leader?", your ENTIRE response must ONLY be the word: Art
+    """
+else:  # default prompt, so system_prompt is always defined when call_tool_model reads it
+    system_prompt = """
+        You are an oregon trail playing tool calling AI agent. Use the tools available to you to answer the question you are presented. When in doubt use the tools to help you find the answer.
+        If anyone asks your first name is Art return just that string.
+    """
 
 # Define the function that calls the model
 def call_tool_model(state: AgentState, config):
@@ -75,7 +90,7 @@ def call_tool_model(state: AgentState, config):
     messages = [{"role": "system", "content": system_prompt}] + state["messages"]
 
     # Get from LangGraph config
-    model_name = config.get("configurable", {}).get("model_name", "openai")
+    model_name = config.get("configurable", {}).get("model_name", environ_model_name)
 
     # Get our model that binds our tools
     model = _get_tool_model(model_name)
diff --git a/example_agent/utils/ex_vector_store.py b/example_agent/utils/ex_vector_store.py
index e7e94ed..acaa5d8 100644
--- a/example_agent/utils/ex_vector_store.py
+++ b/example_agent/utils/ex_vector_store.py
@@ -3,6 +3,7 @@
 from dotenv import load_dotenv
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
+from langchain_ollama import OllamaEmbeddings
 from langchain_redis import RedisConfig, RedisVectorStore
 
 load_dotenv()
@@ -18,9 +19,34 @@
 
 
 def get_vector_store():
+    model_name = os.environ.get("MODEL_NAME", "openai")  # unset -> previous OpenAI-only behaviour
+    if model_name not in ("openai", "ollama"):
+        raise ValueError(f"Unsupported model type: {model_name}")  # fail loudly instead of returning None
+    return __get_ollama_vector_store() if model_name == "ollama" else __get_openai_vector_store()
+
+def __check_existing_embedding(vector_store):
+    results = vector_store.similarity_search(doc.page_content, k=1)  # the query must be text, not the Document itself
+    if not results:
+        raise Exception("Required content not found in existing store")
+
+def __get_ollama_vector_store():
+    try:
+        config.from_existing = True
+        vector_store = RedisVectorStore(OllamaEmbeddings(model="nomic-embed-text"), config=config)  # must match the model the index is built with below
+        __check_existing_embedding(vector_store)
+    except Exception:  # index missing or unusable: rebuild it, but let KeyboardInterrupt/SystemExit propagate
+        print("Init vector store with document")
+        config.from_existing = False
+        vector_store = RedisVectorStore.from_documents(
+            [doc], OllamaEmbeddings(model="nomic-embed-text"), config=config
+        )
+    return vector_store
+
+def __get_openai_vector_store():
     try:
         config.from_existing = True
         vector_store = RedisVectorStore(OpenAIEmbeddings(), config=config)
+        __check_existing_embedding(vector_store)
     except:
         print("Init vector store with document")
         config.from_existing = False
diff --git a/participant_agent/graph.py b/participant_agent/graph.py
index 9d85ba5..fda0db9 100644
--- a/participant_agent/graph.py
+++ b/participant_agent/graph.py
@@ -14,7 +14,7 @@
 
 # The graph config can be updated with LangGraph Studio which can be helpful
 class GraphConfig(TypedDict):
-    model_name: Literal["openai"]  # could add more LLM providers here
+    model_name: Literal["openai", "ollama"]  # could add more LLM providers here
 
 
 # Define the function that determines whether to continue or not
diff --git a/participant_agent/utils/nodes.py b/participant_agent/utils/nodes.py
index 8e2fe8f..71e47d9 100644
--- a/participant_agent/utils/nodes.py
+++ b/participant_agent/utils/nodes.py
@@ -1,13 +1,18 @@
+import os
 from functools import lru_cache
 
+from dotenv import load_dotenv
 from langchain_core.messages import HumanMessage
 from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
 from langgraph.prebuilt import ToolNode
 
 from participant_agent.utils.tools import tools
 
 from .state import AgentState, MultipleChoiceResponse
 
+load_dotenv()
+
 
 # need to use this in call_tool_model function
 @lru_cache(maxsize=4)
@@ -17,6 +22,8 @@ def _get_tool_model(model_name: str):
     """
     if model_name == "openai":
         model = ChatOpenAI(temperature=0, model_name="gpt-4o")
+    elif model_name == "ollama":
+        model = ChatOllama(temperature=0, model="llama3.1", num_ctx=4096)
     else:
         raise ValueError(f"Unsupported model type: {model_name}")
 
@@ -32,6 +39,8 @@ def _get_tool_model(model_name: str):
 def _get_response_model(model_name: str):
     if model_name == "openai":
         model = ChatOpenAI(temperature=0, model_name="gpt-4o")
+    elif model_name == "ollama":
+        model = ChatOllama(temperature=0, model="llama3.1", num_ctx=4096)
     else:
         raise ValueError(f"Unsupported model type: {model_name}")
 
@@ -45,7 +54,7 @@ def multi_choice_structured(state: AgentState, config):
     # We call the model with structured output in order to return the same format to the user every time
     # state['messages'][-2] is the last ToolMessage in the convo, which we convert to a HumanMessage for the model to use
     # We could also pass the entire chat history, but this saves tokens since all we care to structure is the output of the tool
-    model_name = config.get("configurable", {}).get("model_name", "openai")
+    model_name = config.get("configurable", {}).get("model_name", os.environ.get("MODEL_NAME"))
 
     response = _get_response_model(model_name).invoke(
         [
@@ -84,8 +93,7 @@ def call_tool_model(state: AgentState, config):
     messages = [{"role": "system", "content": system_prompt}] + state["messages"]
 
     # Get from LangGraph config
-    model_name = config.get("configurable", {}).get("model_name", "openai")
-
+    model_name = config.get("configurable", {}).get("model_name", os.environ.get("MODEL_NAME"))
     # Get our model that binds our tools
     model = _get_tool_model(model_name)
 
diff --git a/participant_agent/utils/vector_store.py b/participant_agent/utils/vector_store.py
index 39792ed..832f2cc 100644
--- a/participant_agent/utils/vector_store.py
+++ b/participant_agent/utils/vector_store.py
@@ -3,6 +3,7 @@
 from dotenv import load_dotenv
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
+from langchain_ollama import OllamaEmbeddings
 from langchain_redis import RedisConfig, RedisVectorStore
 
 load_dotenv()
@@ -18,13 +19,38 @@
 
 
 def get_vector_store():
+    model_name = os.environ.get("MODEL_NAME", "openai")  # unset -> previous OpenAI-only behaviour
+    if model_name not in ("openai", "ollama"):
+        raise ValueError(f"Unsupported model type: {model_name}")  # fail loudly instead of returning None
+    return __get_ollama_vector_store() if model_name == "ollama" else __get_openai_vector_store()
+
+def __check_existing_embedding(vector_store):
+    results = vector_store.similarity_search(doc.page_content, k=1)  # the query must be text; assumes doc is a Document, as in the example agent
+    if not results:
+        raise Exception("Required content not found in existing store")
+
+def __get_ollama_vector_store():
+    try:
+        config.from_existing = True
+        vector_store = RedisVectorStore(OllamaEmbeddings(model="nomic-embed-text"), config=config)  # same embedding model the Readme tells participants to index with
+        __check_existing_embedding(vector_store)
+    except Exception:  # index missing or unusable: fall through to (re)creation, but let KeyboardInterrupt/SystemExit propagate
+        print("Init vector store with document")
+        config.from_existing = False
+
+        # TODO: define vector store for ollama
+        vector_store = None
+    return vector_store
+
+def __get_openai_vector_store():
     try:
         config.from_existing = True
         vector_store = RedisVectorStore(OpenAIEmbeddings(), config=config)
+        __check_existing_embedding(vector_store)
     except:
         print("Init vector store with document")
         config.from_existing = False
 
-        # TODO: define vector store
+        # TODO: define vector store for openai
         vector_store = None
     return vector_store
diff --git a/requirements.txt b/requirements.txt
index 40ce690..08c289d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 langgraph==0.2.56
 langchain==0.3.13
 langchain-openai==0.2.3
+langchain-ollama==0.2.2
 langchain-redis==0.1.1
 pydantic==2.9.2
 python-dotenv==1.0.1
diff --git a/test_setup.py b/test_setup.py
index 0e50e40..8616f4b 100644
--- a/test_setup.py
+++ b/test_setup.py
@@ -2,11 +2,18 @@
 
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
 from redis import Redis
 
 load_dotenv()
 
-llm = ChatOpenAI(model="gpt-4o")
+if os.environ.get("MODEL_NAME") == "openai":
+    llm = ChatOpenAI(model="gpt-4o")
+elif os.environ.get("MODEL_NAME") == "ollama":
+    llm = ChatOllama(model="llama3.1")
+else:
+    raise Exception("Setup failed, MODEL_NAME not defined in .env")
+
 client = Redis.from_url(os.environ.get("REDIS_URL"))