
Commit 2d970fd

samuele committed
vers 1.6.0
1 parent 9cd08ac commit 2d970fd

File tree

23 files changed: +841 −174 lines changed

CHANGELOG.md

+15
```diff
@@ -77,3 +77,18 @@ All notable changes to this project will be documented in this file.
 - **Adaptive Retrieval-Augmented Generation RAG:** Focused on retrieval.
 - **Notebooks for RAG Techniques:**
   - Organized all the RAG techniques into dedicated notebooks, each containing detailed explanations aimed at didactic purposes.
+
+## [1.6.0] - 2025-03-09
+### Added
+- **Ollama Integration for Local LLM Models:**
+  - Added support for running LLM models locally through Ollama integration.
+  - Users can now pull and run models directly on their machines by prefixing model names with `local_` in configuration (e.g., `local_deepseek-r1`, `local_llama3.3`, `local_phi4`).
+  - Provides data privacy, cost efficiency, and offline capability for sensitive applications.
+  - Automatically handles model downloading and initialization when specified models aren't already active.
+  - Supports running models even without a GPU by switching to CPU (with reduced performance).
+- **Model-Specific Options Configuration:**
+  - Added the capability to set specific options for each Ollama model.
+  - Users can now customize parameters like temperature, top_p, top_k, and other inference settings on a per-model basis.
+  - Supports all Ollama model configuration options, including context size, repetition penalties, and sampling parameters.
+  - Enables fine-tuned control over model behavior while maintaining the simplicity of the local integration.
+  - Configuration options can be set through the API for advanced model tuning.
```
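
A note for readers new to the `local_` convention above: the sketch below shows one way such prefix-based routing between Ollama and a cloud provider can work. It is an illustration only, not the repository's implementation (that lives in `models/models.py`, imported by `app.py` later in this commit); the function name and routing logic here are assumptions.

```python
# Illustrative sketch of `local_`-prefix routing; not the repository's actual code.
# Assumes the Ollama server that docker-compose publishes on port 11434.
import requests

OLLAMA_URL = "http://localhost:11434"

def call_model_sketch(chat_history, model, options=None):
    if model.startswith("local_"):
        name = model[len("local_"):]  # "local_llama3.3" -> "llama3.3"
        # Pull first, so the call works even when the model isn't already active.
        requests.post(f"{OLLAMA_URL}/api/pull",
                      json={"name": name, "stream": False}, timeout=600)
        # Ollama's /api/chat accepts per-model options (temperature, top_p, top_k, ...).
        resp = requests.post(
            f"{OLLAMA_URL}/api/chat",
            json={"model": name, "messages": chat_history,
                  "stream": False, "options": options or {}},
            timeout=600,
        )
        return resp.json()["message"]["content"]
    raise NotImplementedError("cloud path (e.g., OpenAI) omitted from this sketch")
```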

README.md

+31 −47

````diff
@@ -2,7 +2,7 @@
 ![AutoCode Agent Global Workflow](./static/images/autocode.png)
 
 # AutoCodeAgent - An innovative AI agent powered by IntelliChain, Deep Search, and multi-RAG techniques
-![version](https://img.shields.io/badge/version-1.5.0-blue)
+![version](https://img.shields.io/badge/version-1.6.0-blue)
 
 ## One agent, Infinite possibilities
 AutoCodeAgent redefines AI-powered problem solving by seamlessly integrating three groundbreaking modes:
@@ -20,7 +20,22 @@ You can also benefit from these techniques for educational purposes, as each one
 By fusing these three potent modes, AutoCodeAgent transforms intricate challenges into innovative, actionable solutions, setting a new standard in intelligent automation and advanced research.
 
 [Application Setup](#application-setup)
-Step-by-step guide to setting up the project for the first time.
+Step-by-step guide to setting up the project for the first time.
+
+AutoCodeAgent provides flexible integration with Large Language Models (LLMs) through both local and cloud-based solutions.
+Our agentic framework can communicate with LLM models in two ways:
+
+1. **Local Integration**: Using Ollama to run models directly on your machine via our prebuilt Docker container.
+   - Supports running LLM models locally through Ollama integration
+   - Pull and run models directly on your machine by prefixing model names with `local_` in configuration (e.g., `local_deepseek-r1`, `local_llama3.3`, `local_phi4`)
+   - Automatically handles model downloading and initialization when specified models aren't already active
+   - Supports running models even without a GPU by switching to CPU (with reduced performance)
+   - Customize parameters like temperature, top_p, top_k, and other inference settings on a per-model basis
+
+   For detailed information about local model management and Ollama integration, please refer to the complete documentation at [models/README.md](models/README.md).
+
+2. **Cloud Services**: Connecting to OpenAI's API for access to their hosted models.
+
 
 
 ## IntelliChain sections
@@ -127,61 +142,32 @@ REDIS_HOST=redis
 REDIS_PORT=6379
 REDIS_DB=0
 
-SIMPLE_RAG_CHUNK_SIZE=1500 # chunk size for simple rag
-SIMPLE_RAG_OVERLAP=200 # overlap for simple rag
-SIMPLE_RAG_EMBEDDING_MODEL=text-embedding-ada-002 # simple rag embedding model
-
-HYBRID_VECTOR_GRAPH_RAG_CHUNK_SIZE=1500 # chunk size for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_OVERLAP=200 # overlap for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_SUMMARIZATION_GRAPH_NODE_LENGTH=100 # summarization graph node length for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_SIMILARITY_RETRIEVE_THRESHOLD=0.9 # similarity retrieve threshold for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_SIMILARITY_EDGE_THRESHOLD=0.9 # similarity edge threshold for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_QUERY_MAX_DEPTH=3 # max depth for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_QUERY_TOP_K=3 # top k for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_QUERY_MAX_CONTEXT_LENGTH=10000 # max context length for hybrid vector graph rag
-HYBRID_VECTOR_GRAPH_RAG_EMBEDDING_VECTOR_MODEL=text-embedding-ada-002 # hybrid vector graph rag embedding vector model
-HYBRID_VECTOR_GRAPH_RAG_SUMMARIZATION_GRAPH_NODE_MODEL=gpt-4o # hybrid vector graph rag summarization graph node model
-
-CHROMA_DB_PATH=./tools/rag/database/chroma_db # url for chroma db used in simple rag
-LLAMA_INDEX_DB_PATH=./tools/rag/database/llama_index # url for llama index db used in llama index rag tool
-LLAMA_INDEX_CONTEXT_WINDOW_DB_PATH=./tools/rag/database/llama_index_context_window # url for llama index context window db used in llama index context window rag tool
-LLAMA_INDEX_CORPUS_DIR=./tools/rag/llama_index/corpus # url for llama index corpus used in llama index rag tool
-LLAMA_INDEX_CONTEXT_WINDOW_CORPUS_DIR=./tools/rag/llama_index_context_window/corpus # url for llama index context window corpus used in llama index context window rag tool
-
-LLAMA_INDEX_CONTEXT_WINDOW_SIZE_INGEST=30 # sentences for llama index context window ingestion
-LLAMA_INDEX_CONTEXT_WINDOW_MAX_ADJACENT_CHARS_RAG_RETRIEVE=150 # max adjacent characters for llama index context window rag tool
-LLAMA_INDEX_CONTEXT_WINDOW_TOP_K_RAG_RETRIEVE=5 # top k chunks for llama index context window rag tool
-
-HYDE_RAG_CHUNK_SIZE=1500 # chunk size for hyde rag tool
-HYDE_RAG_QUERY_TOP_K=5 # query top k for hyde rag tool
-HYDE_GENERATE_HYPO_DOC_MODEL=gpt-4o # generate hyde rag tool model
-
[email protected] # gmail user for send email tool
-PASSGMAILAPP=your_password # gmail password for send email tool
-
-TOOL_HELPER_MODEL=gpt-4o # tool helper model
-JSON_PLAN_MODEL=gpt-4o # json plan model
-EVALUATION_MODEL=gpt-4o # evaluation model
-SURF_AI_JSON_TASK_MODEL=gpt-4o # surf ai json task model, important: for surfAi you must use a multimodal model with text + vision capabilities
-DEEP_SEARCH_MODEL=o3-mini # deep search model
[email protected] # Gmail user for default tool send_email
+PASSGMAILAPP=your_password # Gmail app password for default tool send_email
 
 ELEVEN_API_KEY=API_KEY # elevenlabs api key for langchain tool
 OPENWEATHERMAP_API_KEY=API_KEY # openweathermap api key for langchain tool
 SERPAPI_API_KEY=API_KEY # serpapi api key for langchain tool and also deep search mode
 SERPER_API_KEY=API_KEY # serper api key for deep search mode (optional, the script uses serpapi by default)
 ```
 
-4. Build the Docker image:
+4. File params.py
+The `params.py` file contains a comprehensive configuration dictionary that controls the behavior of AutoCodeAgent 2.0's various RAG (Retrieval-Augmented Generation) systems and tools. This configuration file centralizes all adjustable parameters, making the system highly customizable.
+Additionally, it configures database paths for the various vector stores (ChromaDB, LlamaIndex), email credentials, and specifies which AI models to use for the different components of the system (tool assistance, planning, evaluation, web automation, and search).
+You can set which models to use throughout the system - whether cloud-based models from OpenAI or local models running through Ollama or any other API-compatible service. Models can be specified by prefixing with "local_" for local models (e.g., "local_llama3") or using the standard model name for cloud services (e.g., "gpt-4o").
+This centralized configuration lets users adjust the system's behavior through parameters without modifying core code.
+
+5. Build the Docker image:
 ```bash
 docker-compose build
 ```
 
-5. Run the Docker container:
+6. Run the Docker container:
 ```bash
 docker-compose up -d
 ```
 
-6. Check the backend logs:
+7. Check the backend logs:
 ```bash
 docker logs -f flask_app
 ```
@@ -190,7 +176,6 @@ If you want to rebuild and restart the application, and optimize docker space:
 docker-compose down
 docker-compose build --no-cache
 docker-compose up -d
-docker system prune -a --volumes -f
 docker builder prune -a -f
 docker logs -f flask_app
 ```
@@ -199,16 +184,16 @@ Is a good idea to always check docker space usage after building and starting the
 docker system df
 ```
 
-7. Access the AI Agent chat interface:
+8. Access the AI Agent chat interface:
 ```bash
 http://localhost:5000
 ```
-8. To view the automated browser sessions (powered by SurfAi), open:
+9. To view the automated browser sessions (powered by SurfAi), open:
 ```bash
 http://localhost:6901/vnc.html
 ```
-9. To explore and interact with the Neo4j graph database, visit:
+10. To explore and interact with the Neo4j graph database, visit:
 ```bash
 http://localhost:7474/browser/
 ```
@@ -1256,4 +1241,3 @@ By contributing, you agree that your changes will be licensed under the same lic
 
 Thank you for helping improve this project! 🚀
 
-
````
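
Step 4 describes `params.py` without showing it. As a rough sketch of the shape such a dictionary could take: the keys below are the ones this commit actually references, the values mirror the defaults removed from `.env` above, and the exact layout of the real file is an assumption.

```python
# Hypothetical sketch of PARAMS in params.py. Keys are those referenced by this
# commit; values mirror the old .env defaults removed above. The real file holds
# many more entries (RAG settings, DB paths, email credentials, ...).
PARAMS = {
    "TOOL_HELPER_MODEL": "gpt-4o",
    "JSON_PLAN_MODEL": "gpt-4o",
    "EVALUATION_MODEL": "gpt-4o",
    "DEEP_SEARCH_MODEL": "o3-mini",
    "SIMPLE_RAG_EMBEDDING_MODEL": "text-embedding-ada-002",
    # Prefix a name with "local_" to route that component to Ollama instead:
    # "TOOL_HELPER_MODEL": "local_llama3.3",
}
```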

app.py

+13 −2

```diff
@@ -9,7 +9,7 @@
 from tools.rag.hybrid_vector_graph_rag.ingest_corpus import hybrid_vector_graph_rag_ingest_corpus
 from tools.rag.llama_index.ingest_corpus import llama_index_ingest_corpus
 from tools.rag.llama_index_context_window.ingest_corpus import llama_index_context_window_ingest_corpus
-
+from models.models import call_model
 
 logging.basicConfig(
     level=logging.DEBUG,
@@ -139,6 +139,7 @@ def hybrid_vector_graph_rag_ingest():
         logging.error(traceback.format_exc())
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
+
 #start the acquisition, parsing, and ingestion of all documents present in /tools/rag/llama_index/corpus
 # or /tools/rag/llama_index_context_window/corpus if isContextWindow is true
 @app.route('/llama-index-ingest-corpus', methods=['POST'])
@@ -159,10 +160,20 @@ def llama_index_ingest():
         return jsonify({"error": f"Internal server error: {str(e)}"}), 500
 
 
+
+@app.route('/call-test-model', methods=['GET'])
+def call_test_model():
+    response = call_model(
+        chat_history=[{"role": "user", "content": "Tell me a story about a cat and a dog."}],
+        model="local_llama3.2:1b"
+    )
+    return jsonify({"response": response}), 200
+
+
 if __name__ == '__main__':
     app.run(
         host='0.0.0.0',
         port=int(os.getenv('FLASK_PORT', 5000)),
         debug=True
-    )
+    )
 
```
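
The new `/call-test-model` route doubles as a smoke test for the local integration. Once the containers are running, a call like this from the host should return a short story; the first request may be slow while Ollama pulls `llama3.2:1b`.

```python
# Smoke-test the /call-test-model route added in this commit.
# Assumes the Flask app is reachable on localhost:5000 (see the setup steps above).
import requests

resp = requests.get("http://localhost:5000/call-test-model", timeout=600)
resp.raise_for_status()
print(resp.json()["response"])
```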

code_agent/code_agent.py

+5 −5

```diff
@@ -1,12 +1,12 @@
 import json
-import os
 from typing import List, Dict
 from .tool_generator import generate_tools
 from .logging_handler import LoggingConfigurator
 from .agent_plan_generator import PlanGenerator
 from .agent_plan_evaluator import PlanEvaluator
 from .agent_subtask_executor import SubtaskExecutor
 from .utils import transform_final_answer
+from params import PARAMS
 
 class CodeAgent:
     def __init__(self, chat_history: List[Dict], tools: List[str], use_default_tools: bool = True):
@@ -20,10 +20,10 @@ def __init__(self, chat_history: List[Dict], tools: List[str], use_default_tools
         self.logger = LoggingConfigurator.configure_logger(self.execution_logs)
         self.enrich_log = LoggingConfigurator.enrich_log
         self.models = {
-            "TOOL_HELPER_MODEL": os.getenv("TOOL_HELPER_MODEL"),
-            "JSON_PLAN_MODEL": os.getenv("JSON_PLAN_MODEL"),
-            "EVALUATION_MODEL": os.getenv("EVALUATION_MODEL"),
-            "SIMPLE_RAG_EMBEDDING_MODEL": os.getenv("SIMPLE_RAG_EMBEDDING_MODEL")
+            "TOOL_HELPER_MODEL": PARAMS["TOOL_HELPER_MODEL"],
+            "JSON_PLAN_MODEL": PARAMS["JSON_PLAN_MODEL"],
+            "EVALUATION_MODEL": PARAMS["EVALUATION_MODEL"],
+            "SIMPLE_RAG_EMBEDDING_MODEL": PARAMS["SIMPLE_RAG_EMBEDDING_MODEL"]
         }
         # Instantiate helper components.
         self.plan_generator = PlanGenerator(self.chat_history, self.tools, self.models["JSON_PLAN_MODEL"])
```

code_agent/tool_generator.py

+4 −3

```diff
@@ -4,6 +4,7 @@
 from string import Template
 from langchain_community.agent_toolkits.load_tools import load_tools
 from .default_tools import DEFAULT_TOOLS, TOOLS_ACTIVATION
+from params import PARAMS
 
 
 logger = logging.getLogger(__name__)
@@ -51,9 +52,9 @@ def generate_tools(user_tools, use_default_tools):
 
     # Define variables to substitute.
     variables = {
-        "TOOL_HELPER_MODEL": os.getenv("TOOL_HELPER_MODEL", ""),
-        "JSON_PLAN_MODEL": os.getenv("JSON_PLAN_MODEL", ""),
-        "EVALUATION_MODEL": os.getenv("EVALUATION_MODEL", ""),
+        "TOOL_HELPER_MODEL": PARAMS["TOOL_HELPER_MODEL"],
+        "JSON_PLAN_MODEL": PARAMS["JSON_PLAN_MODEL"],
+        "EVALUATION_MODEL": PARAMS["EVALUATION_MODEL"],
         "GMAILUSER": os.getenv("GMAILUSER", ""),
         "PASSGMAILAPP": os.getenv("PASSGMAILAPP", "")
     }
```
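
For context on the `variables` dict above: it feeds `string.Template` (imported at the top of this file), so default-tool snippets can embed placeholders such as `$TOOL_HELPER_MODEL`. A self-contained sketch of that substitution pattern, with an invented one-line template:

```python
# Sketch of the string.Template substitution pattern used by generate_tools;
# the template string below is invented for illustration.
from string import Template

snippet = Template('model = "$TOOL_HELPER_MODEL"  # model this tool will call')
print(snippet.substitute({"TOOL_HELPER_MODEL": "local_llama3.3"}))
# prints: model = "local_llama3.3"  # model this tool will call
```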

deep_search/planner.py

+2 −1

```diff
@@ -16,6 +16,7 @@
 from .egot_engine import EGoTEngine
 from .utils import apply_depth_settings
 from tools.rag.llama_index.retrieve import retrieve_documents
+from params import PARAMS
 
 class DeepSearchAgentPlanner:
     def __init__(self, chat_history: List[Dict], **kwargs):
@@ -28,7 +29,7 @@ def __init__(self, chat_history: List[Dict], **kwargs):
         self.data.memory_logs = []
         self.logger = LoggingConfigurator.configure_logger(self.data.memory_logs)
         self.enrich_log = LoggingConfigurator.enrich_log
-        self.deep_search_model = os.getenv("DEEP_SEARCH_MODEL")
+        self.deep_search_model = PARAMS["DEEP_SEARCH_MODEL"]
 
         neo4j_uri = os.getenv("NEO4J_URI")
         neo4j_user = os.getenv("NEO4J_USER")
```

docker-compose.yml

+11 −1

```diff
@@ -19,6 +19,15 @@ services:
       redis:
         condition: service_started
 
+  ollama:
+    image: ollama/ollama:latest
+    container_name: ollama_server
+    restart: unless-stopped
+    ports:
+      - "11434:11434"
+    volumes:
+      - ollama_data:/data
+
   neo4j:
     image: neo4j:4.4
     container_name: neo4j_db
@@ -58,4 +67,5 @@ volumes:
   neo4j_logs:
   neo4j_import:
   neo4j_plugins:
-  redis-data:
+  redis-data:
+  ollama_data:
```
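
With the new `ollama` service up, the server answers on the published port 11434. A sketch for verifying it and exercising the per-model options (temperature, top_p, top_k) from the changelog; it reuses `llama3.2:1b`, the model targeted by `app.py`'s test route.

```python
# Sketch: check the ollama_server container, pull a model, and run one
# generation with per-model options. Model choice mirrors app.py's test route.
import requests

BASE = "http://localhost:11434"

print(requests.get(f"{BASE}/api/tags", timeout=10).json())  # models already pulled
requests.post(f"{BASE}/api/pull",
              json={"name": "llama3.2:1b", "stream": False}, timeout=600)

resp = requests.post(
    f"{BASE}/api/generate",
    json={
        "model": "llama3.2:1b",
        "prompt": "Say hello in one sentence.",
        "stream": False,
        "options": {"temperature": 0.2, "top_p": 0.9, "top_k": 40},
    },
    timeout=600,
)
print(resp.json()["response"])
```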
