wecode-ai · sunnights · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/.env.example b/.env.example
@@ -160,6 +160,32 @@ EXECUTOR_MANAGER_PORT=8001
 # Used by executor manager to connect to executor containers in host mode
 # DOCKER_HOST_ADDR=host.docker.internal
 
+# =============================================================================
+# KNOWLEDGE RUNTIME
+# =============================================================================
+
+# Knowledge Runtime port (default: 8200)
+# Used by both docker-compose and start.sh
+KNOWLEDGE_RUNTIME_PORT=8200
+
+# Backend internal URL for Knowledge Runtime to fetch content
+# In docker-compose, this defaults to http://backend:8000
+# In local dev, this defaults to http://localhost:8000
+# BACKEND_INTERNAL_URL=http://backend:8000
+
+# Knowledge Runtime URL for Backend to call
+# In docker-compose, this defaults to http://knowledge_runtime:8200
+# In local dev, this defaults to http://localhost:8200
+# KNOWLEDGE_RUNTIME_URL=http://knowledge_runtime:8200
+
+# Content fetching timeout in seconds (default: 120)
+# CONTENT_FETCH_TIMEOUT=120
+
+# Logging configuration (file logging enabled by default)
+# LOG_FILE_ENABLED=true
+# LOG_DIR=/app/logs
+# LOG_LEVEL=INFO
+
 # =============================================================================
 # MCP (Model Context Protocol)
 # =============================================================================

diff --git a/AGENTS.md b/AGENTS.md
@@ -12,6 +12,7 @@ Wegent is an open-source AI-native operating system for defining, organizing, an
 - **Executor**: Task execution engine (Claude Code, Agno, Dify, ImageValidator)
 - **Executor Manager**: Task orchestration via Docker
 - **Chat Shell**: Lightweight AI chat engine for Chat Shell type (LangGraph + multi-LLM)
+- **Knowledge Runtime**: Standalone RAG HTTP service called by Backend (reuses knowledge_engine)
 - **Shared**: Common utilities, models, and cryptography
 
 **Core principles:**
@@ -542,6 +543,25 @@ t('actions.save')             // Ambiguous - which namespace?
 - `package` - Python package imported by Backend
 - `cli` - Command-line interface for interactive chat
 
+### Knowledge Runtime
+
+**Tech:** FastAPI, HTTP client, reuses knowledge_engine
+
+**Purpose:** Standalone HTTP service for RAG operations. Backend calls this service instead of using knowledge_engine directly.
+
+**Endpoints:**
+- `GET /internal/rag/health` - Health check
+- `GET /internal/rag/storage-types` - List available storage types
+- `POST /internal/rag/index` - Index document
+- `POST /internal/rag/query` - Query documents
+- `POST /internal/rag/delete-document-index` - Delete document index
+- `POST /internal/rag/purge-knowledge-index` - Purge knowledge base
+- `POST /internal/rag/drop-knowledge-index` - Drop physical index
+- `POST /internal/rag/all-chunks` - List all chunks
+- `POST /internal/rag/test-connection` - Test storage connection
+
+**Port:** 8200 (default)
+
 
 ---
 
@@ -579,6 +599,7 @@ cd backend && uv run pytest
 cd executor && uv run pytest
 cd executor_manager && uv run pytest
 cd chat_shell && uv run pytest
+cd knowledge_runtime && uv run pytest
 cd shared && uv run pytest
 cd frontend && npm test
 

diff --git a/backend/.env.example b/backend/.env.example
@@ -232,3 +232,9 @@ MEMORY_WRITE_TIMEOUT_SECONDS=10.0
 MEMORY_CONTEXT_MESSAGES=3
 
 EXECUTOR_LATEST_VERSION=1.0.0
+
+# Internal Service Token for service-to-service authentication
+# Used by: Backend -> knowledge_runtime, Backend -> chat_shell,
+#          chat_shell/knowledge_runtime -> Backend internal API
+# Generate using: openssl rand -hex 32
+# INTERNAL_SERVICE_TOKEN=your-secure-token-here
diff --git a/backend/app/api/api.py b/backend/app/api/api.py
@@ -36,7 +36,7 @@
 )
 from app.core.config import settings
 
-# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
+# RAG requires knowledge_runtime service - skip in standalone mode
 if not settings.STANDALONE_MODE:
     from app.api.endpoints import rag
 

diff --git a/backend/app/api/endpoints/adapter/retrievers.py b/backend/app/api/endpoints/adapter/retrievers.py
@@ -15,18 +15,11 @@
 from app.schemas.kind import Retriever
 from app.services.adapters.retriever_kinds import retriever_kinds_service
 from app.services.rag.gateway_factory import get_query_gateway
-from app.services.rag.local_gateway import LocalRagGateway
-from app.services.rag.remote_gateway import RemoteRagGatewayError
+from app.services.rag.remote_gateway import RemoteRagGateway, RemoteRagGatewayError
 from app.services.rag.runtime_specs import ConnectionTestRuntimeSpec
-from knowledge_engine.storage.factory import (
-    create_storage_backend_from_config,
-    get_all_storage_retrieval_methods,
-    get_supported_retrieval_methods,
-    get_supported_storage_types,
-)
 from shared.models import RuntimeRetrieverConfig
 
-# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
+# RAG requires knowledge_runtime service - skip in standalone mode
 
 router = APIRouter()
 logger = logging.getLogger(__name__)
@@ -43,7 +36,7 @@ def _check_rag_available():
 
 # Static routes must be defined before dynamic routes to avoid conflicts
 @router.get("/storage-types/retrieval-methods")
-def get_storage_retrieval_methods():
+async def get_storage_retrieval_methods():
     """
     Get supported retrieval methods for all storage types.
 
@@ -61,14 +54,25 @@ def get_storage_retrieval_methods():
     }
     """
     _check_rag_available()
+
+    gateway = RemoteRagGateway()
+    response = await gateway.get_storage_types()
+
+    # Convert StorageTypesResponse to the expected format
+    data = {
+        type_info.type: type_info.retrieval_methods
+        for type_info in response.storage_types
+    }
+    storage_types = [type_info.type for type_info in response.storage_types]
+
     return {
-        "data": get_all_storage_retrieval_methods(),
-        "storage_types": get_supported_storage_types(),
+        "data": data,
+        "storage_types": storage_types,
     }
 
 
 @router.get("/storage-types/{storage_type}/retrieval-methods")
-def get_storage_type_retrieval_methods(storage_type: str):
+async def get_storage_type_retrieval_methods(storage_type: str):
     """
     Get supported retrieval methods for a specific storage type.
 
@@ -86,14 +90,22 @@ def get_storage_type_retrieval_methods(storage_type: str):
     """
     _check_rag_available()
 
-    try:
-        methods = get_supported_retrieval_methods(storage_type)
-        return {
-            "storage_type": storage_type,
-            "retrieval_methods": methods,
-        }
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
+    gateway = RemoteRagGateway()
+    response = await gateway.get_storage_types()
+
+    # Find the storage type in the response (case-insensitive comparison)
+    for type_info in response.storage_types:
+        if type_info.type.casefold() == storage_type.casefold():
+            return {
+                "storage_type": type_info.type,  # Return normalized form from registry
+                "retrieval_methods": type_info.retrieval_methods,
+            }
+
+    raise HTTPException(
+        status_code=400,
+        detail=f"Unsupported storage type: {storage_type}. "
+        f"Supported types: {[t.type for t in response.storage_types]}",
+    )
 
 
 @router.get("")
@@ -259,15 +271,6 @@ async def test_retriever_connection(
         }
 
     try:
-        create_storage_backend_from_config(
-            storage_type=storage_type,
-            url=url,
-            username=username,
-            password=password,
-            api_key=api_key,
-            index_strategy={"mode": "per_dataset"},
-            ext={},
-        )
         runtime_spec = ConnectionTestRuntimeSpec(
             retriever_config=RuntimeRetrieverConfig(
                 name="connection-test",
@@ -284,14 +287,15 @@ async def test_retriever_connection(
             )
         )
         gateway = get_query_gateway()
-        try:
-            return await gateway.test_connection(runtime_spec)
-        except RemoteRagGatewayError:
-            return await LocalRagGateway().test_connection(runtime_spec)
+        return await gateway.test_connection(runtime_spec)
 
     except ValueError as e:
         return {"success": False, "message": str(e)}
 
+    except RemoteRagGatewayError as e:
+        logger.error(f"Retriever connection test failed: {str(e)}")
+        return {"success": False, "message": str(e)}
+
     except Exception as e:
         logger.error(f"Retriever connection test failed: {str(e)}")
         return {"success": False, "message": f"Connection failed: {str(e)}"}
diff --git a/backend/app/api/endpoints/internal/__init__.py b/backend/app/api/endpoints/internal/__init__.py
@@ -18,7 +18,7 @@
 from .workspace_archives import router as workspace_archives_router
 
 # RAG router is conditionally imported based on STANDALONE_MODE
-# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
+# RAG requires knowledge_runtime service - skip in standalone mode
 if not settings.STANDALONE_MODE:
     from .rag import router as rag_router
 

diff --git a/backend/app/api/endpoints/internal/rag.py b/backend/app/api/endpoints/internal/rag.py
@@ -24,13 +24,12 @@
 from app.services.knowledge.retrieval_persistence import (
     retrieval_persistence_service,
 )
-from app.services.rag.gateway_factory import get_query_gateway
-from app.services.rag.local_gateway import LocalRagGateway
-from app.services.rag.remote_gateway import (
-    RemoteRagGateway,
-    RemoteRagGatewayError,
-    should_fallback_to_local,
+from app.services.rag.gateway_factory import (
+    get_delete_gateway,
+    get_list_chunks_gateway,
+    get_query_gateway,
 )
+from app.services.rag.remote_gateway import RemoteRagGatewayError
 from app.services.rag.retrieval_service import RetrievalService
 from app.services.rag.runtime_resolver import RagRuntimeResolver
 from shared.models import (
@@ -198,13 +197,6 @@ def _resolve_document_names(
     )
 
 
-def _resolve_query_gateway(runtime_spec):
-    route_mode = getattr(runtime_spec, "route_mode", "auto")
-    if route_mode == "rag_retrieval":
-        return get_query_gateway()
-    return LocalRagGateway()
-
-
 def _finalize_query_runtime_spec(
     runtime_spec,
     db: Session,
@@ -240,12 +232,11 @@ def _finalize_query_runtime_spec(
     return runtime_spec.model_copy(update={"route_mode": resolved_route_mode})
 
 
-async def _execute_query_with_remote_fallback(runtime_spec, db: Session):
-    rag_gateway = _resolve_query_gateway(runtime_spec)
-    if (
-        isinstance(rag_gateway, RemoteRagGateway)
-        and getattr(runtime_spec, "route_mode", None) == "rag_retrieval"
-        and not getattr(runtime_spec, "knowledge_base_configs", None)
+async def _execute_query(runtime_spec, db: Session):
+    """Execute query using the gateway."""
+    rag_gateway = get_query_gateway()
+    if getattr(runtime_spec, "route_mode", None) == "rag_retrieval" and not getattr(
+        runtime_spec, "knowledge_base_configs", None
     ):
         runtime_spec = runtime_spec.model_copy(
             update={
@@ -257,17 +248,7 @@ async def _execute_query_with_remote_fallback(runtime_spec, db: Session):
                 )
             }
         )
-    try:
-        return await rag_gateway.query(runtime_spec, db=db)
-    except RemoteRagGatewayError as exc:
-        if not should_fallback_to_local(exc):
-            raise
-        logger.warning(
-            "[internal_rag] Remote query failed for KBs %s, falling back to local gateway: %s",
-            getattr(runtime_spec, "knowledge_base_ids", []),
-            exc,
-        )
-        return await LocalRagGateway().query(runtime_spec, db=db)
+    return await rag_gateway.query(runtime_spec, db=db)
 
 
 @router.post(
@@ -351,7 +332,7 @@ async def internal_retrieve(
             restricted_mode=restricted_mode,
         )
         runtime_spec = _finalize_query_runtime_spec(runtime_spec, db, runtime_context)
-        result = await _execute_query_with_remote_fallback(runtime_spec, db)
+        result = await _execute_query(runtime_spec, db)
 
         records = result.get("records", [])
 
@@ -654,7 +635,7 @@ async def get_all_chunks(
             query=request.query,
             metadata_condition=request.metadata_condition,
         )
-        result = await LocalRagGateway().list_chunks(
+        result = await get_list_chunks_gateway().list_chunks(
             runtime_spec,
             db=db,
         )
@@ -706,7 +687,7 @@ async def purge_knowledge_index(
             index_owner_user_id=request.index_owner_user_id,
             retriever_config=request.retriever_config.model_dump(mode="python"),
         )
-        return await LocalRagGateway().purge_knowledge_index(runtime_spec, db=db)
+        return await get_delete_gateway().purge_knowledge_index(runtime_spec, db=db)
     except ValueError as e:
         logger.warning("[internal_rag] Purge knowledge index error: %s", e)
         raise HTTPException(status_code=400, detail=str(e))
@@ -732,7 +713,7 @@ async def drop_knowledge_index(
             index_owner_user_id=request.index_owner_user_id,
             retriever_config=request.retriever_config.model_dump(mode="python"),
         )
-        return await LocalRagGateway().drop_knowledge_index(runtime_spec, db=db)
+        return await get_delete_gateway().drop_knowledge_index(runtime_spec, db=db)
     except ValueError as e:
         logger.warning("[internal_rag] Drop knowledge index error: %s", e)
         raise HTTPException(status_code=400, detail=str(e))