Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
7575b11
feat(shared): 导出 ContentRef 类型供 knowledge_runtime 使用
sunnights Apr 19, 2026
0216e1c
feat(knowledge_runtime): 新增独立 RAG HTTP 服务
sunnights Apr 19, 2026
6f8a37b
refactor(backend/rag): 重构 RAG 服务支持远程调用
sunnights Apr 19, 2026
254ad33
feat(knowledge_runtime): 添加 docker 部署支持和架构文档
sunnights Apr 19, 2026
92193e5
test(backend/rag): 修复 RAG 测试以匹配重构后的实现
sunnights Apr 19, 2026
1fd834b
feat(knowledge_runtime): 添加 TOKEN 认证机制
sunnights Apr 20, 2026
44fc8ea
refactor(config): 统一内部服务认证 token 为 INTERNAL_SERVICE_TOKEN
sunnights Apr 20, 2026
58e573f
feat(knowledge_runtime): 添加部署配置并将服务加入 rag profile
sunnights Apr 21, 2026
f043c64
Merge remote-tracking branch 'wecode-ai/main' into zy_knowledge_runtime
sunnights Apr 21, 2026
64c20d5
Merge remote-tracking branch 'wecode-ai/main' into knowledge_runtime
sunnights Apr 21, 2026
0b71f96
refactor(backend): remove local RAG mode and use knowledge_runtime ex…
sunnights Apr 21, 2026
1de10be
refactor(backend): remove direct knowledge_engine dependency
sunnights Apr 21, 2026
bf2053a
refactor(backend,chat-shell): remove unused llama-index dependencies
sunnights Apr 21, 2026
2205a1a
refactor(backend): 移除对 knowledge_engine 的直接依赖
sunnights Apr 21, 2026
88f36d5
fix: 修复多个服务的配置和错误处理问题
sunnights Apr 21, 2026
c9a47b7
fix(knowledge-runtime): 修复 Content-Disposition 解析和 venv 创建问题
sunnights Apr 21, 2026
6598317
fix(test): use MagicMock instead of AsyncMock for asyncio.to_thread t…
sunnights Apr 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,32 @@ EXECUTOR_MANAGER_PORT=8001
# Used by executor manager to connect to executor containers in host mode
# DOCKER_HOST_ADDR=host.docker.internal

# =============================================================================
# KNOWLEDGE RUNTIME
# =============================================================================

# Knowledge Runtime port (default: 8200)
# Used by both docker-compose and start.sh
KNOWLEDGE_RUNTIME_PORT=8200

# Backend internal URL for Knowledge Runtime to fetch content
# In docker-compose, this defaults to http://backend:8000
# In local dev, this defaults to http://localhost:8000
# BACKEND_INTERNAL_URL=http://backend:8000

# Knowledge Runtime URL for Backend to call
# In docker-compose, this defaults to http://knowledge_runtime:8200
# In local dev, this defaults to http://localhost:8200
# KNOWLEDGE_RUNTIME_URL=http://knowledge_runtime:8200

# Content fetching timeout in seconds (default: 120)
# CONTENT_FETCH_TIMEOUT=120

# Logging configuration (file logging enabled by default)
# LOG_FILE_ENABLED=true
# LOG_DIR=/app/logs
# LOG_LEVEL=INFO

# =============================================================================
# MCP (Model Context Protocol)
# =============================================================================
Expand Down
21 changes: 21 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Wegent is an open-source AI-native operating system for defining, organizing, an
- **Executor**: Task execution engine (Claude Code, Agno, Dify, ImageValidator)
- **Executor Manager**: Task orchestration via Docker
- **Chat Shell**: Lightweight AI chat engine for Chat Shell type (LangGraph + multi-LLM)
- **Knowledge Runtime**: Standalone RAG HTTP service called by Backend (reuses knowledge_engine)
- **Shared**: Common utilities, models, and cryptography

**Core principles:**
Expand Down Expand Up @@ -542,6 +543,25 @@ t('actions.save') // Ambiguous - which namespace?
- `package` - Python package imported by Backend
- `cli` - Command-line interface for interactive chat

### Knowledge Runtime

**Tech:** FastAPI, HTTP client, reuses knowledge_engine

**Purpose:** Standalone HTTP service for RAG operations. Backend calls this service instead of using knowledge_engine directly.

**Endpoints:**
- `GET /internal/rag/health` - Health check
- `GET /internal/rag/storage-types` - List available storage types
- `POST /internal/rag/index` - Index document
- `POST /internal/rag/query` - Query documents
- `POST /internal/rag/delete-document-index` - Delete document index
- `POST /internal/rag/purge-knowledge-index` - Purge knowledge base
- `POST /internal/rag/drop-knowledge-index` - Drop physical index
- `POST /internal/rag/all-chunks` - List all chunks
- `POST /internal/rag/test-connection` - Test storage connection

Comment thread
coderabbitai[bot] marked this conversation as resolved.
**Port:** 8200 (default)


---

Expand Down Expand Up @@ -579,6 +599,7 @@ cd backend && uv run pytest
cd executor && uv run pytest
cd executor_manager && uv run pytest
cd chat_shell && uv run pytest
cd knowledge_runtime && uv run pytest
cd shared && uv run pytest
cd frontend && npm test

Expand Down
6 changes: 6 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,9 @@ MEMORY_WRITE_TIMEOUT_SECONDS=10.0
MEMORY_CONTEXT_MESSAGES=3

EXECUTOR_LATEST_VERSION=1.0.0

# Internal Service Token for service-to-service authentication
# Used by: Backend -> knowledge_runtime, Backend -> chat_shell,
# chat_shell/knowledge_runtime -> Backend internal API
# Generate using: openssl rand -hex 32
# INTERNAL_SERVICE_TOKEN=your-secure-token-here
2 changes: 1 addition & 1 deletion backend/app/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
)
from app.core.config import settings

# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
# RAG requires knowledge_runtime service - skip in standalone mode
if not settings.STANDALONE_MODE:
from app.api.endpoints import rag

Expand Down
72 changes: 38 additions & 34 deletions backend/app/api/endpoints/adapter/retrievers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,11 @@
from app.schemas.kind import Retriever
from app.services.adapters.retriever_kinds import retriever_kinds_service
from app.services.rag.gateway_factory import get_query_gateway
from app.services.rag.local_gateway import LocalRagGateway
from app.services.rag.remote_gateway import RemoteRagGatewayError
from app.services.rag.remote_gateway import RemoteRagGateway, RemoteRagGatewayError
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use the gateway factory and map remote storage-type failures.

Directly constructing RemoteRagGateway() bypasses the singleton/factory path already used by test_retriever_connection. Also, get_storage_types() failures currently bubble as generic 500s; map RemoteRagGatewayError to a clear 503.

Proposed update
-from app.services.rag.remote_gateway import RemoteRagGateway, RemoteRagGatewayError
+from app.services.rag.remote_gateway import RemoteRagGatewayError
@@
 def _check_rag_available():
@@
         )
+
+
+async def _get_storage_types_or_503():
+    """Fetch storage types from knowledge_runtime or map failures to 503."""
+    try:
+        return await get_query_gateway().get_storage_types()
+    except RemoteRagGatewayError as exc:
+        logger.warning(
+            "Failed to fetch storage types from knowledge_runtime",
+            extra={"error_type": type(exc).__name__},
+        )
+        raise HTTPException(
+            status_code=503,
+            detail="RAG knowledge runtime is unavailable",
+        ) from exc
@@
-    gateway = RemoteRagGateway()
-    response = await gateway.get_storage_types()
+    response = await _get_storage_types_or_503()
@@
-    gateway = RemoteRagGateway()
-    response = await gateway.get_storage_types()
+    response = await _get_storage_types_or_503()

Also applies to: 58-59, 93-94

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/api/endpoints/adapter/retrievers.py` at line 18, Replace direct
instantiation of RemoteRagGateway() with the existing gateway factory used by
test_retriever_connection (use the same factory call rather than new
RemoteRagGateway()), and wrap calls to get_storage_types() (and any other remote
gateway calls at the three locations noted) in try/except that catches
RemoteRagGatewayError and maps it to a 503 response (e.g., raise/return an HTTP
503 with a clear message) instead of allowing a generic 500; reference
RemoteRagGateway, RemoteRagGatewayError and get_storage_types to locate the
changes.

from app.services.rag.runtime_specs import ConnectionTestRuntimeSpec
from knowledge_engine.storage.factory import (
create_storage_backend_from_config,
get_all_storage_retrieval_methods,
get_supported_retrieval_methods,
get_supported_storage_types,
)
from shared.models import RuntimeRetrieverConfig

# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
# RAG requires knowledge_runtime service - skip in standalone mode

router = APIRouter()
logger = logging.getLogger(__name__)
Expand All @@ -43,7 +36,7 @@ def _check_rag_available():

# Static routes must be defined before dynamic routes to avoid conflicts
@router.get("/storage-types/retrieval-methods")
def get_storage_retrieval_methods():
async def get_storage_retrieval_methods():
"""
Get supported retrieval methods for all storage types.

Expand All @@ -61,14 +54,25 @@ def get_storage_retrieval_methods():
}
"""
_check_rag_available()

gateway = RemoteRagGateway()
response = await gateway.get_storage_types()

# Convert StorageTypesResponse to the expected format
data = {
type_info.type: type_info.retrieval_methods
for type_info in response.storage_types
}
storage_types = [type_info.type for type_info in response.storage_types]

return {
"data": get_all_storage_retrieval_methods(),
"storage_types": get_supported_storage_types(),
"data": data,
"storage_types": storage_types,
}


@router.get("/storage-types/{storage_type}/retrieval-methods")
def get_storage_type_retrieval_methods(storage_type: str):
async def get_storage_type_retrieval_methods(storage_type: str):
"""
Get supported retrieval methods for a specific storage type.

Expand All @@ -86,14 +90,22 @@ def get_storage_type_retrieval_methods(storage_type: str):
"""
_check_rag_available()

try:
methods = get_supported_retrieval_methods(storage_type)
return {
"storage_type": storage_type,
"retrieval_methods": methods,
}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
gateway = RemoteRagGateway()
response = await gateway.get_storage_types()

# Find the storage type in the response (case-insensitive comparison)
for type_info in response.storage_types:
if type_info.type.casefold() == storage_type.casefold():
return {
"storage_type": type_info.type, # Return normalized form from registry
"retrieval_methods": type_info.retrieval_methods,
}

raise HTTPException(
status_code=400,
detail=f"Unsupported storage type: {storage_type}. "
f"Supported types: {[t.type for t in response.storage_types]}",
)


@router.get("")
Expand Down Expand Up @@ -259,15 +271,6 @@ async def test_retriever_connection(
}

try:
create_storage_backend_from_config(
storage_type=storage_type,
url=url,
username=username,
password=password,
api_key=api_key,
index_strategy={"mode": "per_dataset"},
ext={},
)
runtime_spec = ConnectionTestRuntimeSpec(
retriever_config=RuntimeRetrieverConfig(
name="connection-test",
Expand All @@ -284,14 +287,15 @@ async def test_retriever_connection(
)
)
gateway = get_query_gateway()
try:
return await gateway.test_connection(runtime_spec)
except RemoteRagGatewayError:
return await LocalRagGateway().test_connection(runtime_spec)
return await gateway.test_connection(runtime_spec)

except ValueError as e:
return {"success": False, "message": str(e)}

except RemoteRagGatewayError as e:
logger.error(f"Retriever connection test failed: {str(e)}")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify Ruff no longer reports RUF010 for this file.
uv run ruff check backend/app/api/endpoints/adapter/retrievers.py --select RUF010

Repository: wecode-ai/Wegent

Length of output: 102


🏁 Script executed:

# First, examine the specific file and line
cat -n backend/app/api/endpoints/adapter/retrievers.py | sed -n '290,305p'

Repository: wecode-ai/Wegent

Length of output: 618


🏁 Script executed:

# Check if ruff is available as a direct command
which ruff && ruff --version

Repository: wecode-ai/Wegent

Length of output: 105


🏁 Script executed:

# If ruff is available, check for RUF010 violations in the file
ruff check backend/app/api/endpoints/adapter/retrievers.py --select RUF010 2>&1 || echo "Command failed or no violations found"

Repository: wecode-ai/Wegent

Length of output: 1868


🏁 Script executed:

# Get more context around line 301 to understand the fix needed
cat -n backend/app/api/endpoints/adapter/retrievers.py | sed -n '299,302p'

Repository: wecode-ai/Wegent

Length of output: 252


Fix the Ruff RUF010 logging warnings.

Use lazy logging for logger calls instead of f-string interpolation. Line 301 requires a different approach since it's not a logger call.

Proposed updates
     except RemoteRagGatewayError as e:
-        logger.error(f"Retriever connection test failed: {str(e)}")
+        logger.error("Retriever connection test failed: %s", e)
         return {"success": False, "message": str(e)}
 
     except Exception as e:
-        logger.error(f"Retriever connection test failed: {str(e)}")
-        return {"success": False, "message": f"Connection failed: {str(e)}"}
+        logger.error("Retriever connection test failed: %s", e)
+        return {"success": False, "message": f"Connection failed: {e!s}"}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
logger.error(f"Retriever connection test failed: {str(e)}")
except RemoteRagGatewayError as e:
logger.error("Retriever connection test failed: %s", e)
return {"success": False, "message": str(e)}
except Exception as e:
logger.error("Retriever connection test failed: %s", e)
return {"success": False, "message": f"Connection failed: {e!s}"}
🧰 Tools
🪛 Ruff (0.15.10)

[warning] 296-296: Use explicit conversion flag

Replace with conversion flag

(RUF010)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/api/endpoints/adapter/retrievers.py` at line 296, Replace
f-string interpolation inside logger calls with lazy logging: change the call
using logger.error(f"Retriever connection test failed: {str(e)}") to
logger.error("Retriever connection test failed: %s", e). For the non-logger
usage mentioned (the line 301 case), avoid f-strings by building the message
into a variable first (e.g., msg = "Retriever connection test failed:
{}".format(e) or msg = "Retriever connection test failed: " + str(e)) and then
use that variable where needed (or pass it to raise/return) so no f-string is
used outside logger calls; target the logger.error call and the surrounding code
that references e.

return {"success": False, "message": str(e)}

except Exception as e:
logger.error(f"Retriever connection test failed: {str(e)}")
return {"success": False, "message": f"Connection failed: {str(e)}"}
2 changes: 1 addition & 1 deletion backend/app/api/endpoints/internal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .workspace_archives import router as workspace_archives_router

# RAG router is conditionally imported based on STANDALONE_MODE
# RAG module is heavy (llama_index, scipy, pandas, grpc) - skip in standalone mode
# RAG requires knowledge_runtime service - skip in standalone mode
if not settings.STANDALONE_MODE:
from .rag import router as rag_router

Expand Down
49 changes: 15 additions & 34 deletions backend/app/api/endpoints/internal/rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@
from app.services.knowledge.retrieval_persistence import (
retrieval_persistence_service,
)
from app.services.rag.gateway_factory import get_query_gateway
from app.services.rag.local_gateway import LocalRagGateway
from app.services.rag.remote_gateway import (
RemoteRagGateway,
RemoteRagGatewayError,
should_fallback_to_local,
from app.services.rag.gateway_factory import (
get_delete_gateway,
get_list_chunks_gateway,
get_query_gateway,
)
from app.services.rag.remote_gateway import RemoteRagGatewayError
from app.services.rag.retrieval_service import RetrievalService
from app.services.rag.runtime_resolver import RagRuntimeResolver
from shared.models import (
Expand Down Expand Up @@ -198,13 +197,6 @@ def _resolve_document_names(
)


def _resolve_query_gateway(runtime_spec):
route_mode = getattr(runtime_spec, "route_mode", "auto")
if route_mode == "rag_retrieval":
return get_query_gateway()
return LocalRagGateway()


def _finalize_query_runtime_spec(
runtime_spec,
db: Session,
Expand Down Expand Up @@ -240,12 +232,11 @@ def _finalize_query_runtime_spec(
return runtime_spec.model_copy(update={"route_mode": resolved_route_mode})


async def _execute_query_with_remote_fallback(runtime_spec, db: Session):
rag_gateway = _resolve_query_gateway(runtime_spec)
if (
isinstance(rag_gateway, RemoteRagGateway)
and getattr(runtime_spec, "route_mode", None) == "rag_retrieval"
and not getattr(runtime_spec, "knowledge_base_configs", None)
async def _execute_query(runtime_spec, db: Session):
"""Execute query using the gateway."""
rag_gateway = get_query_gateway()
if getattr(runtime_spec, "route_mode", None) == "rag_retrieval" and not getattr(
runtime_spec, "knowledge_base_configs", None
):
runtime_spec = runtime_spec.model_copy(
update={
Expand All @@ -257,17 +248,7 @@ async def _execute_query_with_remote_fallback(runtime_spec, db: Session):
)
}
)
try:
return await rag_gateway.query(runtime_spec, db=db)
except RemoteRagGatewayError as exc:
if not should_fallback_to_local(exc):
raise
logger.warning(
"[internal_rag] Remote query failed for KBs %s, falling back to local gateway: %s",
getattr(runtime_spec, "knowledge_base_ids", []),
exc,
)
return await LocalRagGateway().query(runtime_spec, db=db)
return await rag_gateway.query(runtime_spec, db=db)


@router.post(
Expand Down Expand Up @@ -351,7 +332,7 @@ async def internal_retrieve(
restricted_mode=restricted_mode,
)
runtime_spec = _finalize_query_runtime_spec(runtime_spec, db, runtime_context)
result = await _execute_query_with_remote_fallback(runtime_spec, db)
result = await _execute_query(runtime_spec, db)

records = result.get("records", [])

Expand Down Expand Up @@ -654,7 +635,7 @@ async def get_all_chunks(
query=request.query,
metadata_condition=request.metadata_condition,
)
result = await LocalRagGateway().list_chunks(
result = await get_list_chunks_gateway().list_chunks(
runtime_spec,
db=db,
)
Expand Down Expand Up @@ -706,7 +687,7 @@ async def purge_knowledge_index(
index_owner_user_id=request.index_owner_user_id,
retriever_config=request.retriever_config.model_dump(mode="python"),
)
return await LocalRagGateway().purge_knowledge_index(runtime_spec, db=db)
return await get_delete_gateway().purge_knowledge_index(runtime_spec, db=db)
except ValueError as e:
logger.warning("[internal_rag] Purge knowledge index error: %s", e)
raise HTTPException(status_code=400, detail=str(e))
Expand All @@ -732,7 +713,7 @@ async def drop_knowledge_index(
index_owner_user_id=request.index_owner_user_id,
retriever_config=request.retriever_config.model_dump(mode="python"),
)
return await LocalRagGateway().drop_knowledge_index(runtime_spec, db=db)
return await get_delete_gateway().drop_knowledge_index(runtime_spec, db=db)
except ValueError as e:
logger.warning("[internal_rag] Drop knowledge index error: %s", e)
raise HTTPException(status_code=400, detail=str(e))
Expand Down
Loading
Loading