Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,8 @@ outputs
evaluation/data/
test_add_pipeline.py
test_file_pipeline.py

# LanceDB local storage and scripts
data/
inspect_lancedb.py
memos_server.log
15 changes: 13 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ memos = "memos.cli:main"
tree-mem = [
"neo4j (>=5.28.1,<6.0.0)", # Graph database
"schedule (>=1.2.2,<2.0.0)", # Task scheduling
"lancedb (>=0.30.1,<1.0.0)", # LanceDB
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lancedb is declared with two different minimum versions across extras (tree-mem requires >=0.30.1 while lance-mem/all allow >=0.17.0). Mixed constraints like this can lead to confusing resolver behavior and make it unclear which version the code is targeting. It would be better to align these constraints (and include any required companion deps like tantivy in the same extra if FTS is considered part of the feature set).

Suggested change
"lancedb (>=0.30.1,<1.0.0)", # LanceDB
"lancedb (>=0.17.0,<1.0.0)", # LanceDB

Copilot uses AI. Check for mistakes.
]

# MemScheduler
Expand Down Expand Up @@ -102,6 +103,13 @@ skill-mem = [
"alibabacloud-oss-v2 (>=1.2.2,<1.2.3)",
]

# Lance Vector DB
lance-mem = [
"lancedb (>=0.17.0,<1.0.0)", # Lance vector database
"pyarrow (>=18.0.0,<20.0.0)", # Arrow format support for Lance
"tantivy (>=0.22.0,<1.0.0)", # FTS engine for LanceDB
]

# Tavily Search
tavily = [
"tavily-python (>=0.5.0,<1.0.0)",
Expand Down Expand Up @@ -135,6 +143,9 @@ all = [
"rake-nltk (>=1.0.6,<1.1.0)",
"alibabacloud-oss-v2 (>=1.2.2,<1.2.3)",
"tavily-python (>=0.5.0,<1.0.0)",
"lancedb (>=0.17.0,<1.0.0)",
"pyarrow (>=18.0.0,<20.0.0)",
"tantivy (>=0.22.0,<1.0.0)",

# Uncategorized dependencies
]
Expand Down Expand Up @@ -199,8 +210,8 @@ langgraph = "^0.5.1"
pymysql = "^1.1.2"

[[tool.poetry.source]]
name = "mirrors"
url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
name = "volces"
url = "https://mirrors.volces.com/pypi/simple/"
priority = "supplemental"


Expand Down
14 changes: 14 additions & 0 deletions src/memos/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,16 @@ def get_start_default_config() -> dict[str, Any]:

return config

@staticmethod
def get_lance_graph_config(user_id: str | None = None) -> dict[str, Any]:
"""Get LanceDB graph configuration."""
base_uri = os.getenv("LANCE_URI", "./data/lance_db")
return {
"uri": base_uri,
"user_name": user_id,
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 2048)),
}

@staticmethod
def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "GeneralMemCube"]:
"""Create configuration for a specific user."""
Expand Down Expand Up @@ -1126,6 +1136,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene
neo4j_community_config = APIConfig.get_neo4j_community_config(user_id)
neo4j_config = APIConfig.get_neo4j_config(user_id)
polardb_config = APIConfig.get_polardb_config(user_id)
lance_config = APIConfig.get_lance_graph_config(user_id)
internet_config = (
APIConfig.get_internet_config()
if os.getenv("ENABLE_INTERNET", "false").lower() == "true"
Expand All @@ -1137,6 +1148,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene
"neo4j": neo4j_config,
"polardb": polardb_config,
"postgres": postgres_config,
"lance": lance_config,
}
# Support both GRAPH_DB_BACKEND and legacy NEO4J_BACKEND env vars
graph_db_backend = os.getenv(
Expand Down Expand Up @@ -1210,11 +1222,13 @@ def get_default_cube_config() -> "GeneralMemCubeConfig | None":
neo4j_config = APIConfig.get_neo4j_config(user_id="default")
polardb_config = APIConfig.get_polardb_config(user_id="default")
postgres_config = APIConfig.get_postgres_config(user_id="default")
lance_config = APIConfig.get_lance_graph_config(user_id="default")
graph_db_backend_map = {
"neo4j-community": neo4j_community_config,
"neo4j": neo4j_config,
"polardb": polardb_config,
"postgres": postgres_config,
"lance": lance_config,
}
internet_config = (
APIConfig.get_internet_config()
Expand Down
22 changes: 20 additions & 2 deletions src/memos/api/handlers/config_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def build_graph_db_config(user_id: str = "default") -> dict[str, Any]:
"neo4j": APIConfig.get_neo4j_config(user_id=user_id),
"polardb": APIConfig.get_polardb_config(user_id=user_id),
"postgres": APIConfig.get_postgres_config(user_id=user_id),
"lance": APIConfig.get_lance_graph_config(user_id=user_id),
}

# Support both GRAPH_DB_BACKEND and legacy NEO4J_BACKEND env vars
Expand All @@ -62,10 +63,27 @@ def build_vec_db_config() -> dict[str, Any]:
Returns:
Validated vector database configuration dictionary
"""
vec_db_backend = os.getenv("MOS_VEC_DB_BACKEND", "milvus").lower()

config = {}
if vec_db_backend == "milvus":
config = APIConfig.get_milvus_config()
elif vec_db_backend == "lance":
base_uri = os.getenv("LANCE_URI", "./data/lance_db")
config = {
"uri": base_uri,
"collection_name": ["memories"],
"embedding_dimension": int(os.getenv("EMBEDDING_DIMENSION", 2048)),
}
elif vec_db_backend == "qdrant":
config = APIConfig.get_qdrant_config()
else:
raise ValueError(f"Unsupported vector DB backend: {vec_db_backend}")

return VectorDBConfigFactory.model_validate(
{
"backend": "milvus",
"config": APIConfig.get_milvus_config(),
"backend": vec_db_backend,
"config": config,
}
)
Comment on lines +66 to 88
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

build_vec_db_config() now allows MOS_VEC_DB_BACKEND=lance, but VectorDBConfigFactory currently only accepts backends {qdrant, milvus}. As written, VectorDBConfigFactory.model_validate({"backend": "lance", ...}) will raise a validation error at runtime. Additionally, the config keys here (embedding_dimension) don’t match the existing vec-db config schema (vector_dimension), so even adding the backend mapping would still fail validation unless a Lance vec-db config class is introduced.

Copilot uses AI. Check for mistakes.

Expand Down
34 changes: 34 additions & 0 deletions src/memos/api/handlers/search_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import copy
import logging
import math

from typing import Any
Expand Down Expand Up @@ -71,7 +72,40 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse
results = cube_view.search_memories(search_req_local)
if not search_req_local.relativity:
search_req_local.relativity = 0

self.logger.info(f"[SearchHandler] Relativity filter: {search_req_local.relativity}")

# Extract and log scores for visibility before filtering
if self.logger.isEnabledFor(logging.DEBUG):
score_details = []
for key in ("text_mem", "pref_mem"):
buckets = results.get(key)
if not isinstance(buckets, list):
continue
for bucket in buckets:
memories = bucket.get("memories")
if not isinstance(memories, list):
continue
for mem in memories:
if not isinstance(mem, dict):
continue
mem_text = mem.get("memory", "").replace("\n", " ")
# Truncate to 100 chars to avoid log flooding
if len(mem_text) > 100:
mem_text = mem_text[:100] + "..."
meta = mem.get("metadata", {})
score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0
try:
score_val = float(score) if score is not None else 1.0
except (TypeError, ValueError):
score_val = 1.0
score_details.append(f"[{score_val:.4f}] {mem_text}")

if score_details:
self.logger.debug(
f"[SearchHandler] Reranker scores before threshold ({search_req_local.relativity}): \n"
Comment on lines +78 to +106
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new DEBUG logging includes snippets of the raw memory text in server logs. Even gated behind DEBUG, this can leak sensitive user content into log files and observability pipelines. Consider logging only IDs and scores (or a hash/redacted preview) and/or guarding this behind an explicit config flag intended for secure debugging.

Suggested change
# Extract and log scores for visibility before filtering
if self.logger.isEnabledFor(logging.DEBUG):
score_details = []
for key in ("text_mem", "pref_mem"):
buckets = results.get(key)
if not isinstance(buckets, list):
continue
for bucket in buckets:
memories = bucket.get("memories")
if not isinstance(memories, list):
continue
for mem in memories:
if not isinstance(mem, dict):
continue
mem_text = mem.get("memory", "").replace("\n", " ")
# Truncate to 100 chars to avoid log flooding
if len(mem_text) > 100:
mem_text = mem_text[:100] + "..."
meta = mem.get("metadata", {})
score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0
try:
score_val = float(score) if score is not None else 1.0
except (TypeError, ValueError):
score_val = 1.0
score_details.append(f"[{score_val:.4f}] {mem_text}")
if score_details:
self.logger.debug(
f"[SearchHandler] Reranker scores before threshold ({search_req_local.relativity}): \n"
# Extract and log scores for visibility before filtering without logging raw memory text
if self.logger.isEnabledFor(logging.DEBUG):
score_details = []
for key in ("text_mem", "pref_mem"):
buckets = results.get(key)
if not isinstance(buckets, list):
continue
for bucket_index, bucket in enumerate(buckets):
memories = bucket.get("memories")
if not isinstance(memories, list):
continue
for mem_index, mem in enumerate(memories):
if not isinstance(mem, dict):
continue
meta = mem.get("metadata", {})
score = meta.get("relativity", 1.0) if isinstance(meta, dict) else 1.0
try:
score_val = float(score) if score is not None else 1.0
except (TypeError, ValueError):
score_val = 1.0
mem_id = mem.get("id") or mem.get("memory_id")
if mem_id is None and isinstance(meta, dict):
mem_id = meta.get("id") or meta.get("memory_id")
mem_ref = (
f"id={mem_id}"
if mem_id is not None
else f"bucket={bucket_index},index={mem_index}"
)
score_details.append(f"[{score_val:.4f}] {key} {mem_ref}")
if score_details:
self.logger.debug(
f"[SearchHandler] Reranker scores before threshold ({search_req_local.relativity}):\n"

Copilot uses AI. Check for mistakes.
+ "\n".join(score_details)
)
results = self._apply_relativity_threshold(results, search_req_local.relativity)

if search_req_local.dedup == "sim":
Expand Down
23 changes: 23 additions & 0 deletions src/memos/configs/graph_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,28 @@ def validate_config(self):
return self


class LanceGraphDBConfig(BaseConfig):
    """Settings for the LanceDB graph backend.

    Only ``uri`` is required; every other field has a default.
    """

    # Location of the dataset; required (no default).
    uri: str = Field(
        ...,
        description="The URI/path to the LanceDB dataset",
    )

    # Optional tenant identifier.
    user_name: str | None = Field(
        default=None, description="Logical user or tenant ID for data isolation"
    )

    # Size of the stored embedding vectors.
    embedding_dimension: int = Field(
        default=768,
        description="Dimension of vector embedding",
    )

    # Compaction / version-pruning settings.
    compaction_version_threshold: int = Field(
        default=500,
        description="Number of new versions to accumulate before triggering compaction",
    )
    compaction_interval_mins: int = Field(
        default=30,
        description="Fallback interval in minutes to check and run compaction",
    )
    cleanup_older_than_days: int = Field(
        default=7,
        description="Number of days to keep old versions before pruning",
    )


class GraphDBConfigFactory(BaseModel):
backend: str = Field(..., description="Backend for graph database")
config: dict[str, Any] = Field(..., description="Configuration for the graph database backend")
Expand All @@ -250,6 +272,7 @@ class GraphDBConfigFactory(BaseModel):
"neo4j-community": Neo4jCommunityGraphDBConfig,
"polardb": PolarDBGraphDBConfig,
"postgres": PostgresGraphDBConfig,
"lance": LanceGraphDBConfig,
}

@field_validator("backend")
Expand Down
4 changes: 4 additions & 0 deletions src/memos/graph_dbs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,7 @@ def add_nodes_batch(self, nodes: list[dict[str, Any]], user_name: str | None = N
- metadata: dict[str, Any] - Node metadata
user_name: Optional user name (will use config default if not provided)
"""

@abstractmethod
def node_not_exist(self, scope: str, user_name: str | None = None) -> bool:
pass
Comment on lines +306 to +308
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the new abstract method node_not_exist() makes every BaseGraphDB subclass abstract until it implements it. At least PostgresGraphDB and Neo4jCommunityGraphDB currently have no node_not_exist, so instantiating those backends will raise TypeError: Can't instantiate abstract class .... Either provide a non-abstract default implementation here, or update all graph DB backends to implement it with consistent boolean semantics (some existing implementations currently return ints).

Suggested change
@abstractmethod
def node_not_exist(self, scope: str, user_name: str | None = None) -> bool:
pass
def node_not_exist(self, scope: str, user_name: str | None = None) -> bool:
"""
Return True when the given scope has no memory items, otherwise False.
This default implementation derives the result from ``get_all_memory_items``.
The ``user_name`` argument is accepted for API compatibility but is not used
here because the base retrieval API is scoped only by ``scope``. Backends
that need user-specific existence checks can override this method.
"""
return len(self.get_all_memory_items(scope)) == 0

Copilot uses AI. Check for mistakes.
2 changes: 2 additions & 0 deletions src/memos/graph_dbs/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from memos.configs.graph_db import GraphDBConfigFactory
from memos.graph_dbs.base import BaseGraphDB
from memos.graph_dbs.lance import LanceGraphDB
from memos.graph_dbs.neo4j import Neo4jGraphDB
from memos.graph_dbs.neo4j_community import Neo4jCommunityGraphDB
from memos.graph_dbs.polardb import PolarDBGraphDB
Expand All @@ -16,6 +17,7 @@ class GraphStoreFactory(BaseGraphDB):
"neo4j-community": Neo4jCommunityGraphDB,
"polardb": PolarDBGraphDB,
"postgres": PostgresGraphDB,
"lance": LanceGraphDB,
}

@classmethod
Expand Down
Loading
Loading