Skip to content
47 changes: 43 additions & 4 deletions langchain_postgres/v2/async_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,19 +400,58 @@ async def aadd_documents(
async def adelete(
self,
ids: Optional[list] = None,
filter: Optional[dict] = None,
**kwargs: Any,
) -> Optional[bool]:
"""Delete records from the table by ID, by metadata filter, or by both.

Each criterion that is supplied contributes one condition to the WHERE
clause of a single DELETE statement; when both are supplied the conditions
are joined with AND, so a row must satisfy both to be removed.

Args:
ids: List of document IDs to delete. An empty list is treated the same
as None: it adds no ID condition.
filter: Metadata filter dictionary for bulk deletion.
Supports the same filter syntax as similarity_search.
Note: Filters only work on fields defined in metadata_columns,
not on fields stored in the metadata_json_column.

Returns:
False if neither ids nor filter is provided. The documented success value
is True (the statement producing it is outside the visible part of this
function).

Raises:
:class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

Examples:
Delete by IDs:
await vectorstore.adelete(ids=["id1", "id2"])

Delete by metadata filter (requires metadata_columns):
await vectorstore.adelete(filter={"source": "documentation"})
await vectorstore.adelete(filter={"$and": [{"category": "obsolete"}, {"year": {"$lt": 2020}}]})

Delete by both IDs and filter (must match both criteria):
await vectorstore.adelete(ids=["id1", "id2"], filter={"status": "archived"})
"""
# NOTE(review): the next line looks like the pre-change guard left over from
# the diff view; the guard right after it is the updated one. Confirm against
# the real file.
if not ids:
# Nothing to delete when no criterion at all was given.
if not ids and not filter:
return False

# NOTE(review): the next three statements look like the pre-change ID-only
# query; they are superseded by the where_clauses logic below and would fail
# on len(ids) when ids is None. Confirm they are diff residue.
placeholders = ", ".join(f":id_{i}" for i in range(len(ids)))
param_dict = {f"id_{i}": id for i, id in enumerate(ids)}
query = f'DELETE FROM "{self.schema_name}"."{self.table_name}" WHERE {self.id_column} in ({placeholders})'
# Conditions and bound parameters are accumulated separately so that values
# are passed as bind parameters rather than interpolated into the SQL text.
where_clauses = []
param_dict = {}

# Handle ID-based deletion
if ids:
# One named placeholder (:id_0, :id_1, ...) per requested ID.
placeholders = ", ".join(f":id_{i}" for i in range(len(ids)))
id_params = {f"id_{i}": id for i, id in enumerate(ids)}
param_dict.update(id_params)
where_clauses.append(f"{self.id_column} in ({placeholders})")

# Handle filter-based deletion
if filter:
# _create_filter_clause yields the SQL condition and its bind parameters.
# NOTE(review): presumably its parameter names never collide with the
# id_<n> names above; verify against the helper.
filter_clause, filter_params = self._create_filter_clause(filter)
param_dict.update(filter_params)
where_clauses.append(filter_clause)

# Combine WHERE clauses with AND if both are present
where_clause = " AND ".join(where_clauses)
query = f'DELETE FROM "{self.schema_name}"."{self.table_name}" WHERE {where_clause}'

async with self.engine.connect() as conn:
await conn.execute(text(query), param_dict)
await conn.commit()
Expand Down
48 changes: 46 additions & 2 deletions langchain_postgres/v2/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,26 +266,70 @@ def add_documents(
async def adelete(
    self,
    ids: Optional[list] = None,
    filter: Optional[dict] = None,
    **kwargs: Any,
) -> Optional[bool]:
    """Delete records from the table.

    Thin wrapper: builds the wrapped async store's ``adelete`` coroutine and
    hands it to the engine's ``_run_as_async``. Both ``ids`` and ``filter``
    are forwarded, so filter-only deletion works through this entry point.

    Args:
        ids: List of document IDs to delete.
        filter: Metadata filter dictionary for bulk deletion.
            Supports the same filter syntax as similarity_search.
            Note: Filters only work on fields defined in metadata_columns,
            not on fields stored in the metadata_json_column.
        **kwargs: Extra keyword arguments passed through unchanged.

    Returns:
        The result of the wrapped store's ``adelete``: documented as True if
        deletion was successful, False if no criteria were provided.

    Raises:
        :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

    Examples:
        Delete by IDs:
            await vectorstore.adelete(ids=["id1", "id2"])

        Delete by metadata filter (requires metadata_columns):
            await vectorstore.adelete(filter={"source": "documentation"})
            await vectorstore.adelete(filter={"$and": [{"category": "obsolete"}, {"year": {"$lt": 2020}}]})

        Delete by both IDs and filter (must match both criteria):
            await vectorstore.adelete(ids=["id1", "id2"], filter={"status": "archived"})
    """
    # A single return that forwards `filter`; an earlier return without it
    # would silently ignore the filter and make this statement unreachable.
    return await self._engine._run_as_async(
        self.__vs.adelete(ids, filter=filter, **kwargs)
    )

def delete(
    self,
    ids: Optional[list] = None,
    filter: Optional[dict] = None,
    **kwargs: Any,
) -> Optional[bool]:
    """Delete records from the table.

    Synchronous counterpart of ``adelete``: builds the wrapped async store's
    ``adelete`` coroutine and runs it through the engine's ``_run_as_sync``.
    Both ``ids`` and ``filter`` are forwarded.

    Args:
        ids: List of document IDs to delete.
        filter: Metadata filter dictionary for bulk deletion.
            Supports the same filter syntax as similarity_search.
            Note: Filters only work on fields defined in metadata_columns,
            not on fields stored in the metadata_json_column.
        **kwargs: Extra keyword arguments passed through unchanged.

    Returns:
        The result of the wrapped store's ``adelete``: documented as True if
        deletion was successful, False if no criteria were provided.

    Raises:
        :class:`InvalidTextRepresentationError <asyncpg.exceptions.InvalidTextRepresentationError>`: if the `ids` data type does not match that of the `id_column`.

    Examples:
        Delete by IDs:
            vectorstore.delete(ids=["id1", "id2"])

        Delete by metadata filter (requires metadata_columns):
            vectorstore.delete(filter={"source": "documentation"})
            vectorstore.delete(filter={"$and": [{"category": "obsolete"}, {"year": {"$lt": 2020}}]})

        Delete by both IDs and filter (must match both criteria):
            vectorstore.delete(ids=["id1", "id2"], filter={"status": "archived"})
    """
    # A single return that forwards `filter`; an earlier return without it
    # would silently ignore the filter and make this statement unreachable.
    return self._engine._run_as_sync(
        self.__vs.adelete(ids, filter=filter, **kwargs)
    )

@classmethod
async def afrom_texts( # type: ignore[override]
Expand Down
212 changes: 212 additions & 0 deletions tests/unit_tests/v2/test_async_pg_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,218 @@ async def test_adelete(self, engine: PGEngine, vs: AsyncPGVectorStore) -> None:
assert not result
await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"')

async def test_adelete_with_filter(self, engine: PGEngine) -> None:
    """Delete rows through a single metadata equality filter.

    A throwaway table with ``source`` and ``category`` metadata columns is
    filled with one row per entry of ``texts``; every ``source="postgres"``
    row is then deleted and only the ``"web"`` row must be left.
    """
    table_name = f"test_delete_filter{uuid.uuid4()}"
    select_all = f'SELECT * FROM "{table_name}"'

    # Dedicated metadata columns are required for filter-based deletion.
    await engine._ainit_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("source", "TEXT"), Column("category", "TEXT")],
        store_metadata=False,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=table_name,
        metadata_columns=["source", "category"],
    )

    # Seed the table: two "postgres" rows and one "web" row.
    seed_metadata = [
        {"source": "postgres", "category": "docs"},
        {"source": "web", "category": "docs"},
        {"source": "postgres", "category": "blog"},
    ]
    row_ids = [str(uuid.uuid4()) for _ in texts]
    await store.aadd_texts(texts, metadatas=seed_metadata, ids=row_ids)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3

    # Remove everything that came from "postgres".
    await store.adelete(filter={"source": "postgres"})
    rows = await afetch(engine, select_all)
    assert len(rows) == 1
    # Only the "web" row survives.
    assert rows[0]["source"] == "web"

    await aexecute(engine, f'DROP TABLE "{table_name}"')

async def test_adelete_with_filter_and_operator(self, engine: PGEngine) -> None:
    """Delete rows through a filter that uses a comparison operator.

    Rows carry an integer ``year``; deleting with ``{"year": {"$lt": 2022}}``
    must leave only the row whose year is 2022.
    """
    table_name = f"test_delete_operator{uuid.uuid4()}"
    select_all = f'SELECT * FROM "{table_name}"'

    # "year" is an INTEGER column so the numeric comparison is meaningful.
    await engine._ainit_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("source", "TEXT"), Column("year", "INTEGER")],
        store_metadata=False,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=table_name,
        metadata_columns=["source", "year"],
    )

    # Seed the table with years 2020, 2021 and 2022.
    seed_metadata = [
        {"source": "postgres", "year": 2020},
        {"source": "web", "year": 2021},
        {"source": "postgres", "year": 2022},
    ]
    row_ids = [str(uuid.uuid4()) for _ in texts]
    await store.aadd_texts(texts, metadatas=seed_metadata, ids=row_ids)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3

    # Drop every row strictly older than 2022.
    await store.adelete(filter={"year": {"$lt": 2022}})
    rows = await afetch(engine, select_all)
    assert len(rows) == 1
    # The 2022 row is the sole survivor.
    assert rows[0]["year"] == 2022

    await aexecute(engine, f'DROP TABLE "{table_name}"')

async def test_adelete_with_complex_filter(self, engine: PGEngine) -> None:
    """Delete rows through a compound ``$and`` filter.

    Only the row matching both ``source="postgres"`` and
    ``category="obsolete"`` may be removed; the two rows that match just one
    of the conditions must remain.
    """
    table_name = f"test_delete_complex{uuid.uuid4()}"
    select_all = f'SELECT * FROM "{table_name}"'

    # Dedicated metadata columns are required for filter-based deletion.
    await engine._ainit_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("source", "TEXT"), Column("category", "TEXT")],
        store_metadata=False,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=table_name,
        metadata_columns=["source", "category"],
    )

    # Seed the table: exactly one row satisfies both conditions.
    seed_metadata = [
        {"source": "postgres", "category": "obsolete"},
        {"source": "web", "category": "obsolete"},
        {"source": "postgres", "category": "current"},
    ]
    row_ids = [str(uuid.uuid4()) for _ in texts]
    await store.aadd_texts(texts, metadatas=seed_metadata, ids=row_ids)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3

    # source="postgres" AND category="obsolete"
    compound_filter = {"$and": [{"source": "postgres"}, {"category": "obsolete"}]}
    await store.adelete(filter=compound_filter)
    rows = await afetch(engine, select_all)
    assert len(rows) == 2

    # Only the first seeded row should be gone.
    surviving_categories = [row["category"] for row in rows]
    assert "obsolete" in surviving_categories  # web/obsolete still exists
    assert "current" in surviving_categories  # postgres/current still exists

    await aexecute(engine, f'DROP TABLE "{table_name}"')

async def test_adelete_with_filter_and_ids(self, engine: PGEngine) -> None:
    """Delete with both ``ids`` and ``filter``; a row must match both.

    First call: the listed IDs are all ``"postgres"`` rows while the filter
    asks for ``"web"``, so nothing may be deleted. Second call: the ID list
    includes the ``"web"`` row, so exactly that row is removed.
    """
    table_name = f"test_delete_ids_filter{uuid.uuid4()}"
    select_all = f'SELECT * FROM "{table_name}"'

    # Dedicated metadata columns are required for filter-based deletion.
    await engine._ainit_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("source", "TEXT")],
        store_metadata=False,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=table_name,
        metadata_columns=["source"],
    )

    # Seed the table: rows 0 and 2 are "postgres", row 1 is "web".
    seed_metadata = [
        {"source": "postgres"},
        {"source": "web"},
        {"source": "postgres"},
    ]
    row_ids = [str(uuid.uuid4()) for _ in texts]
    await store.aadd_texts(texts, metadatas=seed_metadata, ids=row_ids)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3

    web_only = {"source": "web"}

    # id IN (row 0, row 2) AND source="web": the listed rows are "postgres"
    # and the only "web" row is not listed, so the intersection is empty.
    await store.adelete(ids=[row_ids[0], row_ids[2]], filter=web_only)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3  # Nothing deleted

    # id IN (row 0, row 1) AND source="web": only row 1 satisfies both.
    await store.adelete(ids=[row_ids[0], row_ids[1]], filter=web_only)
    rows = await afetch(engine, select_all)
    assert len(rows) == 2

    surviving_ids = {str(row["langchain_id"]) for row in rows}
    assert row_ids[1] not in surviving_ids  # row 1 was deleted
    assert row_ids[0] in surviving_ids  # row 0 not deleted (wrong source)
    assert row_ids[2] in surviving_ids  # row 2 not deleted (not in id list)

    await aexecute(engine, f'DROP TABLE "{table_name}"')

async def test_adelete_with_filter_no_matches(self, engine: PGEngine) -> None:
    """Deleting with a filter that matches no row leaves the table untouched."""
    table_name = f"test_delete_nomatch{uuid.uuid4()}"
    select_all = f'SELECT * FROM "{table_name}"'

    # Dedicated metadata columns are required for filter-based deletion.
    await engine._ainit_vectorstore_table(
        table_name,
        VECTOR_SIZE,
        metadata_columns=[Column("source", "TEXT")],
        store_metadata=False,
    )
    store = await AsyncPGVectorStore.create(
        engine,
        embedding_service=embeddings_service,
        table_name=table_name,
        metadata_columns=["source"],
    )

    # Every seeded row has the same source.
    seed_metadata = [{"source": "postgres"} for _ in texts]
    row_ids = [str(uuid.uuid4()) for _ in texts]
    await store.aadd_texts(texts, metadatas=seed_metadata, ids=row_ids)
    rows = await afetch(engine, select_all)
    assert len(rows) == 3

    # No row carries this source value, so the delete must be a no-op.
    await store.adelete(filter={"source": "nonexistent"})
    rows = await afetch(engine, select_all)
    assert len(rows) == 3  # Nothing deleted

    await aexecute(engine, f'DROP TABLE "{table_name}"')

##### Custom Vector Store #####
async def test_aadd_embeddings(
self, engine: PGEngine, vs_custom: AsyncPGVectorStore
Expand Down
Loading