From 37dee5451bbbfb9b30dbdf61f607b17a62fbe434 Mon Sep 17 00:00:00 2001 From: davidmezzetti <561939+davidmezzetti@users.noreply.github.com> Date: Thu, 20 Apr 2023 10:06:50 -0400 Subject: [PATCH] Update documentation --- docs/embeddings/configuration/index.md | 11 +++++++---- src/python/txtai/ann/base.py | 3 ++- src/python/txtai/database/base.py | 5 +++-- src/python/txtai/database/factory.py | 6 +++--- src/python/txtai/graph/base.py | 3 ++- src/python/txtai/vectors/base.py | 2 +- 6 files changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/embeddings/configuration/index.md b/docs/embeddings/configuration/index.md index 9e373f195..f55c16051 100644 --- a/docs/embeddings/configuration/index.md +++ b/docs/embeddings/configuration/index.md @@ -106,11 +106,12 @@ Instruction-based models use prefixes to modify how embeddings are computed. Thi ## backend ```yaml -backend: faiss|hnsw|annoy +backend: faiss|hnsw|annoy|custom ``` Approximate Nearest Neighbor (ANN) index backend for storing generated sentence embeddings. `Defaults to faiss`. Additional backends require the -[similarity](../../install/#similarity) extras package to be installed. +[similarity](../../install/#similarity) extras package to be installed. Add custom backends via setting this parameter to the fully resolvable +class string. Backend-specific settings are set with a corresponding configuration object having the same name as the backend (i.e. annoy, faiss, or hnsw). None of these are required and are set to defaults if omitted. @@ -159,10 +160,10 @@ See [Annoy documentation](https://github.com/spotify/annoy#full-python-api) for ## content ```yaml -content: string|boolean +content: boolean|sqlite|duckdb|custom ``` -Enables content storage. When true, the default content storage engine will be used. `Defaults to sqlite`. Otherwise, the string must specify the supported content storage engine to use. +Enables content storage. When true, the default storage engine, `sqlite` will be used. 
Also supports `duckdb`. Add custom storage engines via setting this parameter to the fully resolvable class string. ## functions ```yaml @@ -205,6 +206,8 @@ graph: Enables graph storage. When set, a graph network is built using the embeddings index. Graph nodes are synced with each embeddings index operation (index/upsert/delete). Graph edges are created using the embeddings index upon completion of each index/upsert/delete embeddings index call. +Add custom graph storage engines via setting the `graph.backend` parameter to the fully resolvable class string. + Defaults are tuned so that in most cases these values don't need to be changed. ### topics diff --git a/src/python/txtai/ann/base.py b/src/python/txtai/ann/base.py index 182278651..b27e99b2f 100644 --- a/src/python/txtai/ann/base.py +++ b/src/python/txtai/ann/base.py @@ -10,7 +10,8 @@ class ANN: """ - Base class for ANN instances. + Base class for ANN instances. This class builds vector indexes to support similarity search. + The built-in ANN backends store ids and vectors. Content storage is supported via database instances. """ def __init__(self, config): diff --git a/src/python/txtai/database/base.py b/src/python/txtai/database/base.py index d619ce8d4..2a4674e61 100644 --- a/src/python/txtai/database/base.py +++ b/src/python/txtai/database/base.py @@ -14,8 +14,9 @@ class Database: """ - Base class for database instances. This class encapsulates a document-oriented database - used for storing key-value content stored as dicts. + Base class for database instances. This class encapsulates a content database used for + storing field content as dicts and objects. The database instance works in conjunction + with a vector index to execute SQL-driven similarity search. 
""" def __init__(self, config): diff --git a/src/python/txtai/database/factory.py b/src/python/txtai/database/factory.py index 7ac92d827..ae680829f 100644 --- a/src/python/txtai/database/factory.py +++ b/src/python/txtai/database/factory.py @@ -36,10 +36,10 @@ def create(config): content = "sqlite" # Create document database instance - if content == "sqlite": - database = SQLite(config) - elif content == "duckdb": + if content == "duckdb": database = DuckDB(config) + elif content == "sqlite": + database = SQLite(config) elif content: database = DatabaseFactory.resolve(content, config) diff --git a/src/python/txtai/graph/base.py b/src/python/txtai/graph/base.py index 28a4ef934..d99b39c94 100644 --- a/src/python/txtai/graph/base.py +++ b/src/python/txtai/graph/base.py @@ -18,7 +18,8 @@ # pylint: disable=R0904 class Graph: """ - Base class for Graph instances. + Base class for Graph instances. This class builds graph networks. Supports topic modeling + and relationship traversal. """ def __init__(self, config): diff --git a/src/python/txtai/vectors/base.py b/src/python/txtai/vectors/base.py index dbbf2f5e7..0caf8c122 100644 --- a/src/python/txtai/vectors/base.py +++ b/src/python/txtai/vectors/base.py @@ -12,7 +12,7 @@ class Vectors: """ - Base class for sentence embeddings/vector models. + Base class for sentence embeddings/vector models. Vector models transform input content into numeric vectors. """ def __init__(self, config, scoring):