diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java new file mode 100644 index 0000000000..44102a6348 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java @@ -0,0 +1,247 @@ +// How-to: Manage-Data -> Classes +package io.weaviate.docs; + +import com.google.gson.GsonBuilder; +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.misc.model.ReplicationConfig; +import io.weaviate.client.v1.misc.model.ShardingConfig; +import io.weaviate.client.v1.schema.model.Schema; +import io.weaviate.client.v1.schema.model.WeaviateClass; +import io.weaviate.client.v1.misc.model.BM25Config; +import io.weaviate.client.v1.misc.model.InvertedIndexConfig; +import io.weaviate.client.v1.misc.model.VectorIndexConfig; +import io.weaviate.docs.helper.EnvHelper; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +@Tag("crud") +@Tag("classes") +class ManageDataReplicationTest { + + private static WeaviateClient client; + + @BeforeAll + public static void beforeAll() { + String scheme = EnvHelper.scheme("http"); + String host = EnvHelper.host("localhost"); + String port = EnvHelper.port("8181"); + + Config config = new Config(scheme, host + ":" + port); + client = new WeaviateClient(config); + + Result result = client.schema().allDeleter().run(); + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + } + + @Test + public void shouldManageDataClasses() { + + String collectionName = "Article"; + + createArticleWithReplicationConfig(collectionName); + 
deleteCollections(collectionName); + createArticleWithShardingConfig(collectionName); + updateArticleConfiguration(collectionName); + readAllCollections(); + } + + private void deleteCollections(String className) { + client.schema().classDeleter() + .withClassName(className) + .run(); + } + + private void print(Result result) { + String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult()); + System.out.println(json); + } + + private void readAllCollections() { + Result result = client.schema().getter() + .run(); + + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .extracting(Result::getResult).isNotNull() + .extracting(Schema::getClasses).asList() + .hasSize(1); + + print(result); + } + + private void createArticleWithReplicationConfig(String collectionName) { + // START AllReplicationSettings + // Configure replication settings + Integer replicationFactor = 3; + Boolean asyncEnabled = true; + + // Create replication configuration + ReplicationConfig replicationConfig = ReplicationConfig.builder() + .factor(replicationFactor) // factor=3 + .asyncEnabled(asyncEnabled) // async_enabled=True + .deletionStrategy(ReplicationConfig.DeletionStrategy.DELETE_ON_CONFLICT) + .build(); + + // Create the Article collection with replication configuration + WeaviateClass articleClass = WeaviateClass.builder() + .className(collectionName) + .description("Article collection with replication configuration") + .replicationConfig(replicationConfig) // Set the replication config + .build(); + + // Add the collection to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END AllReplicationSettings + + // Assert the result + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + 
// Verify the replication configuration was set correctly + Result classResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(classResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass createdClass = classResult.getResult(); + assertThat(createdClass).isNotNull() + .extracting(WeaviateClass::getReplicationConfig).isNotNull() + .returns(replicationFactor, ReplicationConfig::getFactor) + .returns(asyncEnabled, ReplicationConfig::getAsyncEnabled) + .returns(ReplicationConfig.DeletionStrategy.DELETE_ON_CONFLICT, + ReplicationConfig::getDeletionStrategy); + } + + private void createArticleWithShardingConfig(String collectionName) { + // START ShardingSettings + // Configure sharding settings + Integer virtualPerPhysical = 128; + Integer desiredCount = 1; + Integer desiredVirtualCount = 128; + + // Create sharding configuration + ShardingConfig shardingConfig = ShardingConfig.builder() + .virtualPerPhysical(virtualPerPhysical) // virtual_per_physical=128 + .desiredCount(desiredCount) // desired_count=1 + .desiredVirtualCount(desiredVirtualCount) // desired_virtual_count=128 + .build(); + + // Create the Article collection with sharding configuration + WeaviateClass articleClass = WeaviateClass.builder() + .className(collectionName) + .description("Article collection with sharding configuration") + .shardingConfig(shardingConfig) // Set the sharding config + .build(); + + // Add the collection to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END ShardingSettings + + // Assert the result + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + // Verify the sharding configuration was set correctly + Result classResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + 
assertThat(classResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass createdClass = classResult.getResult(); + assertThat(createdClass).isNotNull() + .extracting(WeaviateClass::getShardingConfig).isNotNull() + .returns(virtualPerPhysical, ShardingConfig::getVirtualPerPhysical) + .returns(desiredCount, ShardingConfig::getDesiredCount) + .returns(desiredVirtualCount, ShardingConfig::getDesiredVirtualCount); + } + + private void updateArticleConfiguration(String collectionName) { + // START UpdateCollection + // Get existing collection + Result existingResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(existingResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass existingClass = existingResult.getResult(); + + // Create updated configurations + InvertedIndexConfig invertedConfig = InvertedIndexConfig.builder() + .bm25(BM25Config.builder().k1(1.5f).build()) + .build(); + + VectorIndexConfig vectorConfig = VectorIndexConfig.builder() + .filterStrategy(VectorIndexConfig.FilterStrategy.ACORN) + .build(); + + ReplicationConfig replicationConfig = ReplicationConfig.builder() + .deletionStrategy(ReplicationConfig.DeletionStrategy.NO_AUTOMATED_RESOLUTION) + .build(); + + // Update collection with new configurations - preserve critical existing configs + WeaviateClass updatedClass = WeaviateClass.builder() + .className(collectionName) + .shardingConfig(existingClass.getShardingConfig()) // Preserve sharding (immutable) + .invertedIndexConfig(invertedConfig) // Update + .vectorIndexConfig(vectorConfig) // Update + .replicationConfig(replicationConfig) // Update + .build(); + + Result updateResult = client.schema().classUpdater() + .withClass(updatedClass) + .run(); + // END UpdateCollection + + // Debug: Print error if update fails + if (updateResult.hasErrors()) { + System.out.println("Update failed with error: " + updateResult.getError()); + } + + assertThat(updateResult).isNotNull() + 
.withFailMessage(() -> "Update failed: " + updateResult.getError()) + .returns(false, Result::hasErrors) + .returns(true, Result::getResult); + + // Verify updates + Result verifyResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(verifyResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass verifyClass = verifyResult.getResult(); + + assertThat(verifyClass.getInvertedIndexConfig().getBm25().getK1()).isEqualTo(1.5f); + assertThat(verifyClass.getVectorIndexConfig().getFilterStrategy()).isEqualTo(VectorIndexConfig.FilterStrategy.ACORN); + assertThat(verifyClass.getReplicationConfig().getDeletionStrategy()).isEqualTo(ReplicationConfig.DeletionStrategy.NO_AUTOMATED_RESOLUTION); + } +} + \ No newline at end of file diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java new file mode 100644 index 0000000000..9c8fde4374 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java @@ -0,0 +1,58 @@ +package io.weaviate.docs.model_providers; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.schema.model.WeaviateClass; + +import java.util.HashMap; +import java.util.Map; + +// Set these environment variables +// WEAVIATE_HOSTNAME Your Weaviate instance hostname +// WEAVIATE_API_KEY Your Weaviate instance API key +// _APIKEY Your Provider API key + +public class UsageWeaviateTextEmbeddingsArcticEmbedLV20 { + public static void main(String[] args) throws Exception { + + String host = System.getenv("WEAVIATE_HOSTNAME"); + String apiKey = System.getenv("WEAVIATE_API_KEY"); + + Config config 
= new Config("https", host); + + WeaviateClient client = WeaviateAuthClient.apiKey(config, apiKey); + + client.schema().classDeleter().withClassName("DemoCollection").run(); + + // START BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + Map text2vecWeaviate = new HashMap<>(); + Map text2vecWeaviateSettings = new HashMap<>(); + + text2vecWeaviateSettings.put("properties", new String[]{"title"}); + // END BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviateSettings.put("model", new String[]{"Snowflake/snowflake-arctic-embed-l-v2.0"}); + // END BasicVectorizerWeaviate // END VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviateSettings.put("dimensions", new Integer[]{1024}); // 1024, 256 + text2vecWeaviateSettings.put("base_url", new String[]{""}); + // START BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviate.put("text2vec-weaviate", text2vecWeaviateSettings); + + // Define the vector configurations + Map vectorConfig = new HashMap<>(); + vectorConfig.put("title_vector", WeaviateClass.VectorConfig.builder() + .vectorIndexType("hnsw") + .vectorizer(text2vecWeaviate) + .build()); + + // Create the collection "DemoCollection" + WeaviateClass clazz = WeaviateClass.builder() + .className("DemoCollection") + .vectorConfig(vectorConfig) + .build(); + + Result result = client.schema().classCreator().withClass(clazz).run(); + // END BasicVectorizerWeaviate // END VectorizerWeaviateCustomModel // END SnowflakeArcticEmbedLV20 + } +} diff --git a/_includes/code/howto/search.bm25.gql 2.py b/_includes/code/howto/search.bm25.gql 2.py new file mode 100644 index 0000000000..094d5eadbf --- /dev/null +++ b/_includes/code/howto/search.bm25.gql 2.py @@ -0,0 +1,75 @@ +# Howto: Hybrid search - Python examples + +# ================================ +# ===== INSTANTIATION-COMMON ===== +# 
================================ + +import weaviate +from weaviate.classes.init import Auth +import os + +# Best practice: store your credentials in environment variables +weaviate_url = os.environ["WEAVIATE_URL"] +weaviate_api_key = os.environ["WEAVIATE_API_KEY"] +openai_api_key = os.environ["OPENAI_APIKEY"] + +client = weaviate.connect_to_weaviate_cloud( + cluster_url=weaviate_url, + auth_credentials=Auth.api_key(weaviate_api_key), + headers={ + "X-OpenAI-Api-Key": openai_api_key, + }, +) + +gql_query = """ +# START BM25OperatorOrWithMin +{ + Get { + JeopardyQuestion( + limit: 3 + bm25: { + query: "Australian mammal cute" + # highlight-start + searchOperator: { + operator: Or, + minimumOrTokensMatch: 2 + } + # highlight-end + } + ) { + question + answer + } + } +} +# END BM25OperatorOrWithMin +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +gql_query = """ +# START BM25OperatorAnd +{ + Get { + JeopardyQuestion( + limit: 3 + bm25: { + query: "Australian mammal cute" + # highlight-start + searchOperator: { + operator: And, + } + # highlight-end + } + ) { + question + answer + } + } +} +# END BM25OperatorAnd +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +client.close() diff --git a/_includes/code/howto/search.hybrid.gql 2.py b/_includes/code/howto/search.hybrid.gql 2.py new file mode 100644 index 0000000000..f5b9ab445e --- /dev/null +++ b/_includes/code/howto/search.hybrid.gql 2.py @@ -0,0 +1,75 @@ +# Howto: Hybrid search - Python examples + +# ================================ +# ===== INSTANTIATION-COMMON ===== +# ================================ + +import weaviate +from weaviate.classes.init import Auth +import os + +# Best practice: store your credentials in environment variables +weaviate_url = os.environ["WEAVIATE_URL"] +weaviate_api_key = os.environ["WEAVIATE_API_KEY"] +openai_api_key = os.environ["OPENAI_APIKEY"] + +client = weaviate.connect_to_weaviate_cloud( + cluster_url=weaviate_url, + 
auth_credentials=Auth.api_key(weaviate_api_key), + headers={ + "X-OpenAI-Api-Key": openai_api_key, + }, +) + +gql_query = """ +# START HybridWithBM25OperatorOrWithMin +{ + Get { + JeopardyQuestion( + limit: 3 + hybrid: { + query: "Australian mammal cute" + # highlight-start + bm25SearchOperator: { + operator: Or, + minimumOrTokensMatch: 2 + } + # highlight-end + } + ) { + question + answer + } + } +} +# END HybridWithBM25OperatorOrWithMin +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +gql_query = """ +# START HybridWithBM25OperatorAnd +{ + Get { + JeopardyQuestion( + limit: 3 + hybrid: { + query: "Australian mammal cute" + # highlight-start + bm25SearchOperator: { + operator: And, + } + # highlight-end + } + ) { + question + answer + } + } +} +# END HybridWithBM25OperatorAnd +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +client.close() diff --git a/_includes/code/python/best-practices.python.ai 2.py b/_includes/code/python/best-practices.python.ai 2.py new file mode 100644 index 0000000000..e57866f345 --- /dev/null +++ b/_includes/code/python/best-practices.python.ai 2.py @@ -0,0 +1,617 @@ +""" +=============================================== +Essential Weaviate Python Client Examples (v4.x) +=============================================== + +This document contains key examples for using the Weaviate Python client (v4.x). + +The page also includes comments that indicate to the user where they may find more information. + +If the provided examples are not sufficient as in-context example for code generation, +indicate to the user in comments where they may find more information. +""" + +# ======================== +# 1. 
CLIENT INSTANTIATION +# ======================== +# For more information, see the Python Client library page: https://www.weaviate.io/developers/weaviate/client-libraries/python + +""" +Connecting to Weaviate instances (local, cloud, custom) +""" + +import weaviate +from weaviate.classes.init import Auth +import os + +# Connect to local instance +client = weaviate.connect_to_local() + +# Connect with API key authentication and headers +client = weaviate.connect_to_local( + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Connect to Weaviate Cloud +client = weaviate.connect_to_weaviate_cloud( + cluster_url=os.environ["WEAVIATE_URL"], + auth_credentials=Auth.api_key(os.environ["WEAVIATE_API_KEY"]), + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Custom connection (more control) +client = weaviate.connect_to_custom( + http_host="localhost", + http_port=8080, + http_secure=False, + grpc_host="localhost", + grpc_port=50051, + grpc_secure=False, + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Using context manager for automatic connection closing +with weaviate.connect_to_local() as client: + # Client operations go here... + pass # Connection is closed automatically + +# Using try/finally for connection handling +client = weaviate.connect_to_local() +try: + # Client operations go here... + pass +finally: + client.close() # Ensure connection is closed + + +# ========================= +# 2. 
COLLECTION MANAGEMENT +# ========================= +# For more information, see the How-to Manage Collections page: https://weaviate.io/developers/weaviate/manage-data/collections + +""" +Creating, configuring, and managing collections +""" + +from weaviate.classes.config import Configure, Property, DataType + +# Basic collection creation +client.collections.create("Article") + +# Collection with properties +client.collections.create( + "Article", + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with vectorizer +client.collections.create( + "Article", + vectorizer_config=Configure.Vectorizer.text2vec_openai(), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + Property(name="categories", data_type=DataType.TEXT_ARRAY), + Property(name="is_published", data_type=DataType.BOOL), + Property(name="word_count", data_type=DataType.INT), + ] +) + +# Collection with named vectors +client.collections.create( + "ArticleNV", + vectorizer_config=[ + Configure.NamedVectors.text2vec_openai( + name="title", + source_properties=["title"] + ), + Configure.NamedVectors.text2vec_openai( + name="title_body", + source_properties=["title", "body"] + ), + # For user-provided vectors + Configure.NamedVectors.none(name="custom_vector") + ], + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with generative module +client.collections.create( + "Article", + vectorizer_config=Configure.Vectorizer.text2vec_openai(), + generative_config=Configure.Generative.openai( + model="gpt-4" # Optional specific model + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with references (cross-references) +client.collections.create( + "Author", + properties=[ + 
Property(name="name", data_type=DataType.TEXT), + Property(name="birthday", data_type=DataType.DATE), + Property(name="height_m", data_type=DataType.NUMBER), + ], + references=[ + weaviate.classes.config.ReferenceProperty( + name="wroteArticle", + target_collection="Article" + ) + ] +) + +# Get a collection +collection = client.collections.get("Article") + +# Check if collection exists +exists = client.collections.exists("Article") + +# List all collections +collections = client.collections.list_all() + +# Update a collection +from weaviate.classes.config import Reconfigure + +collection = client.collections.get("Article") +collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + bm25_k1=1.5 + ) +) + +# Add a property to an existing collection +collection.config.add_property( + Property(name="publication_date", data_type=DataType.DATE) +) + +# Delete a collection +client.collections.delete("Article") + + +# ======================== +# 3. DATA OPERATIONS +# ======================== +# For more information, see the How-to Manage Data pages: https://weaviate.io/developers/weaviate/manage-data + +""" +Creating, updating, and retrieving objects +""" + +# Insert a single object +collection = client.collections.get("Article") +uuid = collection.data.insert({ + "title": "My first article", + "body": "This is the body of my first article.", +}) + +# Insert with a specific UUID +from weaviate.util import generate_uuid5 + +properties = { + "title": "My second article", + "body": "This is the body of my second article." +} +uuid = collection.data.insert( + properties=properties, + uuid=generate_uuid5(properties) # Generate a deterministic ID +) + +# Insert with a custom vector +collection.data.insert( + properties={ + "title": "Article with custom vector", + "body": "This article has a custom vector." 
+ }, + vector=[0.1, 0.2, 0.3, 0.4, 0.5] # Your vector values +) + +# Insert with named vectors +collection = client.collections.get("ArticleNV") +collection.data.insert( + properties={ + "title": "Named vector article", + "body": "This article uses named vectors." + }, + vector={ + "title": [0.1, 0.2, 0.3, 0.4, 0.5], # Vector for title + "title_body": [0.5, 0.4, 0.3, 0.2, 0.1] # Vector for title_body + } +) + +# Fetch an object by ID +obj = collection.query.fetch_object_by_id(uuid) +print(obj.properties) + +# Fetch objects with vectors +obj = collection.query.fetch_object_by_id(uuid, include_vector=True) +print(obj.vector) # Access the vector + +# Update an object +collection.data.update( + uuid=uuid, + properties={ + "title": "Updated title" + } +) + +# Replace an object (replaces all properties) +collection.data.replace( + uuid=uuid, + properties={ + "title": "Completely new title", + "body": "Completely new body" + } +) + +# Delete an object +collection.data.delete_by_id(uuid) + +# Working with references +article_uuid = collection.data.insert({"title": "Referenced Article"}) +author_collection = client.collections.get("Author") +author_uuid = author_collection.data.insert({"name": "John Doe"}) + +# Add a reference +author_collection.data.reference_add( + from_uuid=author_uuid, + from_property="wroteArticle", + to=article_uuid +) + + +# ======================== +# 4. 
BATCH OPERATIONS +# ======================== +# For more information, see the How-to batch import data page: https://weaviate.io/developers/weaviate/manage-data/import +# And the Python Client library page: https://weaviate.io/developers/weaviate/client-libraries/python#batch-imports + +""" +Batch import for better performance +""" + +# Fixed size batch (Recommended option) +collection = client.collections.get("Article") +with collection.batch.fixed_size(batch_size=50) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Dynamic batch (adapts to Weaviate load) +with collection.batch.dynamic() as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Rate limited batch +with collection.batch.rate_limit(requests_per_minute=600) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Batch with error handling +with collection.batch.fixed_size(batch_size=50) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + if batch.number_errors > 10: + print("Too many errors, stopping batch") + break + +# Get failed objects after batch completes +failed_objects = collection.batch.failed_objects +if failed_objects: + print(f"Number of failed objects: {len(failed_objects)}") + +# Insert many items at once +from weaviate.classes.data import DataObject + +data_objects = [ + DataObject( + properties={"title": f"Article {i}", "body": f"Body {i}"}, + vector=[0.1] * 5 # Optional vector + ) + for i in range(10) +] + +collection.data.insert_many(data_objects) + + +# ======================== +# 5. 
SEARCH OPERATIONS +# ======================== +# For more information, see the How-to search pages: https://weaviate.io/developers/weaviate/search + +""" +Various search methods (semantic, keyword, hybrid) +""" + +# Basic search (fetch objects) +collection = client.collections.get("Article") +response = collection.query.fetch_objects( + limit=10, + return_properties=["title", "body"] +) + +for obj in response.objects: + print(obj.properties) + +# Semantic search with near_text +response = collection.query.near_text( + query="artificial intelligence applications", + limit=5 +) + +# Search based on vector +vector = [0.1, 0.2, 0.3, 0.4, 0.5] # Your vector here +response = collection.query.near_vector( + near_vector=vector, + limit=5 +) + +# Search based on existing object +response = collection.query.near_object( + near_object="36ddd591-2dee-4e7e-a3cc-eb86d30a4303", # UUID of reference object + limit=5 +) + +# BM25 keyword search +response = collection.query.bm25( + query="artificial intelligence", + query_properties=["title", "body"], + limit=5 +) + +# Hybrid search (combines semantic and keyword) +from weaviate.classes.query import HybridFusion + +response = collection.query.hybrid( + query="artificial intelligence", + alpha=0.5, # Balance between keyword and vector search + fusion_type=HybridFusion.RELATIVE_SCORE, + limit=5 +) + +# Search with filters +from weaviate.classes.query import Filter + +response = collection.query.near_text( + query="artificial intelligence", + filters=Filter.by_property("title").like("*AI*"), + limit=5 +) + +# Complex filtering +response = collection.query.near_text( + query="artificial intelligence", + filters=( + Filter.by_property("title").like("*AI*") & + (Filter.by_property("body").like("*research*") | + Filter.by_property("body").like("*innovation*")) + ), + limit=5 +) + +# Search with groupBy +from weaviate.classes.query import GroupBy + +response = collection.query.near_text( + query="artificial intelligence", + group_by=GroupBy( 
+ prop="category", + objects_per_group=2, + number_of_groups=3 + ), + limit=10 +) + +# For grouped results +for group_name, group_data in response.groups.items(): + print(f"Group: {group_name}, Objects: {group_data.number_of_objects}") + for obj in group_data.objects: + print(obj.properties) + +# Getting metadata with search +from weaviate.classes.query import MetadataQuery + +response = collection.query.near_text( + query="artificial intelligence", + return_metadata=MetadataQuery( + distance=True, # Vector distance + score=True, # Relevance score + creation_time=True # When the object was created + ), + limit=5 +) + +for obj in response.objects: + print(obj.properties) + print(f"Distance: {obj.metadata.distance}") + print(f"Score: {obj.metadata.score}") + print(f"Created: {obj.metadata.creation_time}") + + +# =============================== +# 6. GENERATIVE CAPABILITIES +# =============================== +# For more information, see the How-to generative search page: https://weaviate.io/developers/weaviate/search/generative + +""" +Using generative models with Weaviate +""" + +# Basic generation +collection = client.collections.get("Article") +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt="Summarize this article in one sentence: {title} - {body}", + limit=3 +) + +for obj in response.objects: + print(obj.properties) + print(f"Generated: {obj.generative.text}") + +# Grouped generation +response = collection.generate.near_text( + query="artificial intelligence", + grouped_task="Compare and contrast these AI articles", + limit=3 +) + +print(f"Grouped response: {response.generative.text}") + +# Generation with custom provider +from weaviate.classes.generate import GenerativeConfig + +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt="Summarize this article: {title}", + generative_provider=GenerativeConfig.openai( + model="gpt-4", + temperature=0.7 + ), + limit=3 +) + +# Generation 
with parameters +from weaviate.classes.generate import GenerativeParameters + +prompt = GenerativeParameters.single_prompt( + prompt="Summarize this article: {title}", + metadata=True, # Include metadata in response + debug=True # Include debug info +) + +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt=prompt, + limit=3 +) + +for obj in response.objects: + print(f"Generated: {obj.generative.text}") + print(f"Metadata: {obj.generative.metadata}") + print(f"Debug: {obj.generative.debug}") + + +# ================================= +# 7. MULTI-TENANCY OPERATIONS +# ================================= +# For more information, see the How-to multi-tenancy page: https://weaviate.io/developers/weaviate/manage-data/multi-tenancy +# And the manage tenant data and temperatures page: https://weaviate.io/developers/weaviate/manage-data/tenant-states + +""" +Working with multi-tenant collections +""" + +# Create a multi-tenant collection +client.collections.create( + "MultiTenantArticle", + multi_tenancy_config=Configure.multi_tenancy(enabled=True), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ], + vectorizer_config=Configure.Vectorizer.text2vec_openai() +) + +mt_collection = client.collections.get("MultiTenantArticle") + +# Add tenants +from weaviate.classes.tenants import Tenant + +mt_collection.tenants.create( + tenants=[ + Tenant(name="tenant1"), + Tenant(name="tenant2") + ] +) + +# Get all tenants +tenants = mt_collection.tenants.get() + +# Get specific tenant +tenant = mt_collection.tenants.get_by_name("tenant1") + +# Use a specific tenant +tenant1_collection = mt_collection.with_tenant("tenant1") + +# Add data to a specific tenant +tenant1_collection.data.insert({ + "title": "Tenant 1 Article", + "body": "This belongs to tenant 1" +}) + +# Search within a specific tenant +response = tenant1_collection.query.near_text( + query="article", + limit=5 +) + + +# 
======================== +# 8. ITERATING OVER DATA +# ======================== +# For more information, see the iterator section of the Python Client library page: https://weaviate.io/developers/weaviate/client-libraries/python#collection-iterator-cursor-api + +""" +Iterating over large datasets +""" + +# Basic iteration +collection = client.collections.get("Article") +for article in collection.iterator(): + print(article.properties) + +# Iteration with specific properties +for article in collection.iterator(return_properties=["title"]): + print(article.properties["title"]) + +# Iteration with metadata +from weaviate.classes.query import MetadataQuery + +for article in collection.iterator( + return_metadata=MetadataQuery(creation_time=True) +): + print(article.properties) + print(article.metadata.creation_time) + + +# ======================== +# 9. CLEANUP +# ======================== + +# Don't forget to close the client when done +client.close() diff --git a/_includes/code/python/howto.configure.rbac.oidc.users 2.py b/_includes/code/python/howto.configure.rbac.oidc.users 2.py new file mode 100644 index 0000000000..03fcf75a2e --- /dev/null +++ b/_includes/code/python/howto.configure.rbac.oidc.users 2.py @@ -0,0 +1,51 @@ +# TODO[g-despot]: OIDC testing not yet implemented +from weaviate.classes.rbac import Permissions + +# START AdminClient +import weaviate +from weaviate.classes.init import Auth + +# Connect to Weaviate as root user +client = weaviate.connect_to_local( + # END AdminClient + # Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port=8580, + grpc_port=50551, + # START AdminClient + auth_credentials=Auth.api_key("root-user-key"), +) +# END AdminClient + +from weaviate.classes.rbac import Permissions + +permissions = [ + Permissions.collections( + collection="TargetCollection*", read_config=True, create_collection=True + ), + Permissions.data(collection="TargetCollection*", read=True, create=True), +] + 
+client.roles.delete(role_name="testRole") +client.roles.create(role_name="testRole", permissions=permissions) + +# START AssignOidcUserRole +client.users.oidc.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"]) +# END AssignOidcUserRole +assert "testRole" in client.users.oidc.get_assigned_roles("custom-user") +assert "viewer" in client.users.oidc.get_assigned_roles("custom-user") + +# START ListOidcUserRoles +user_roles = client.users.oidc.get_assigned_roles("custom-user") + +for role in user_roles: + print(role) +# END ListOidcUserRoles +assert "testRole" in user_roles +assert "viewer" in user_roles + +# START RevokeOidcUserRoles +client.users.oidc.revoke_roles(user_id="custom-user", role_names="testRole") +# END RevokeOidcUserRoles +assert "testRole" not in client.users.oidc.get_assigned_roles("custom-user") + +client.close() diff --git a/_includes/code/python/howto.configure.rbac.users 2.py b/_includes/code/python/howto.configure.rbac.users 2.py new file mode 100644 index 0000000000..8c4d4d6b11 --- /dev/null +++ b/_includes/code/python/howto.configure.rbac.users 2.py @@ -0,0 +1,75 @@ +from weaviate.classes.rbac import Permissions + +# START AdminClient +import weaviate +from weaviate.classes.init import Auth + +# Connect to Weaviate as root user +client = weaviate.connect_to_local( + # END AdminClient + # Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port=8580, + grpc_port=50551, + # START AdminClient + auth_credentials=Auth.api_key("root-user-key"), +) +# END AdminClient + +user_api_key = client.users.db.delete(user_id="custom-user") + +# START CreateUser +user_api_key = client.users.db.create(user_id="custom-user") +print(user_api_key) +# END CreateUser +assert len(user_api_key) > 0 + +# START RotateApiKey +new_api_key = client.users.db.rotate_key(user_id="custom-user") +print(new_api_key) +# END RotateApiKey +assert len(new_api_key) > 0 and new_api_key != user_api_key + +from weaviate.classes.rbac 
import Permissions + +permissions = [ + Permissions.collections( + collection="TargetCollection*", read_config=True, create_collection=True + ), + Permissions.data(collection="TargetCollection*", read=True, create=True), +] + +client.roles.delete(role_name="testRole") +client.roles.create(role_name="testRole", permissions=permissions) + +# START AssignRole +client.users.db.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"]) +# END AssignRole +assert "testRole" in client.users.db.get_assigned_roles("custom-user") +assert "viewer" in client.users.db.get_assigned_roles("custom-user") + +# START ListAllUsers +print(client.users.db.list_all()) +# END ListAllUsers + +# START ListUserRoles +user_roles = client.users.db.get_assigned_roles("custom-user") + +for role in user_roles: + print(role) +# END ListUserRoles +assert "testRole" in user_roles +assert "viewer" in user_roles + +# START RevokeRoles +client.users.db.revoke_roles(user_id="custom-user", role_names="testRole") +# END RevokeRoles +assert "testRole" not in client.users.db.get_assigned_roles("custom-user") + +# START DeleteUser +client.users.db.delete(user_id="custom-user") +# END DeleteUser +assert all( + user.user_id != "custom-user" for user in client.users.db.list_all() +), "custom-user not deleted" + +client.close() diff --git a/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts b/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts new file mode 100644 index 0000000000..2dbfc5338c --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts @@ -0,0 +1,68 @@ +// TODO[g-despot]: OIDC testing not yet implemented +import assert from 'assert' + +// START AdminClient +import weaviate, { type WeaviateClient } from 'weaviate-client' + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 
8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("root-user-key") +}) +// END AdminClient + +const { permissions } = weaviate + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true} + ), +] + +await client.roles.delete("testRole") +await client.roles.create("testRole", collectionPermissions) + +// START AssignOidcUserRole +await client.users.oidc.assignRoles(["testRole", "viewer"], "custom-user",) +// END AssignOidcUserRole +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START ListOidcUserRoles +const userRoles = await client.users.oidc.getAssignedRoles("custom-user") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListOidcUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START RevokeOidcUserRoles +await client.users.oidc.revokeRoles("testRole","custom-user") +// END RevokeOidcUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), false) + +client.close() diff --git a/_includes/code/typescript/howto.configure.rbac.permissions 2.ts b/_includes/code/typescript/howto.configure.rbac.permissions 2.ts new file mode 100644 index 0000000000..8a55a60c6c --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.permissions 2.ts @@ -0,0 +1,157 @@ +import weaviate, { WeaviateClient } from 'weaviate-client' +import 
assert from 'assert' + +const client: WeaviateClient = await weaviate.connectToLocal({ + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + authCredentials: new weaviate.ApiKey("user-a-key") +}) + + +async function resetUser(user: string, client: WeaviateClient) { + // Clean slate + const currentRoles = await client.users.getAssignedRoles(user) // check if user exists + for await (const [role, value] of Object.entries(currentRoles)) { + await client.users.revokeRoles(role, user) // revoke all roles + } +} +// ================================================================= +// =============== EXAMPLE: READ + WRITE PERMISSIONS +// ================================================================= + +// Clean slate +resetUser("user-b", client) +await client.roles.delete("rw_role") // delete if exists + +// START ReadWritePermissionDefinition // START MTPermissionsExample +const { permissions } = weaviate +// END ReadWritePermissionDefinition // END MTPermissionsExample + +// START ReadWritePermissionDefinition + +// Define permissions (example confers read+write rights to collections starting with "TargetCollection") +const allPermissions = [ + // Collection level permissions + permissions.collections({ + collection: "TargetCollection*", + create_collection: true, // Allow creating new collections + read_config: true, // Allow reading collection info/metadata + update_config: true, // Allow updating collection configuration, i.e. 
update schema properties, when inserting data with new properties + delete_collection: true, // Allow deleting collections + } ), + // Collection data level permissions + permissions.data({ + collection: "TargetCollection*", + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: false, // Allow data deletes + }), + permissions.backup({ + collection:"TargetCollection*", + manage: true + }), + permissions.nodes.verbose({ + collection: "TargetCollection*", + read: true + }), + permissions.cluster({ + read: true + }), +] + +// Create a new role +await client.roles.create("rw_role", allPermissions) +// END ReadWritePermissionDefinition + +// START ReadWritePermissionAssignment +// Assign the role to a user +await client.users.assignRoles(["rw_role"], "user-b",) +// END ReadWritePermissionAssignment + +// ===== TEST ===== basic checks to see if the role was created +const userPermissions = await client.users.getAssignedRoles("user-b") + +assert.equal("rw_role", Object.keys(userPermissions)) +assert.equal(userPermissions["rw_role"].collectionsPermissions[0].collection, "TargetCollection*") +assert.equal(userPermissions["rw_role"].name, "rw_role") + +// ================================================================= +// =============== EXAMPLE: VIEWER PERMISSIONS +// ================================================================= + +// Clean slate +await client.roles.delete("viewer_role") // delete if exists + +// START ViewerPermissionDefinition + +// Define permissions (example confers viewer rights to collections starting with "TargetCollection") +const newPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + }), + permissions.data({ + collection: "TargetCollection*", + read: true}), +] + +// Create a new role +await client.roles.create("viewer_role", newPermissions) +// END ViewerPermissionDefinition + +// START ViewerPermissionAssignment 
+// Assign the role to a user +await client.users.assignRoles("user-b", "viewer_role") +// END ViewerPermissionAssignment + +// ================================================================= +// =============== EXAMPLE: VIEWER PERMISSIONS +// ================================================================= + +// Clean slate +client.roles.delete("tenant_manager") + +// START MTPermissionsExample + +const tenantPermissions = [ + permissions.tenants({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow creating new tenants + read: true, // Allow reading tenant info/metadata + update: true, // Allow updating tenant states + delete: true, // Allow deleting tenants + }), + permissions.data({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: true, // Allow data deletes + }) +] + +// Create a new role +await client.roles.create("tenant_manager", tenantPermissions) +// END MTPermissionsExample +// START MTPermissionsAssignment +// Assign the role to a user +client.users.assignRoles("user-b", "tenant_manager") +// END MTPermissionsAssignment + +// ===== TEST ===== basic checks to see if the role was created +const testUserPermissions = await client.users.getAssignedRoles("user-b") + +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "viewer_role" +)), true) +assert.equal( + testUserPermissions["viewer_role"].collectionsPermissions[0].collection + ,"TargetCollection*" +) +assert.equal(testUserPermissions["viewer_role"].name, "viewer_role") + +client.close() diff --git 
a/_includes/code/typescript/howto.configure.rbac.roles 2.ts b/_includes/code/typescript/howto.configure.rbac.roles 2.ts new file mode 100644 index 0000000000..57036af053 --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.roles 2.ts @@ -0,0 +1,339 @@ +import assert from "assert" +// START AdminClient +import weaviate, { WeaviateClient } from 'weaviate-client' + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("user-a-key") +}) +// END AdminClient + +// TODO: Remove if not used +const customUserClient: WeaviateClient = await weaviate.connectToLocal({ + port: 8580, + grpcPort: 50551, + authCredentials: new weaviate.ApiKey("user-b-key") +}) + +const allRolesCheck = await client.roles.listAll() + +for await (const [key, value] of Object.entries(allRolesCheck)) { + if (!["viewer", "root", "admin"].includes(key)) { + await client.roles.delete(key) + } +} + +// Todo: This will be added in upcoming release +// START CreateRole +await client.roles.create("testRole") +// END CreateRole + +// START AddClusterPermission // START AddManageRolesPermission // START AddCollectionsPermission // START AddTenantPermission // START AddDataObjectPermission // START AddBackupPermission // START AddNodesPermission // START AddRoles // START RemovePermissions +const { permissions } = weaviate +// END AddClusterPermission // END AddManageRolesPermission // END AddCollectionsPermission // END AddTenantPermission // END AddDataObjectPermission // END AddBackupPermission // END AddNodesPermission // END AddRoles // END RemovePermissions + +// todo add scope when tommy adds it +// START AddManageRolesPermission + +const rolePermission = permissions.roles({ + role: "testRole", + create: true, + read: true, + update: true, + 
delete: true, +}) + +await client.roles.create("testRole", rolePermission) +// END AddManageRolesPermission + +assert(Object.keys(await client.roles.listAll()).includes('testRole')); + +await client.roles.delete("testRole") + +// START AddManageUsersPermission + +const userPermission = permissions.users({ + user: "testRole", // Applies to all users starting with "testUser" + assignAndRevoke: true, // Allow assigning and revoking roles to and from users + read: true, // Allow reading user info +}) + +await client.roles.create("testRole", userPermission) +// END AddManageUsersPermission + +assert(Object.keys(await client.roles.listAll()).includes('testRole')); + +await client.roles.delete("testRole") + +// START AddCollectionsPermission + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + create_collection: true, // Allow creating new collections + read_config: true, // Allow reading collection info/metadata + update_config: true, // Allow updating collection configuration, i.e. 
update schema properties, when inserting data with new properties + delete_collection: true, // Allow deleting collections + }), +] + +await client.roles.create("testRole", collectionPermissions) + +// END AddCollectionsPermission +const getCollectionPermissions = await client.roles.byName("testRole") + +if (getCollectionPermissions) { + assert.equal((getCollectionPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddTenantPermission + +const AddTenantPermissions = [ + permissions.tenants({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow creating new tenants + read: true, // Allow reading tenant info/metadata + update: true, // Allow updating tenant states + delete: true, // Allow deleting tenants + }), +] + +await client.roles.create("testRole", AddTenantPermissions) +// END AddTenantPermission +const getTenantCollection = await client.roles.byName("testRole") + +if (getTenantCollection) { + assert.equal((getTenantCollection.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +client.roles.delete("testRole") + +// START AddDataObjectPermission + +const dataPermissions = [ + permissions.data({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: false, // Allow data deletes + }), +] + +await client.roles.create("testRole", dataPermissions) + +// END AddDataObjectPermission +const getDataPermissions = await client.roles.byName("testRole") + +if (getDataPermissions) { + 
assert.equal((getDataPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddBackupPermission + +const backupsPermissions = [ + permissions.backup({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + manage: true, // Allow managing backups + }), +] + +await client.roles.create("testRole", backupsPermissions) +// END AddBackupPermission + +const getBackupsPermissions = await client.roles.byName("testRole") + +if (getBackupsPermissions) { + assert.equal((getBackupsPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddClusterPermission + +const clusterPermissions = [ + permissions.cluster({ + read: true // Allow reading cluster data + }), +] + +await client.roles.create("testRole", clusterPermissions) +// END AddClusterPermission + +const getClusterPermissions = await client.roles.byName("testRole") +// assert permissions.cluster_permissions + +await client.roles.delete("testRole") + +// START AddNodesPermission + +const verboseNodePermissions = [ + permissions.nodes.verbose({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + read: true, // Allow reading node metadata + }), +] + +// The `minimal` verbosity level applies to all collections unlike +// the `verbose` level where you specify the collection name filter +const minimalNodePermissions = [ + permissions.nodes.minimal({ + read: true, // Allow reading node metadata + }), +] + +await client.roles.create("testRole", verboseNodePermissions) // or `minimalNodePermissions` +// END AddNodesPermission + +const getNodePermissions = await client.roles.byName("testRole") + +if (getNodePermissions) { + assert.equal((getNodePermissions.dataPermissions.some( + permission => permission.collection == 
"TargetCollection*" + )), true) +} + + +await client.roles.delete("testRole") + +// This is to add additional permission to below +const dummyPermission = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + }), +] + +await client.roles.create("testRole", dummyPermission) + +// START AddRoles + +const additionalDataPermissions = [ + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: false + }), +] + +client.roles.addPermissions("testRole", additionalDataPermissions) +// END AddRoles + +// START AssignRole +await client.users.assignRoles(["testRole", "viewer"], "user-b") +// END AssignRole +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "viewer*" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "testRole" +)), true) + +// START ListCurrentUserRoles +console.log(await client.users.getMyUser()) +// END ListCurrentUserRoles + +// START ListUserRoles +const userRoles = await client.users.getAssignedRoles("user-b") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListUserRoles + +assert.equal((userRoles["testRole"].collectionsPermissions.some( + permission => permission.collection == "TargetCollection*" +)), true) + +assert.equal((userRoles["testRole"].dataPermissions.some( + permission => permission.collection == "TargetCollection*" +)), true) + +// START CheckRoleExists +console.log(await client.roles.exists("testRole")) // Returns true or false +// END CheckRoleExists + +// START InspectRole +const testRole = await client.roles.byName("testRole") + +console.log(testRole) +console.log(testRole?.collectionsPermissions) +console.log(testRole?.dataPermissions) +// END InspectRole + +// START AssignedUsers +const assignedUsers = await 
client.roles.userAssignments("testRole") + +for (const users of assignedUsers) { + console.log(users) +} +// END AssignedUsers +assert.equal(assignedUsers.some( + role => role.id == "custom-user" +), true) + +// START ListAllRoles +const allRoles = await client.roles.listAll() + +for (const [key, value] of Object.entries(allRoles)) { + console.log(key) +} +// END ListAllRoles + +// START RemovePermissions + +const permissionsToRemove = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true, + delete_collection: true, + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: false + }), +] + +await client.roles.removePermissions("testRole", permissionsToRemove) +// END RemovePermissions + +// START RevokeRoles +await client.users.revokeRoles("user-b", "testRole") +// END RevokeRoles +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "testRole" +)), false) + +// START DeleteRole +await client.roles.delete("testRole") +// END DeleteRole + +client.close() +customUserClient.close() diff --git a/_includes/code/typescript/howto.configure.rbac.users 2.ts b/_includes/code/typescript/howto.configure.rbac.users 2.ts new file mode 100644 index 0000000000..9242a409d6 --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.users 2.ts @@ -0,0 +1,96 @@ +import assert from 'assert' +import weaviate, { type WeaviateClient } from 'weaviate-client' +// START AdminClient + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("root-user-key") +}) +// END AdminClient +// START CreateUser +let userApiKey +// END CreateUser + +userApiKey = await client.users.db.delete("custom-user") + +// START 
CreateUser +userApiKey = await client.users.db.create("custom-user") +console.log(userApiKey) +// END CreateUser +assert.equal((userApiKey.length > 0), true) + +// START RotateApiKey +let newApiKey +newApiKey = await client.users.db.rotateKey("custom-user") +console.log(newApiKey) +// END RotateApiKey +assert.equal( (newApiKey.length > 0) && (newApiKey != userApiKey), true) + +const { permissions } = weaviate + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true}), +] + +await client.roles.delete("testRole") +await client.roles.create("testRole", collectionPermissions) + +// START AssignRole +await client.users.db.assignRoles(["testRole", "viewer"], "custom-user") +// END AssignRole + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START ListAllUsers +console.log(await client.users.db.listAll()) +// END ListAllUsers + +// START ListUserRoles +let userRoles = await client.users.db.getAssignedRoles("custom-user") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START RevokeRoles +await client.users.db.revokeRoles("custom-user", "testRole") +// END RevokeRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), false) + +// START DeleteUser +await client.users.db.delete("custom-user") +// END DeleteUser 
+assert( + !(await client.users.db.listAll()).some(user => user.id === "custom-user"), + "custom-user not deleted" + ) + +client.close() diff --git a/_includes/collections-count-limit 2.mdx b/_includes/collections-count-limit 2.mdx new file mode 100644 index 0000000000..94e20f1269 --- /dev/null +++ b/_includes/collections-count-limit 2.mdx @@ -0,0 +1,3 @@ +:::info +To ensure optimal performance, Weaviate **limits the number of collections per instance**. The default limit is `100` collections and it can be adjusted via the [`MAXIMUM_ALLOWED_COLLECTIONS_COUNT`](/developers/weaviate/config-refs/env-vars) environment variable. +::: \ No newline at end of file diff --git a/_includes/configuration/configure-rbac 2.mdx b/_includes/configuration/configure-rbac 2.mdx new file mode 100644 index 0000000000..e709c1b02c --- /dev/null +++ b/_includes/configuration/configure-rbac 2.mdx @@ -0,0 +1,50 @@ +import Link from '@docusaurus/Link'; + +:::tip Follow these general steps to configure RBAC: + +
    +
  1. + + Step 1. Connect to Weaviate with a user possessing{' '} + + + role management permissions + + + . +
  2. +
  3. + + Step 2. Grant permissions to a{' '} + + + new role + + {' '} + or an{' '} + + + existing role + + + . +
  4. +
  5. + + Step 3.{' '} + + + Assign the role to a user + + + . +
  6. +
+ +::: diff --git a/_includes/configuration/dynamic-user-management 2.mdx b/_includes/configuration/dynamic-user-management 2.mdx new file mode 100644 index 0000000000..e564316069 --- /dev/null +++ b/_includes/configuration/dynamic-user-management 2.mdx @@ -0,0 +1,5 @@ +:::tip TIP: User management API available from `v1.30` + +Instead of adding additional users via the `AUTHENTICATION_APIKEY_USERS` environment variable, we suggest using the [user management API](/developers/weaviate/configuration/rbac/manage-users) which you can use to create and delete users, manage their roles and rotate their API keys. + +::: diff --git a/_includes/latest-weaviate-version 2.mdx b/_includes/latest-weaviate-version 2.mdx new file mode 100644 index 0000000000..b32f227abe --- /dev/null +++ b/_includes/latest-weaviate-version 2.mdx @@ -0,0 +1,6 @@ +:::tip TIP: Use the latest Weaviate version! + +When possible, try to use the latest Weaviate version. +New releases include cutting-edge features, performance enhancements, and critical security updates to keep your application safe and up-to-date. + +::: diff --git a/_includes/named-vector-compress 2.mdx b/_includes/named-vector-compress 2.mdx new file mode 100644 index 0000000000..e952ed3e87 --- /dev/null +++ b/_includes/named-vector-compress 2.mdx @@ -0,0 +1,4 @@ +:::info Added in `v1.24` +::: + +Collections can have multiple [named vectors](/developers/weaviate/config-refs/schema/multi-vector). The vectors in a collection can have their own configurations, and compression must be enabled independently for each vector. Every vector is independent and can use [PQ](/developers/weaviate/configuration/compression/pq-compression), [BQ](/developers/weaviate/configuration/compression/bq-compression), [SQ](/developers/weaviate/configuration/compression/sq-compression), or no compression. 
diff --git a/_includes/runtime-generative 2.mdx b/_includes/runtime-generative 2.mdx new file mode 100644 index 0000000000..eb6fc14d6d --- /dev/null +++ b/_includes/runtime-generative 2.mdx @@ -0,0 +1,5 @@ +:::tip + +You can [override the generative integration settings at query time](/developers/weaviate/search/generative#configure-a-generative-model-provider) without updating it in the collection configuration. + +::: diff --git a/_includes/wcs/create-api-keys 2.mdx b/_includes/wcs/create-api-keys 2.mdx new file mode 100644 index 0000000000..d49fe366da --- /dev/null +++ b/_includes/wcs/create-api-keys 2.mdx @@ -0,0 +1,95 @@ +When connecting to a Weaviate Cloud cluster, you need an API key and the REST endpoint URL for authentication. + +If you don't have an existing API key, you'll need to create one. Follow these steps to find the API keys section and create a new key if necessary: + +import Link from '@docusaurus/Link'; +import Register from '/developers/wcs/img/weaviate-cloud-api-key-create.png'; + +
+
+
    +
  1. + Open the{' '} + Weaviate Cloud console{' '} + and{' '} + + select your cluster + + . +
  2. +
  3. + Navigate to the API Keys section, found in the{' '} + Cluster details panel. +
  4. +
  5. + If you need a new API key, click the Create API key{' '} + button (1 in the image below). +
  6. +
+
+
+
+
+ Navigate to the API Keys section +
+
Navigate to the API Keys section.
+
+
+
+
+ +import NewAPIKeyForm from '/developers/wcs/img/weaviate-cloud-api-key-create-form.png'; +import SaveAPIKey from '/developers/wcs/img/weaviate-cloud-api-key-save.png'; + +
+
+
    +
  1. + In the Create API Key form, provide a descriptive name for + your key (1). +
  2. +
  3. + Choose the role for this API key (2). You + can either select an existing role like admin or{' '} + viewer, or{' '} + create a new role with + specific permissions. +
  4. +
  5. + Click the Create button (3). +
  6. +
+
+
+
+
+ Create a new API key +
+
Create a new API key.
+
+
+
+
+ +
+
+
    +
  1. + Important: This is the only time your API key will be + displayed. Make sure to copy it (1) or + download it (2) and store it in a secure + location immediately after creation. You will not be able to retrieve + the full key again. +
  2. +
+
+
+
+
+ Save your API key +
+
Save your API key.
+
+
+
+
diff --git a/_includes/wcs/hostname-warning 2.mdx b/_includes/wcs/hostname-warning 2.mdx new file mode 100644 index 0000000000..788c19d774 --- /dev/null +++ b/_includes/wcs/hostname-warning 2.mdx @@ -0,0 +1,3 @@ +:::caution +This client uses the `hostname` parameter (without the `https` scheme) instead of a complete `URL`. +::: \ No newline at end of file diff --git a/_includes/wcs/retrieve-rest-endpoint 2.mdx b/_includes/wcs/retrieve-rest-endpoint 2.mdx new file mode 100644 index 0000000000..b9018dd43e --- /dev/null +++ b/_includes/wcs/retrieve-rest-endpoint 2.mdx @@ -0,0 +1,37 @@ +import Link from '@docusaurus/Link'; + +This is how you can retrieve your `REST Endpoint`: + +import WCDClusterURL from '/developers/weaviate/quickstart/img/cluster_url.png'; +import WCDClusterAdminKey from '/developers/weaviate/quickstart/img/cluster_admin_key.png'; + +
+
+
    +
  1. + On the Cluster details page or within the{' '} + API Keys section, find the REST Endpoint URL. +
  2. +
  3. + Copy the REST Endpoint URL and store it securely. +
  4. +
+
+
+
+
+
+ Get the (REST) endpoint URL +
+
+ Grab the REST Endpoint URL. +
+
+
+
+
+
+ +:::note REST Endpoint vs gRPC Endpoint +When using an official Weaviate [client library](/developers/weaviate/client-libraries), you need to authenticate using the `REST Endpoint` and your API key. The client will infer the gRPC endpoint automatically and use the more performant gRPC protocol when available. +::: diff --git a/_includes/wcs/weaviate-cloud-edit-organization 2.mdx b/_includes/wcs/weaviate-cloud-edit-organization 2.mdx new file mode 100644 index 0000000000..db0f587ebe --- /dev/null +++ b/_includes/wcs/weaviate-cloud-edit-organization 2.mdx @@ -0,0 +1,32 @@ +import Link from '@docusaurus/Link'; +import OrganizationSettings from '/developers/wcs/img/weaviate-cloud-organization-settings.png'; + +
+
+
    +
  1. + Open the{' '} + Weaviate Cloud console. +
  2. +
  3. + Open the organization dropdown menu (1). +
  4. +
  5. + Click on Organization settings ( + 2). +
  6. +
+
+
+
+
+ Edit an organization in Weaviate Cloud +
+
Edit an organization in Weaviate Cloud.
+
+
+
+
diff --git a/_includes/weaviate-embeddings-models 2.mdx b/_includes/weaviate-embeddings-models 2.mdx new file mode 100644 index 0000000000..b52f313b87 --- /dev/null +++ b/_includes/weaviate-embeddings-models 2.mdx @@ -0,0 +1,21 @@ +### `Snowflake/snowflake-arctic-embed-l-v2.0` (default) {#snowflake-arctic-embed-l-v2.0} + +- A 568M parameter, 1024-dimensional model for multilingual enterprise retrieval tasks. +- Trained with Matryoshka Representation Learning to allow vector truncation with minimal loss. +- Quantization-friendly: Using scalar quantization and 256 dimensions provides 99% of unquantized, full-precision performance. +- Read more at the [Snowflake blog](https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0), and the Hugging Face [model card](https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0) +- Allowable `dimensions`: 1024 (default), 256 + +--- + +### `Snowflake/snowflake-arctic-embed-m-v1.5` {#snowflake-arctic-embed-m-v1.5} + +- A 109M parameter, 768-dimensional model for enterprise retrieval tasks in English. +- Trained with Matryoshka Representation Learning to allow vector truncation with minimal loss. +- Quantization-friendly: Using scalar quantization and 256 dimensions provides 99% of unquantized, full-precision performance. +- Read more at the [Snowflake blog](https://www.snowflake.com/engineering-blog/arctic-embed-m-v1-5-enterprise-retrieval/), and the Hugging Face [model card](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) +- Allowable `dimensions`: 768 (default), 256 + +:::info Input truncation +Currently, input exceeding the model's context windows is truncated from the right (i.e. the end of the input). 
+::: diff --git a/_includes/weaviate-embeddings-requirements 2.mdx b/_includes/weaviate-embeddings-requirements 2.mdx new file mode 100644 index 0000000000..76d83c8590 --- /dev/null +++ b/_includes/weaviate-embeddings-requirements 2.mdx @@ -0,0 +1,7 @@ +To use Weaviate Embeddings, you need: + +- A Weaviate Cloud instance running at least Weaviate version `>=1.27.10`, `>=1.28.3` or `>=1.29.0`. +- A Weaviate client library that supports Weaviate Embeddings: + - Python client version `4.9.5` or higher + - JavaScript/TypeScript client version `3.2.5` or higher + - Go/Java clients are not yet officially supported; you must pass the `X-Weaviate-Api-Key` and `X-Weaviate-Cluster-Url` headers manually upon instantiation as shown below. diff --git a/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx b/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx new file mode 100644 index 0000000000..2688f68c68 --- /dev/null +++ b/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx @@ -0,0 +1,51 @@ + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; +import PyCode from '!!raw-loader!/developers/weaviate/model-providers/_includes/provider.vectorizer.py'; +import TSCode from '!!raw-loader!/developers/weaviate/model-providers/_includes/provider.vectorizer.ts'; +import GoCode from '!!raw-loader!/_includes/code/howto/go/docs/model-providers/2-usage-text/main.go'; +import JavaCode from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddings.java'; +import JavaCode2 from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20.java'; +import JavaImportQueries from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/ImportAndQueries.java'; + +- `model` (optional): The name of the model to use for 
embedding generation. +- `dimensions` (optional): The number of dimensions to use for the generated embeddings. +- `base_url` (optional): The base URL for the Weaviate Embeddings service. (Not required in most cases.) + +The following examples show how to configure Weaviate Embeddings-specific options. + + + + + + + + + + + + + + + diff --git a/blog/2025-02-13-agents-simplified/index 2.mdx b/blog/2025-02-13-agents-simplified/index 2.mdx new file mode 100644 index 0000000000..4fa698198c --- /dev/null +++ b/blog/2025-02-13-agents-simplified/index 2.mdx @@ -0,0 +1,206 @@ +--- +title: 'Agents Simplified: What we mean in the context of AI' +slug: ai-agents +authors: [tuana, prajjwal] +date: 2025-02-13 +tags: ['concepts', 'agents'] +image: ./img/hero.png +description: "What is an AI Agent? Learn how AI agents work, the benefits of using AI agents and more" +--- +![Agents Simplified: What we mean in the context of AI](./img/hero.png) + + +If you’re in the AI-osphere, you’ve probably heard the term ‘AI Agents’ being thrown around quite a bit recently. In this article, let’s boil down what we mean when we say ‘Agents’ in the context of large language models (LLMs) and artificial intelligence (AI). + +Before we dive into the topic, one thing to remember is that the term ‘agents’ has existed long before we had todays performant LLMs. We could even say that AI agents have existed for a long time too, just not with today’s generative LLMs as the star. What has changed though is just how good and sophisticated they’ve become. So, in short, you’re not hearing more about agents because they’re a brand new technology. No, you’re hearing more about AI agents because things just got very, very interesting. + +## What is an AI Agent + +At a basic level, an AI agent today is a semi- or fully-autonomous system that uses an LLM as its ‘brain’ for critical decision making and solving complex tasks. 
Think of them as automated decision making engines so that you, the user, only have to come with your query. They operate and use a variety of tools available to them in their environment to get things done for you so that you can sit back and relax while it figures out how to solve your problem. + +Agents autonomously direct their own processes and execution flow, choosing which tools to use based on the task at hand. These tools can include web search engines, databases, APIs, and more, enabling agents to interact with the real world. + +## A Brief History on AI Agents + +AI agents have technically existed for a long time. You can even see the authors of [this recent article on AI agents by Microsoft](https://news.microsoft.com/source/features/ai/ai-agents-what-they-are-and-how-theyll-change-the-way-we-work/) referring to AI agents they’d been working on back in 2005. However, the shape and abilities of our AI agents have significantly changed in the last couple of years largely thanks to the abilities of the latest LLMs. Now, we’re able to use LLMs as a core component when it comes to planning, reasoning and acting. + +> Reader: This section was a pleasure for me (Tuana) to write and geek out on. But if history and research is not your thing, feel free to jump to the next section. I won’t take offense. + +So, with that said, I’d like to highlight a few milestones in our _recent_ history of AI agents, and you can assume that from here on out we are only referring to the AI agents of today (2025). This is of course my own experience on the matter looking back over the last few years. But let’s turn back the clock to just before the release of ChatGPT.
In 2020, there were 2 papers that were published that in my view could be viewed as the beginnings of current day AI agents that make use of LLMs as the core decision making component: + +- [MRKL Systems](https://arxiv.org/abs/2205.00445): Pronounced ‘miracle’ systems, this paper was largely centered around the shortcomings of language and studied the _reason_ as to why we were getting so many hallucinated responses. And in short, they highlighted what we now fully understand: Language models don’t _know_ everything, they’re designed to generate language. Think of it this way, we can’t expect people to know our birthday unless we tell them when it is. This paper introduces a way in which we can provide language models with external knowledge bases which can be referred to extract the relevant information from. +- [ReAct](https://arxiv.org/pdf/2210.03629): Published slightly after MRKL systems, this paper introduced another crucial component into what makes an agent today. This paper introduced a process of prompting that we call “ReAct”, which stands for ‘reason and act’. In short, it highlighted a clever way we can structure our prompts which results in the LLM taking into account the question at hand, reasoning about its options on how to solve it, selecting the correct tools to use to solve the question, and acting on it. To keep things _very_ simple, take the following example. Instead of only asking the question, we’re also telling the model which resources it has access to and asking it to make a plan about how it would solve the query. In short, this paper introduced a way to start thinking about our LLM instructions to make the process of reasoning and acting more reliable: + +![chat](img/chat.png) + +> Note: The actual ReAct prompt recommended in the paper is a lot more involved than this, including instructions on how to generate thought, how to reason and so on. 
+ +In my view, these two papers highlight two very important findings and features that bring us to the AI agents of today: a good instruction, and external tools. That, and thousands of humans who started to tinker around with these LLMs and we’re now in a world where we’ve started to build more and more sophisticated AI agents (that no longer only use the ReAct prompting approach). + +With that, let’s have a look into what makes up an AI agent of today. + +### Core Components of an AI Agent + +Although not every AI agent has to include _all_ of these components, when we build agents they include at least a few of the following components and processes: An LLM, access to tools (via function calling), some level of memory, and reasoning. + +Let’s dive into what they each do: + +- **LLM:** Think of the LLM as the brain of the operation. Although not necessarily for _every step_, when we say ‘agents’ in 2025 a generative model is involved as the orchestrator of the operation to a great degree. Simply put, think of the example scenario in the section above: it’s the LLM that has decided that it’s best to first look up the `user_calendar` followed by looking up the weather. +- **Tools:** A great feature of agents is that they interact with the environment through different tools. One can think of them as ‘add-ons’ that make agents better. These tools let agents go beyond the fixed training knowledge of the LLMs by providing highly relevant and real-time data (like to your personal database) and abilities (like sending emails). With function calling, LLMs can directly interact with a predefined set of tools, expanding the operational scope and efficiency of agents. +- **Memory:** Agents often have some form of memory (both short-term and long-term), which allows them to store logs of their reasoning process, conversation histories, or information collected during different execution steps. 
We need memory both for ongoing conversations with our agents as well as conversations we want to come back to. Memory can be used to personalize the experience or plan future decisions +- **Observation & Reasoning:** The LLM is at the heart of problem solving, task decomposition, planning, and routing. It’s the component that allows the agent to reason about a problem, break it down into smaller steps (if needed), and decide how & when to use the available resources/tools to provide the best solution. However, not every agent is built equally, sometimes we include reasoning as an explicit step of the process when we’re building our agents. + +An important thing to remember is that there are various design patterns that result in an AI agent and these components can be used to varying degrees. The agents we see today exist on a spectrum, and the level of autonomy or ‘agentic’ behavior largely depends on how much decision making authority is delegated to the LLMs. In simpler terms: some agents are designed to operate more independently than others. + +![agents](img/ai_agents.png) + +## How do AI Agents Work? + +Most AI agents we see today use the LLM as the core decision maker/orchestrator of the operation. The level of autonomy this LLM has can of course vary, which we’ll talk more about in the ‘A look into the future’ section of this article. But let’s first start by discussing the basics of how an AI agent that uses an LLM for most of the decisions works. + +Something I notice is that when people discuss LLMs and agents these days, it seems like there’s quite a lot of magic happening. So here, I’ll try to explain what is _actually_ going on behind the scenes of an AI agent that has access to some tools. + +### Define the Prompt + +At the heart of any system that uses an LLM is an instruction (a prompt) that sets the scene for the LLM as to what its core purpose is. 
The ReAct paper also clearly presented this by highlighting a complex prompt that defines a reasoning, thought-generating, observing agent. For example, an LLM could be given the instruction about how it’s a “helpful assistant that has access to my databases in order to answer my queries”. + +### Provide Tools + +Next, we need to provide a list of tools to the LLM. This is by far one of the most popular ways of creating AI agents today, although it’s not always necessary and we can still create agentic functionality without it having to be via tools and function calling. Most model providers today support ‘function calling’ which allows us to set up our interactions with an LLM with a list of tools that it knows it may access at any given time to resolve a query. + +When we provide tools to an LLM, we tell the LLM about a few things. It uses these things to decide whether it’s time to use the tool or not: + +- **The name:** for example, a tool may have the name `technical_documentation_search` +- **The description:** which is probably the most important piece of information the model has access to when reasoning about which tool to use. For example, for the tool `technical_documentation_search` we may provide the description “Useful for when you need to search the Weaviate technical docs for answers” +- **The expected inputs:** Remember that tools are _external_ to the LLM. The LLM knows their name, it has a description for them too, but ultimately the job of a generative large language model is to produce language. So what can it do? Well, what it’s good at! It can probably produce some content which returns the name of a function (a tool), and the expected inputs for it to run. So, we also provide this information when we give a list of tools to an LLM. For example, for our tool `technical_documentation_search` tool, we may tell the LLM that it expects `query: str` to run. 
+ +If you’re interested in what this looks like in reality, you can check out the [Function Definition docs by OpenAI](https://platform.openai.com/docs/guides/function-calling) for example. + +### Use Tools + +So, we have an LLM, it knows that it may access some tools, how to run them, and what they’re useful for. However, an LLM doesn’t have an inherent ability to, for example, run a python script… Or search your documentation. What it can do though is provide a message that _explains_ that it intends to run a tool, and what inputs it wants to run it with. + +Let’s take the following scenario as an example: + +- We have an AI agent using an LLM +- We’ve provided `technical_documentation_search` as a tool with expected input `query: str`. We’ve said it’s “Useful for when you need to search the Weaviate technical docs for answers” +- User asks: “Hey, how can I use Ollama with Weaviate?” + +In this scenario, what actually happens is something like this: + +- The LLM produces a response that boils down to “Run tool `technical_documentation_search` with `query = "Using Ollama"` ”. + +So, in reality, the LLM is making our AI agent application take a step outside of its own world. It instructs our system that there’s an external resource to be referenced. + +### Observe Tool Responses + +If all goes well, by this point your AI agent has run a tool. Remember that this tool could be _anything_. For example, our `technical_documentation_search` tool could in itself be a [RAG application (retrieval augmented generation)](/blog/introduction-to-rag) that in itself uses yet another LLM to generate responses to queries. The point is, at the end of the day we’ve probably run the tool with the query “Using Ollama” and the response is “You can use Ollama by enabling the text2vec-ollama or generative-ollama modules, both for embedding models and generative modules”, or something along those lines. 
But that’s not the end of it, because the original LLM that makes up the core of our AI agent doesn’t know the response yet. + +When a tool runs, the results of that tool are then returned back to the agent’s LLM. This is usually provided as a chat message where the role is set to “function call”. So our LLM knows that the response it’s seeing is not from the user, but a result of the tool it decided to run. The LLM then observes the results of the tool (or tools) to provide the user with the final answer. + +Congratulations! By this point, you’ve learned the basics of what makes an AI agent! Especially those that rely on tools and function calling. The way I like to imagine it is that the LLM that is the core orchestrator of an AI agent is a bit like a wizard with a spell book but no wand. The LLM knows what it can do, and how, but it can do nothing more than say the magic word. The tools still have to run outside the LLM. + +![wizard](img/wizard.png) + +## What is “Agentic” AI + +There’s a lot of new vocabulary to get used to, which can be confusing. But actually, when it comes to what’s “agentic AI” versus what an “AI agent” is, we can make our lives a lot easier. An AI agent is inherently _agentic_, but an AI agent usually refers to an end application designed for a specific task. For example, an AI agent might be a documentation search assistant, or a personal assistant that has access to your email and Slack. + +When we say ‘Agentic AI’ however, we’re usually referring to a system that is designed with elements of agentic components such as a decision making LLM, a reasoning step, maybe some tools, self-reflection, and so on. For something to be deemed agentic, it doesn’t need to have all of these components. Rather, it often showcases the features of some of them.
+ +## Tools for Building AI Agents + +Building an AI agent requires integrating many components and tools to create a system capable of autonomous or semi-autonomous decision-making, interaction, and task execution. While advanced agents can be highly complex, even the simplest ones need a few essential elements. Below are some resources that can help you get started with building your own AI agents: + +### 1. Language Model Providers: + +The foundation of an AI agent is an LLM, which powers its entire reasoning. It allows the agent to understand different inputs and plan its actions effectively. It is also essential to look for an LLM that has built-in function-calling support so that we can connect it to external tools and APIs. Popular LLM providers include: + +- [OpenAI](https://platform.openai.com/docs/models): GPT 4o, o3-mini +- [Anthropic](https://docs.anthropic.com/en/docs/about-claude/models): Claude 3.5 Sonnet, Claude 3.5 Haiku +- [Google](https://ai.google.dev/gemini-api/docs/models/gemini): Gemini 2.0 Pro, Gemini 2.0 Flash +- [Mistral](https://docs.mistral.ai/getting-started/models/models_overview/): Mistral Large, Mistral Small 3 +- Open-source models using [Hugging Face](https://huggingface.co/models) or [Ollama](https://ollama.com/search) + +### 2. Memory and Storage: + +Agents need some kind of persistent memory to retain context over time. The memory can be of two types: + +- Short-term Memory: To keep track of current conversation or the task at hand. +- Long-term Memory: To remember past conversations, personalization, and experiences over time. + +There are currently many variations and implementations of both types of memory for agents today, and we’re likely to see more as the technology progresses. For example, for short-term memory, we see implementations as simple as providing “conversation summaries” to the LLM at each iteration or message, so as to navigate context length limits. 
For long-term memory, we may choose to use a database to back up conversations. This may even start changing the role of vector databases like Weaviate, where they start being used as long-term memory which the AI agent can extract most relevant bits of prior conversation from. + +### 3. Frameworks for AI Agent Orchestration: + +Orchestration frameworks act as smart conductors, coordinating all components of an AI agent and even managing multiple agents in a multi-agent setup. They abstract away most of the complexities, handle errors/retries cycles, and ensure that the language model, external tools/APIs, and memory systems all work together smoothly. + +There are several frameworks available that simplify the development of AI agents: + +- [Langgraph](https://www.langchain.com/langgraph): Provides a structured framework for defining, coordinating, and executing multiple agents. +- [LlamaIndex](https://www.llamaindex.ai/): Enables the creation of complex, agentic systems with varying degrees of complexity. +- [CrewAI](https://www.crewai.com/): Multi-agent framework for orchestrating autonomous AI agents having specific roles, tools, and goals. +- [Hugging Face smolagents](https://huggingface.co/docs/smolagents/en/index): Library that enables you to run powerful agents in just a few lines of code. +- [Haystack](https://haystack.deepset.ai/): End-to-end framework that allows you to build AI applications like agents, powered by LLMs. +- [OpenAI Swarm](https://github.com/openai/swarm):An educational framework exploring ergonomic, lightweight multi-agent orchestration. +- [AgentKit](https://agentkit.inngest.com/overview): A TypeScript library to create and orchestrate AI Agents. + +### 4. Tools and APIs: + +An agent is only as powerful as the tools it can access. By connecting to various APIs and tools, the agent can interact with its environment and perform tasks such as web browsing, data retrieval, database queries, data extraction & analysis, code execution, etc. 
+ +Frameworks like LlamaIndex offer pre-made tool integrations like data loaders for PDFs, websites, and databases, as well as for apps like Slack, and Google Drive via [LlamaHub](https://llamahub.ai/). Similarly, [Langchain](https://python.langchain.com/docs/integrations/tools/) offers a wide range of similar tools that agents can readily use. Also, developers can always build custom tools as per their needs by wrapping APIs to introduce entire new functionalities. Recent works like [Querying Databases with Function Calling](https://arxiv.org/abs/2502.00032) even hint at the promise of function calling for database queries. + +In a nutshell, building AI agents is a lot like assembling pieces of a puzzle. You start off with a good language model, add the right set of tools and APIs, and then add in memory so that the agent remembers what’s important. An orchestration framework can be used to make things simpler and tie things together, making sure every piece plays its part perfectly. + +## A look into the future of AI Agents: challenges and advances + +The great thing about AI agents and agentic AI in general is that it’s still evolving every day. Although there’s a lot we didn’t discuss here from the challenges we see, to other core components of actually building AI agents for production, like observability, there are a few things that are probably worth highlighting when it comes to the future of AI agents. + +For example, you may have already noticed that unless we take some time to intentionally design our agentic applications, it may seem that a lot (too much?) relies on an LLM making the right call, if you will. And in the case that the agent has access to search tools, or knowledge bases, maybe that’s ok. But what happens when the tool has access to your bank account and the agent can now buy you a very expensive one way ticket to Hawaii?
+ +A debate I’ve really been enjoying listening to is whether the use of AI agents is mostly as “research assistants” or as the “executors of our will”. Which is a simple, but important debate, and probably one on which our opinions change over time as LLMs get better, and we have better regulations and guard rails in the field of AI in general. + +### Levels of Autonomy & Human in the Loop + +Now you understand how an AI agent in its most basic form operates. But it’s not _necessary_ (or advisable) to have the LLM be the orchestrator of _everything_. We’re already seeing more and more agents that delegate the process to simpler, more deterministic systems. And in some cases, to humans. For example, we’ll probably see more and more of the scenario in which a human is supposed to approve an action before it can take place. + +We’re even seeing tools like [Gorilla](https://github.com/ShishirPatil/gorilla) implement agents with “undo” functionality that allows a human to decide whether an action should be back tracked, adding a layer of human intervention into the process. + +### Multi-modal AI Agents + +Multi-modality refers to the ability to make use of more than one modality, i.e. the ability to go beyond just language (text) and incorporate images, videos, audio and so on. In a way, the technology is for the most part there. So, we will probably start seeing more and more AI agents that can interact with a variety of mediums, either as part of their tooling, or inherently if they make use of a multi-modal LLM. Think of an AI agent which you can ask to “create a cute cat video and forward it to my email”! + +### The role of vector databases + +Another interesting topic, especially for us at Weaviate, is the potential for the role of [vector databases](/blog/what-is-a-vector-database) in AI to expand. So far, we’ve mostly been seeing vector databases used as knowledge sources which an agent can have access to. 
However, it’s not difficult to imagine a future in which we’re making use of vector databases, as well as other types of databases, as memory resources for our agent interactions. + +## Examples and Use Cases of AI agents + +AI agents are reshaping the way we work and this change is already visible across multiple industries. They shine brightest when we need a perfect blend of conversation with action. By automating repetitive tasks they not only increase the work efficiency but also improve the overall user experience. Here are some real-world examples of AI agents in action: + +### AI Research Agent + +AI research agents, or research assistants simplify the process of analyzing large amounts of data, spotting trends, and generating hypotheses. Today, we can already see people in academia or professionals at work using ChatGPT as a companion to help them gather information, to help them structure their thoughts and provide the first step in many tasks. In a way, ChatGPT in its bare form is in itself a research assistant agent. These types of agents are also sometimes referred to as [“Agentic RAG”](/blog/what-is-agentic-rag), where an AI agent has access to multiple RAG tools, each accessing different knowledge bases. + +### Customer Service Agent + +AI customer service agents provide 24/7 support, handling inquiries, troubleshooting, and offering personalized interactions. They reduce wait times and let human agents take on more complex tasks. They can both act as research assistants for customers, getting answers to their queries quicker, as well as completing tasks for them. + +### Marketing & Sales Agent + +These agents optimize marketing campaigns and sales processes by analyzing customer data, personalizing outreach, and automating repetitive tasks like lead qualification and email follow-ups. + +### Code Assistant Agent + +These agents help developers by suggesting code, debugging errors, resolving tickets/issues, and even building new features.
This enables developers to save time and focus on creative problem-solving. Examples of this are already out there with Cursor and Copilot. + +## Summary + +This article gave a high level overview of what we mean when we say ‘AI agents’ in 2025, as well as giving a simple look into how they work. Although we did not go into all the technical details of different ‘agentic workflows’, another blog going into more technical detail is coming soon! We go through the components that help with the basic understanding of AI agents, such as prompts, tools, observing tool responses and reasoning about the final answer. Finally, we look into the future of AI agents, discuss the current short-comings and the advancements we could expect. + +A lot of the historical overview mentioned in this blog was also my (Tuana’s) subjective view looking over the past few years. If you do think I’m missing a curcial step, do let me know (DMs open on [X](https://x.com/tuanacelik)) + +import WhatsNext from '/_includes/what-next.mdx' + + \ No newline at end of file diff --git a/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx b/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx new file mode 100644 index 0000000000..53169923a2 --- /dev/null +++ b/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx @@ -0,0 +1,82 @@ +--- +title: 'Accelerate Enterprise AI: 94% Faster Search, Simplified Embedding Creation, and Dedicated Azure Deployment' +slug: accelerate-enterprise-ai +authors: [alvin, nenand] +date: 2025-02-25 +tags: ['release'] +image: ./img/hero.png +description: "Weaviate Embeddings is Now Generally Available, and Weaviate 1.29 is Officially Here! Read more about it in our launch announcement." +--- +![hero](./img/hero.png) + +**TL;DR** + +* **Weaviate Embeddings General Availability (GA)**: Now natively hosted in Weaviate Cloud, featuring Snowflake’s Arctic Embed 1.5 and 2.0 for multilingual, high-performance vector embeddings. 
+* **94% Faster Keyword Search**: BlockMax WAND slashes BM25 query latency by up to 94%, making large-scale hybrid search more efficient. +* **Enterprise Security with RBAC GA**: Our fully supported Role-Based Access Control is now generally available, giving enterprises the granular access controls they need. +* **Multi-Vector Embeddings Preview:** Now enables both queries and objects to be represented by multiple vectors, unlocking more nuanced data understanding and improved search relevance. +* **Dedicated Enterprise Deployment on Azure**: Simplifies deployments for customers deploying on Microsoft Azure, offering private clusters, IP whitelisting, and seamless cloud management. +* **NVIDIA integrations:** Leverage NVIDIA's inference services and models directly within Weaviate. +* **March 5 Release Event**: Join the Weaviate 1.29 deep dive for a live walkthrough of new features—recording available if you can’t attend live. + +## What’s New in Weaviate? + +We’re excited to share major enhancements across both Weaviate Cloud and the newly released Weaviate 1.29—our latest product launch packed with features that accelerate enterprise AI adoption without steep learning curves or costly overhead. + +From boosting hybrid search performance to simplifying the creation of **vector embeddings**, Weaviate continues to blend innovation with practicality for teams building production-grade AI solutions. Have stringent security requirements? With **Role-Based Access Controls (RBAC)** and dedicated deployment in Microsoft **Azure,** organizations can more easily comply with enterprise standards. Below, you’ll find the highlights that make Weaviate a compelling choice for those looking to push the boundaries of AI-powered search, data ingestion, and security. + +## 94% Faster BM25 Keyword Search with BlockMax WAND + +Weaviate has proven billion-scale vector search with low latency, it now delivers the same high-performance level for BM25 keyword search. 
**BlockMax WAND** implementation supercharges Weaviate’s BM25 keyword search with up to a **94% reduction** in search latency**[^1]**, making large-scale keyword lookups faster and more efficient. + +By organizing parts of the keyword index with pre-computed statistics, it can skip irrelevant documents and compress data far more effectively. For enterprises handling billions of records, this translates to dramatically lower latency and significantly reduced storage requirements—**elevenfold compression in some tests[^2]**. Whether you’re searching across product catalogs, customer communications, or internal knowledge bases, the new algorithm helps your teams find relevant insights faster **without extra hardware or infrastructure overhead**. + +Best of all, once a user opts in, these performance gains work behind the scenes–applying only to data ingested after enabling–so there’s no need for developers to revamp existing applications. With **BlockMax WAND** in the **Weaviate 1.29** release, users gain a robust, future-proof search solution that scales to enterprise needs. For further insights into BlockMax WAND, explore our blog, "[BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand).” + +## Weaviate Embeddings: Now Generally Available + +**Weaviate Embeddings** is now generally available in **Weaviate Cloud**, taking a major leap forward from our initial [Preview](/blog/introducing-weaviate-embeddings). Designed to eliminate friction in AI data pipelines, it combines vector creation and storage into a single service. No more juggling external providers or hitting rate limits mid-project. You can ingest millions of data points at high throughput, all while co-locating models and vector indexes in Weaviate Cloud for optimal performance. 
+ +A key addition to this release is **Snowflake’s Arctic Embed 2.0**—an open-source text embedding model that goes beyond its 1.5 predecessor with better multilingual support and **impressive benchmark results** (including [MTEB](https://arxiv.org/html/2412.04506v1)). Simply put, it handles large-scale, high-fidelity document ingestion while delivering more accurate semantic search. By running these embeddings natively in Weaviate Cloud, developers can focus on building next-gen AI applications instead of wrestling with model hosting or unwieldy data pipelines. + +For a deeper look at **Arctic Embed 2.0**’s journey, check out the latest [**Weaviate Podcast**](https://www.youtube.com/watch?v=Kjqv4uk3RCs&ab_channel=Weaviate%E2%80%A2VectorDatabase) featuring its creators from Snowflake. If you’re ready to get hands-on, visit our [**Quickstart tutorial**](/developers/wcs/embeddings#get-started), or get started with a free trial of [**Weaviate Cloud**](https://console.weaviate.cloud/). + +## Raising the Bar on Enterprise Security & Compliance + +Compliance is table stakes for enterprises, and it’s more vital than ever as AI-driven breaches are alarmingly on the rise. As industries like banking, insurance, and healthcare ramp up their AI use cases, the regulations around data privacy and security continue to tighten. That’s why Weaviate now provides **RBAC** as a fully supported, enterprise-grade feature in the **1.29 release**. + +**RBAC** gives you granular permission settings to secure your collections and tenants—ensuring that sensitive data, such as customer records or key business information, is accessible only to authorized users without cumbersome workarounds or bottlenecks. With custom or predefined roles, security and compliance teams can ensure that every user gets the right level of access, no more, no less. 
This approach not only reduces the risk of unauthorized data access but also streamlines compliance reporting and auditing, eliminating costly manual procedures. + +Executives, IT teams, and developers can now have more confidence to keep innovating, without compromising on compliance or risking data integrity. + +## Dedicated Deployment on Microsoft Azure + +Many enterprises rely on **Microsoft Azure** for their core infrastructure. To meet these users where they are, Weaviate now offers an Azure Enterprise Provisioner for easy cloud deployment. This enables customers to have their Weaviate Cloud deployment (managed by Weaviate) run in an isolated Azure environment. + +For businesses prioritizing security and compliance, the provisioner ensures full resource isolation via a private cluster and IP whitelisting, plus deep integration with Azure’s security and **role-based access controls**. It also manages essential infrastructure, like backups, logs, and metrics, so teams spend less time on DevOps overhead and more time building AI-driven applications. + +For those operating on Azure, this opens the door for simpler procurement, reduced operational friction, and dedicated resource environments that match the strict requirements of heavily regulated sectors. + +## Enhanced AI Performance: Multi-Vector Embeddings, NVIDIA Integrations, and Asynchronous Replication + +With Weaviate 1.29, we’re taking innovation even further. **Multi-vector embeddings** now enable both queries and objects to be represented by multiple vectors, unlocking more nuanced data understanding and improved search relevance—especially for dense content like medical data or academic papers. Our integration with the **Jina AI ColBERT vectorizer** exemplifies this approach, supporting “late interaction” search techniques that deliver superior precision. 
+ +In addition, Weaviate 1.29 introduces robust **NVIDIA integrations** – including text2vec-nvidia, multi2vec-nvidia, generative-nvidia, and reranker-nvidia – that seamlessly bring NVIDIA’s powerful inference engine into Weaviate. These modules simplify embedding creation, semantic search, and Retrieval Augmented Generation (RAG), empowering you to build sophisticated AI applications faster and more efficiently. + +Complementing these innovations, our new **asynchronous replication** feature seamlessly synchronizes data across nodes, further bolstering system reliability and performance in large-scale deployments. + +Together, these enhancements further elevate Weaviate’s position as a leading enterprise-grade vector database. For a deeper dive into the technical details, check out the [Weaviate 1.29 Release Highlights](/blog/weaviate-1-29-release) blog. + +## What’s Next + +That wraps up our look at the **new additions to Weaviate Cloud and the 1.29 release**—but the excitement doesn’t stop here. [Mark your calendars for **March 5**](https://link.mail.beehiiv.com/ss/c/u001.7ph1bOQkPnwamO4cv9f9A8jeYYCfFv91aBjGm1l9ezWQ7PVH1EkZELJKK7fxa04bAOWbwJ2WYaaD136yiCTURqFbSQBejldEJqvIuDUo_6B-tD8pmJCXebuYJUx0NpviZQxynvPP4vitQpuZ-FAOu-PKwROV8cNcnbceaxbz3yeDwP7eJ970d5IHv583qMine1EFPLJmWl2sido3qgWBFTMhCOUEyAYu4cGxr9A2HCc3T9hBBRkgPiIYpFO7yTd7RvVKZlMIxCGrcG3E8tCgaQ/4dz/v3dtMN9ZSJaBYAUu0KBytA/h20/h001.TrgKKrLVHWSDrE7WMueG26GAoMG46biDXDrp0keR2fU), where our experts will deliver a deep dive into new features like **multi-valued vectors**, **RBAC**, and more. If you can’t make the live session, don’t worry—we’ll share a recording so you won’t miss a thing. + +In the meantime, if you’re eager to get hands-on, check out the [**Quickstart tutorial**](/developers/wcs/embeddings#get-started), or explore [**Weaviate Cloud with a free trial**](https://console.weaviate.cloud/)—and experience for yourself how Weaviate can supercharge your AI applications. We’re excited to see what you build\! 
+ +[^1]: [BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand) (Fever dataset–5.4M documents): reduced search time from 517ms to 33ms. + +[^2]: [BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand) (MS Marco dataset–8.6M documents): from 10.53 GB to 0.92 GB, 77% fewer documents scored, 79% fewer blocks decompressed. + +import WhatsNext from '/_includes/what-next.mdx' + + \ No newline at end of file diff --git a/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx b/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx new file mode 100644 index 0000000000..2e39b6a5d5 --- /dev/null +++ b/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx @@ -0,0 +1,100 @@ +Weaviate `1.29` brings a host of new features and improvements. It introduces multi-vector embedding support (preview) and new NVIDIA model support. Weaviate's role-based access control (RBAC) and async replication are now generally available. We've also made further improvements to the BlockMax WAND algorithm to speed up keyword and hybrid searches, among other enhancements. + +Here are the release ⭐️*highlights*⭐️! + +![Weaviate 1.29](./img/hero.png) + +- [Multi-vector embedding support (Preview)](#multi-vector-embedding-support-preview) +- [NVIDIA model support](#nvidia-model-support) +- [Role-based access control (RBAC) in GA](#role-based-access-control-rbac-in-ga) +- [BlockMax WAND (Technical Preview)](#blockmax-wand-technical-preview) +- [Async replication in GA](#async-replication-in-ga) + +## Multi-vector embedding support (Preview) + +:::caution 🚧 Technical Preview +Multi-vector embedding support is added `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. 
**We do not recommend using this feature in production environments at this time.** +::: + +Weaviate now supports multi-vector embeddings, allowing you to store and query using multi-vector embeddings such as ColBERT, ColPali and ColQwen. + +![Single vs Multi-vector embedding comparison visualization](../../developers/weaviate/tutorials/_includes/single_multi_vector_comparison_light.png#gh-light-mode-only "Single vs Multi-vector embedding comparison visualization") +![Single vs Multi-vector embedding comparison visualization](../../developers/weaviate/tutorials/_includes/single_multi_vector_comparison_dark.png#gh-dark-mode-only "Single vs Multi-vector embedding comparison visualization") + +This approach enables more precise searching through "late interaction" - a technique that matches individual parts of texts rather than comparing them as whole units. + +Using multi-vector embeddings can improve the quality of search results, especially for long texts or complex queries. + +The following visualization shows how late interaction works in a ColBERT model, in comparison to a single-vector model. + +![ColBERT late interaction vs single-vector visualization](../../developers/weaviate/tutorials/_includes/colbert_late_interaction_light.png#gh-light-mode-only "ColBERT late interaction vs single-vector visualization") +![ColBERT late interaction vs single-vector visualization](../../developers/weaviate/tutorials/_includes/colbert_late_interaction_dark.png#gh-dark-mode-only "ColBERT late interaction vs single-vector visualization") + +This feature is available as a technical preview in `1.29`, so we're excited to hear your feedback and suggestions for further improvements. 
+ +If you would like to try out multi-vector embeddings in Weaviate, check out the [Multi-vector embeddings tutorial](/developers/weaviate/tutorials/multi-vector-embeddings) which will take you end-to-end, for both: + +- [Using Jina AI ColBERT model integration](/developers/weaviate/tutorials/multi-vector-embeddings#option-1-colbert-model-integration), or +- [Using user-provided multi-vector embeddings](/developers/weaviate/tutorials/multi-vector-embeddings#option-2-user-provided-embeddings). + +## NVIDIA model support + +Weaviate's suite of [model integrations](/developers/weaviate/model-providers/) now includes support for NVIDIA's NIM inference service. + +![Embedding integration illustration](../../developers/weaviate/model-providers/_includes/integration_nvidia_embedding.png) + +Weaviate users can now use NVIDIA model integration to [create text embeddings](/developers/weaviate/model-providers/nvidia/embeddings), [create multi-modal embeddings](/developers/weaviate/model-providers/nvidia/embeddings-multimodal), and use [generative AI models](/developers/weaviate/model-providers/nvidia/generative). (Reranker model support coming soon) + +These model integration pages provide detailed instructions on how to configure Weaviate with NVIDIA models and start using them in your applications. + +## Role-based access control (RBAC) in GA + +Role-based access control (RBAC) is now generally available in Weaviate `1.29`, offering more granular control over user permissions. + +The RBAC feature allows you to define roles and assign permissions to users based on their roles. This enables you to control who can access, read, write, or delete data in Weaviate. + +There have been a number of changes to the RBAC API in `1.29` from the preview API in `1.28`, some of which are breaking changes. + +This was done to make the API more consistent and easier to use, and to introduce new features. 
Also keep in mind that the RBAC feature is still in development, and we have plans to add more features in the future. + +Refer to the [RBAC documentation](/developers/weaviate/configuration/rbac) for more information. + +## BlockMax WAND (Technical Preview) + +:::caution 🚧 Technical Preview +BlockMax WAND algorithm is available in `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. **We do not recommend using this feature in production environments at this time.** +::: + +The BlockMax WAND algorithm continues to evolve in Weaviate `1.29` with further improvements to speed up BM25 and hybrid searches. + +It organizes the inverted index in blocks to enable skipping over blocks that are not relevant to the query. This can significantly reduce the number of documents that need to be scored, improving search performance. + +In our internal testing, we have seen up to a 10x speedup in keyword searches due to BlockMax WAND. + +If you are experiencing slow BM25 (or hybrid) searches, try enabling BlockMax WAND to see if it improves performance. + +To read more about BlockMax WAND, and to try it out, refer to the [Indexing page](/developers/weaviate/concepts/indexing#blockmax-wand-algorithm). + +**To use BlockMax WAND in Weaviate `v1.29`, it must be enabled prior to collection creation.** As of this version, Weaviate will not migrate existing collections to use BlockMax WAND. + +## Async replication in GA + +For those of you using Weaviate in a distributed environment, async replication is now generally available in `1.29`. + +When each shard is replicated across multiple nodes, async replication guarantees that all nodes holding copies of the same data remain in sync by periodically comparing and propagating data. + +Async replication supplements the existing repair-on-read mechanism. 
If a node becomes inconsistent between sync checks, the repair-on-read mechanism catches the problem at read time. + +To activate async replication, set `asyncEnabled` to true in the [`replicationConfig` section of your collection definition](/developers/weaviate/manage-data/collections#replication-settings). Visit the [How-to: Replication](/developers/weaviate/configuration/replication#async-replication-settings) page to learn more about the available async replication settings, and [Concepts: Replication/Consistency](/developers/weaviate/concepts/replication-architecture/consistency) for more information on how async replication works. + +## Summary + +Ready to Get Started? + +Enjoy the new features and improvements in Weaviate `1.29`. The release is available open-source as always [on GitHub](https://github.com/weaviate/weaviate/releases/tag/v1.29.0), and will be available for new Sandboxes on [Weaviate Cloud](https://console.weaviate.cloud/) very shortly. + +For those of you upgrading a self-hosted version, please check the [migration guide](/developers/weaviate/more-resources/migration#general-upgrade-instructions) for detailed instructions. + +It will be available for Serverless clusters on Weaviate Cloud soon as well. + +Thanks for reading, see you next time 👋! diff --git a/blog/2025-02-25-weaviate-1-29-release/index 2.mdx b/blog/2025-02-25-weaviate-1-29-release/index 2.mdx new file mode 100644 index 0000000000..112e15b260 --- /dev/null +++ b/blog/2025-02-25-weaviate-1-29-release/index 2.mdx @@ -0,0 +1,18 @@ +--- +title: Weaviate 1.29 Release +slug: weaviate-1-29-release +authors: [jp] +date: 2025-02-25 +image: ./img/hero.png +tags: ['release', 'engineering'] +description: "Read about multi-vector embedding support, improved keyword/hybrid searches, role-based access control and async replication going GA, new NVIDIA modules, and more." 
+ +--- + +import Core129 from './_core-1-29-include.mdx'; + + + +import WhatsNext from '/_includes/what-next.mdx' + + diff --git a/blog/2025-02-26-blockmax-wand/index 2.mdx b/blog/2025-02-26-blockmax-wand/index 2.mdx new file mode 100644 index 0000000000..59b8780cef --- /dev/null +++ b/blog/2025-02-26-blockmax-wand/index 2.mdx @@ -0,0 +1,255 @@ +--- +title: 'BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search' +slug: blockmax-wand +authors: [amourao, jp] +date: 2025-02-26 +tags: ['search', 'concepts', 'engineering'] +image: ./img/hero.png +description: "How Weaviate achieved 10x Faster Keyword Search and 90% index compression" +--- +![BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](./img/hero.png) + + +Keyword search is an integral part of Weaviate’s [hybrid search](/blog/hybrid-search-explained), designed to return [best of both](/blog/hybrid-search-fusion-algorithms) vector and keyword search. +Hybrid search as a tool for [RAG](/blog/introduction-to-rag) and [Agentic AI](/blog/ai-agents) increases the breadth and depth of information that can be retrieved from a dataset, but it comes with its own challenges. + +As the text corpora size becomes larger and larger, keyword searches can take long times to execute compared to vector searches. +In this blog post, you will learn how we improved Weaviate’s inverted index, how we avoid scoring all documents that have the query terms, how we compress the inverted index, and more about the improvements from using BlockMax WAND in Weaviate. + +:::caution 🚧 Technical Preview +BlockMax WAND algorithm is available in `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. 
**We do not recommend using this feature in production environments at this time.** +[Instructions on how to enable it are available here.](/developers/weaviate/concepts/indexing#blockmax-wand-algorithm) +::: + + +## Inverted Index and Tokenization + +Keyword search works by comparing the terms in your queries to the terms in your database documents, giving higher scores to rarer terms and terms that show up more often in your documents. +In keyword search context, these `terms` are called `tokens`, but we'll use the terms interchangeably in this blog post. + +Keyword search requires us to first define what terms we want to search. +A big part of this process is [tokenization](/developers/academy/py/tokenization/basics). It splits the input documents into [tokens](https://en.wikipedia.org/wiki/Lexical_analysis#Token) (i.e. terms, numbers or anything else one would consider important to be searched individually). +In this simple example, consider a dataset made of three documents with a single property, title, composed of a single sentence and a *whitespace* tokenizer that lowercases the documents and splits them by whitespace. + +| Doc Id | Document Title | Tokenized Title | +| :---- | :---- | :---- | +| 1 | A Web Developer's Guide to Hybrid Search | \[“a”, “web”, “developer”, “s”, “guide”, “to”, “hybrid”, “search”\] | +| 2 | Unlocking the Power of Hybrid Search | \[“unlocking”, “the”, “power”, “of”, “hybrid”, “search”\] | +| 3 | Vector Library versus Vector Database | \[“vector”, “library”, “versus”, “vector”, “database”\] | + +**Table**: Example dataset with tokenized titles. + +Now we have turned documents into a [bag-of-words](https://en.wikipedia.org/wiki/Bag-of-words_model) model, where we can search for individual query terms in the sentences. But having to go through all documents to find the ones that have query terms isn't efficient. 
+ +This is where the *inverted* part of the [inverted index](https://en.wikipedia.org/wiki/Inverted_index) comes from: instead of going document \-\> term, create an index that goes from term \-\> documents. +It works like the indexes at the end of books, but instead of mapping terms to book pages, we map terms to documents using posting lists. + +A posting list is a list of "postings", which contain the information needed to score documents: +* doc id: to identify which documents have the term. +* [term frequency (tf)](https://en.wikipedia.org/wiki/Tf%E2%80%93idf), which represents the number of times the term is part of the property. For example, `tf("vector")` for doc 3 is 2, as it shows up twice. + +| Term | Posting List | +| :---- | :---- | +| hybrid | (Doc 1, tf: 1); (Doc 2, tf: 1) | +| search | (Doc 1, tf: 1); (Doc 2, tf: 1) | +| vector | (Doc 3, tf: 2) | + +**Table**: Posting lists for terms `hybrid`, `search`, and `vector` in the example dataset. + +When a user searches for a query, we tokenize the query in the same way we tokenized the documents. +Thus, if you want to search for `"Hybrid search or Vector search"`, we get the tokens `["hybrid", "search", "or", "vector"]`. +Inspecting the posting lists for each token, we can see that documents 1, 2, and 3 have at least one of the query tokens. +But we still need to score them to see which ones are the most relevant to the query. + +## tf-idf and BM25 + +Not all terms are created equal. In our examples, words like "hybrid," "vector," and "database" are more informative than "a," "to," or "the." To rank results meaningfully, we need to score documents based on how important each term is. +[idf (Inverse Document Frequency)](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) is a measure of this importance, based on the number of documents a term appears in compared to the total number of documents. Higher values mean rarer terms that will contribute more to the score of a document. 
Combined with the tf, it becomes the cornerstone of keyword search, [tf-idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf). + +[BM25](/blog/hybrid-search-explained#bm25) further refines tf-idf by applying property length and frequency saturation normalization. + +## Exhaustive Search + +The exhaustive way of computing the [BM25](/blog/hybrid-search-explained#bm25) scores would be to check all the documents that have at least one of the query terms and score them. + +But this is quite resource intensive; most searches are for the top 10-100 results, and even with pagination, at most, only about 100 documents end up being shown to the user for each search. +This means that if 100 000 documents have at least one of the query terms (normal for queries with common words in databases with 1 million documents), this is 0.1% of the documents, many of which are completely irrelevant to the query, wasting a lot of CPU and I/O resources. + +## WAND + +[WAND (Weak AND)](https://dl.acm.org/doi/abs/10.1145/956863.956944) takes the inverted index and idf to greatly reduce the number of documents we need to inspect when searching for the top-*k* documents that match a query. +It relies on a two-step search to avoid ranking all documents for top-*k* search. + +* Approximate evaluation over query term postings in parallel to identify candidate docs with max impact heuristics (based on idf); +* Promising candidates are fully evaluated, their exact scores are computed and they are added to the top-*k* results if the scores are higher than the lowest score. + +Max impact is the maximum score a term can contribute to the score. +Its upper bound is the idf, e.g. a document with only the term *vector* will have a max impact equal to vector’s idf. 
+ +* As we start to rank, we add enough documents to fill the top-*k* list; +* When we get *k* candidates, the list is full and we have a **lower bound** to beat, which is the score of the lowest ranked document; +* As we move forward, we can then start skipping documents where the sum of the max impacts of its terms, is lower than the lower bound. + +WAND is what currently powers keyword search at Weaviate. +The following section will show why we are excited to introduce BlockMax WAND. + +## BlockMax WAND + +While WAND works well and is already able to greatly reduce the number of documents that we need to inspect, it still has some limitations: it relies on a single global value of idf for all documents in a term, which relies on the assumption that there may be a single document that just has that term. +[BlockMax WAND (BMW)](https://dl.acm.org/doi/10.1145/2009916.2010048) is WAND on steroids: +* Divides posting lists into blocks with local max impact; +* Skips and avoids decoding doc ids in blocks. + +BMW can be viewed as a *meta*-WAND, where we perform document skips at block level using max impact (shallow advances), and use the combination of max doc ids from blocks to even avoid loading whole blocks from disk. + +This table shows an example posting, as output by BlockMax WAND. You may notice that this includes some additional elements compared to postings for WAND shown above. + +![BlockMax WAND Block example](./img/block_example.png) + +A block is a mini posting list (list of doc ids and tfs) with its own metadata: +* **Max doc id**: highest doc id that shows up in the block; +* **Max impact**: maximum possible score for a document in the block; for tf-idf, this represents the maximum tf of a document within the block (norm tf equals tf/prop length). 
+ + +| Dataset | WAND | BlockMax WAND | +| :---- | -----: | ----: | +| MS Marco (8.6M docs) | 15.1% | 6.7% (-56%) | +| Fever (5.4M docs) | 20.8% | 8.4% (-60%) | +| Climate Fever (5.4M docs) | 29.3% | 12.2% (-58%) | + +**Table**: Average % of doc query terms scored Weaviate `v1.29.0` on standard [beir datasets](https://github.com/beir-cellar/beir) without stopword removal. +Exhaustive search always needs to examine **100%** of the document query terms with at least one query term. + +The experimental results show that BlockMax WAND is able to further halve the number of documents inspected from the already remarkable **15-30%** number of terms scored to **5-15%**. +But how does BlockMax WAND work in practice? + +### BlockMax WAND Demo + +At Weaviate, we like to show, not just tell. That's why we've created a **demo** to show you exactly how BlockMax WAND works in practice! +* **Input your documents, queries, and search parameters** and see exactly how Exhaustive search, WAND, and BlockMax WAND work; +* **Get the metrics on the number of documents and blocks scored**, and see the improvements of BlockMax WAND vs. WAND and Exhaustive search; +* **Share your dataset and queries** with others to show the improvements! + +