diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java new file mode 100644 index 0000000000..44102a6348 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.replication 2.java @@ -0,0 +1,247 @@ +// How-to: Manage-Data -> Classes +package io.weaviate.docs; + +import com.google.gson.GsonBuilder; +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.misc.model.ReplicationConfig; +import io.weaviate.client.v1.misc.model.ShardingConfig; +import io.weaviate.client.v1.schema.model.Schema; +import io.weaviate.client.v1.schema.model.WeaviateClass; +import io.weaviate.client.v1.misc.model.BM25Config; +import io.weaviate.client.v1.misc.model.InvertedIndexConfig; +import io.weaviate.client.v1.misc.model.VectorIndexConfig; +import io.weaviate.docs.helper.EnvHelper; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +@Tag("crud") +@Tag("classes") +class ManageDataReplicationTest { + + private static WeaviateClient client; + + @BeforeAll + public static void beforeAll() { + String scheme = EnvHelper.scheme("http"); + String host = EnvHelper.host("localhost"); + String port = EnvHelper.port("8181"); + + Config config = new Config(scheme, host + ":" + port); + client = new WeaviateClient(config); + + Result result = client.schema().allDeleter().run(); + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + } + + @Test + public void shouldManageDataClasses() { + + String collectionName = "Article"; + + createArticleWithReplicationConfig(collectionName); + 
deleteCollections(collectionName); + createArticleWithShardingConfig(collectionName); + updateArticleConfiguration(collectionName); + readAllCollections(); + } + + private void deleteCollections(String className) { + client.schema().classDeleter() + .withClassName(className) + .run(); + } + + private void print(Result result) { + String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult()); + System.out.println(json); + } + + private void readAllCollections() { + Result result = client.schema().getter() + .run(); + + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .extracting(Result::getResult).isNotNull() + .extracting(Schema::getClasses).asList() + .hasSize(1); + + print(result); + } + + private void createArticleWithReplicationConfig(String collectionName) { + // START AllReplicationSettings + // Configure replication settings + Integer replicationFactor = 3; + Boolean asyncEnabled = true; + + // Create replication configuration + ReplicationConfig replicationConfig = ReplicationConfig.builder() + .factor(replicationFactor) // factor=3 + .asyncEnabled(asyncEnabled) // async_enabled=True + .deletionStrategy(ReplicationConfig.DeletionStrategy.DELETE_ON_CONFLICT) + .build(); + + // Create the Article collection with replication configuration + WeaviateClass articleClass = WeaviateClass.builder() + .className(collectionName) + .description("Article collection with replication configuration") + .replicationConfig(replicationConfig) // Set the replication config + .build(); + + // Add the collection to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END AllReplicationSettings + + // Assert the result + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + 
// Verify the replication configuration was set correctly + Result classResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(classResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass createdClass = classResult.getResult(); + assertThat(createdClass).isNotNull() + .extracting(WeaviateClass::getReplicationConfig).isNotNull() + .returns(replicationFactor, ReplicationConfig::getFactor) + .returns(asyncEnabled, ReplicationConfig::getAsyncEnabled) + .returns(ReplicationConfig.DeletionStrategy.DELETE_ON_CONFLICT, + ReplicationConfig::getDeletionStrategy); + } + + private void createArticleWithShardingConfig(String collectionName) { + // START ShardingSettings + // Configure sharding settings + Integer virtualPerPhysical = 128; + Integer desiredCount = 1; + Integer desiredVirtualCount = 128; + + // Create sharding configuration + ShardingConfig shardingConfig = ShardingConfig.builder() + .virtualPerPhysical(virtualPerPhysical) // virtual_per_physical=128 + .desiredCount(desiredCount) // desired_count=1 + .desiredVirtualCount(desiredVirtualCount) // desired_virtual_count=128 + .build(); + + // Create the Article collection with sharding configuration + WeaviateClass articleClass = WeaviateClass.builder() + .className(collectionName) + .description("Article collection with sharding configuration") + .shardingConfig(shardingConfig) // Set the sharding config + .build(); + + // Add the collection to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END ShardingSettings + + // Assert the result + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + // Verify the sharding configuration was set correctly + Result classResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + 
assertThat(classResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass createdClass = classResult.getResult(); + assertThat(createdClass).isNotNull() + .extracting(WeaviateClass::getShardingConfig).isNotNull() + .returns(virtualPerPhysical, ShardingConfig::getVirtualPerPhysical) + .returns(desiredCount, ShardingConfig::getDesiredCount) + .returns(desiredVirtualCount, ShardingConfig::getDesiredVirtualCount); + } + + private void updateArticleConfiguration(String collectionName) { + // START UpdateCollection + // Get existing collection + Result existingResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(existingResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass existingClass = existingResult.getResult(); + + // Create updated configurations + InvertedIndexConfig invertedConfig = InvertedIndexConfig.builder() + .bm25(BM25Config.builder().k1(1.5f).build()) + .build(); + + VectorIndexConfig vectorConfig = VectorIndexConfig.builder() + .filterStrategy(VectorIndexConfig.FilterStrategy.ACORN) + .build(); + + ReplicationConfig replicationConfig = ReplicationConfig.builder() + .deletionStrategy(ReplicationConfig.DeletionStrategy.NO_AUTOMATED_RESOLUTION) + .build(); + + // Update collection with new configurations - preserve critical existing configs + WeaviateClass updatedClass = WeaviateClass.builder() + .className(collectionName) + .shardingConfig(existingClass.getShardingConfig()) // Preserve sharding (immutable) + .invertedIndexConfig(invertedConfig) // Update + .vectorIndexConfig(vectorConfig) // Update + .replicationConfig(replicationConfig) // Update + .build(); + + Result updateResult = client.schema().classUpdater() + .withClass(updatedClass) + .run(); + // END UpdateCollection + + // Debug: Print error if update fails + if (updateResult.hasErrors()) { + System.out.println("Update failed with error: " + updateResult.getError()); + } + + assertThat(updateResult).isNotNull() + 
.withFailMessage(() -> "Update failed: " + updateResult.getError()) + .returns(false, Result::hasErrors) + .returns(true, Result::getResult); + + // Verify updates + Result verifyResult = client.schema().classGetter() + .withClassName(collectionName) + .run(); + + assertThat(verifyResult).isNotNull() + .returns(false, Result::hasErrors); + + WeaviateClass verifyClass = verifyResult.getResult(); + + assertThat(verifyClass.getInvertedIndexConfig().getBm25().getK1()).isEqualTo(1.5f); + assertThat(verifyClass.getVectorIndexConfig().getFilterStrategy()).isEqualTo(VectorIndexConfig.FilterStrategy.ACORN); + assertThat(verifyClass.getReplicationConfig().getDeletionStrategy()).isEqualTo(ReplicationConfig.DeletionStrategy.NO_AUTOMATED_RESOLUTION); + } +} + \ No newline at end of file diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java new file mode 100644 index 0000000000..9c8fde4374 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20 2.java @@ -0,0 +1,58 @@ +package io.weaviate.docs.model_providers; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.schema.model.WeaviateClass; + +import java.util.HashMap; +import java.util.Map; + +// Set these environment variables +// WEAVIATE_HOSTNAME Your Weaviate instance hostname +// WEAVIATE_API_KEY Your Weaviate instance API key +// _APIKEY Your Provider API key + +public class UsageWeaviateTextEmbeddingsArcticEmbedLV20 { + public static void main(String[] args) throws Exception { + + String host = System.getenv("WEAVIATE_HOSTNAME"); + String apiKey = System.getenv("WEAVIATE_API_KEY"); + + Config config 
= new Config("https", host); + + WeaviateClient client = WeaviateAuthClient.apiKey(config, apiKey); + + client.schema().classDeleter().withClassName("DemoCollection").run(); + + // START BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + Map text2vecWeaviate = new HashMap<>(); + Map text2vecWeaviateSettings = new HashMap<>(); + + text2vecWeaviateSettings.put("properties", new String[]{"title"}); + // END BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviateSettings.put("model", new String[]{"Snowflake/snowflake-arctic-embed-l-v2.0"}); + // END BasicVectorizerWeaviate // END VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviateSettings.put("dimensions", new Integer[]{1024}); // 1024, 256 + text2vecWeaviateSettings.put("base_url", new String[]{""}); + // START BasicVectorizerWeaviate // START VectorizerWeaviateCustomModel // START SnowflakeArcticEmbedLV20 + text2vecWeaviate.put("text2vec-weaviate", text2vecWeaviateSettings); + + // Define the vector configurations + Map vectorConfig = new HashMap<>(); + vectorConfig.put("title_vector", WeaviateClass.VectorConfig.builder() + .vectorIndexType("hnsw") + .vectorizer(text2vecWeaviate) + .build()); + + // Create the collection "DemoCollection" + WeaviateClass clazz = WeaviateClass.builder() + .className("DemoCollection") + .vectorConfig(vectorConfig) + .build(); + + Result result = client.schema().classCreator().withClass(clazz).run(); + // END BasicVectorizerWeaviate // END VectorizerWeaviateCustomModel // END SnowflakeArcticEmbedLV20 + } +} diff --git a/_includes/code/howto/search.bm25.gql 2.py b/_includes/code/howto/search.bm25.gql 2.py new file mode 100644 index 0000000000..094d5eadbf --- /dev/null +++ b/_includes/code/howto/search.bm25.gql 2.py @@ -0,0 +1,75 @@ +# Howto: Hybrid search - Python examples + +# ================================ +# ===== INSTANTIATION-COMMON ===== +# 
================================ + +import weaviate +from weaviate.classes.init import Auth +import os + +# Best practice: store your credentials in environment variables +weaviate_url = os.environ["WEAVIATE_URL"] +weaviate_api_key = os.environ["WEAVIATE_API_KEY"] +openai_api_key = os.environ["OPENAI_APIKEY"] + +client = weaviate.connect_to_weaviate_cloud( + cluster_url=weaviate_url, + auth_credentials=Auth.api_key(weaviate_api_key), + headers={ + "X-OpenAI-Api-Key": openai_api_key, + }, +) + +gql_query = """ +# START BM25OperatorOrWithMin +{ + Get { + JeopardyQuestion( + limit: 3 + bm25: { + query: "Australian mammal cute" + # highlight-start + searchOperator: { + operator: Or, + minimumOrTokensMatch: 2 + } + # highlight-end + } + ) { + question + answer + } + } +} +# END BM25OperatorOrWithMin +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +gql_query = """ +# START BM25OperatorAnd +{ + Get { + JeopardyQuestion( + limit: 3 + bm25: { + query: "Australian mammal cute" + # highlight-start + searchOperator: { + operator: And, + } + # highlight-end + } + ) { + question + answer + } + } +} +# END BM25OperatorAnd +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +client.close() diff --git a/_includes/code/howto/search.hybrid.gql 2.py b/_includes/code/howto/search.hybrid.gql 2.py new file mode 100644 index 0000000000..f5b9ab445e --- /dev/null +++ b/_includes/code/howto/search.hybrid.gql 2.py @@ -0,0 +1,75 @@ +# Howto: Hybrid search - Python examples + +# ================================ +# ===== INSTANTIATION-COMMON ===== +# ================================ + +import weaviate +from weaviate.classes.init import Auth +import os + +# Best practice: store your credentials in environment variables +weaviate_url = os.environ["WEAVIATE_URL"] +weaviate_api_key = os.environ["WEAVIATE_API_KEY"] +openai_api_key = os.environ["OPENAI_APIKEY"] + +client = weaviate.connect_to_weaviate_cloud( + cluster_url=weaviate_url, + 
auth_credentials=Auth.api_key(weaviate_api_key), + headers={ + "X-OpenAI-Api-Key": openai_api_key, + }, +) + +gql_query = """ +# START HybridWithBM25OperatorOrWithMin +{ + Get { + JeopardyQuestion( + limit: 3 + hybrid: { + query: "Australian mammal cute" + # highlight-start + bm25SearchOperator: { + operator: Or, + minimumOrTokensMatch: 2 + } + # highlight-end + } + ) { + question + answer + } + } +} +# END HybridWithBM25OperatorOrWithMin +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +gql_query = """ +# START HybridWithBM25OperatorAnd +{ + Get { + JeopardyQuestion( + limit: 3 + hybrid: { + query: "Australian mammal cute" + # highlight-start + bm25SearchOperator: { + operator: And, + } + # highlight-end + } + ) { + question + answer + } + } +} +# END HybridWithBM25OperatorAnd +""" + +gqlresponse = client.graphql_raw_query(gql_query) + +client.close() diff --git a/_includes/code/python/best-practices.python.ai 2.py b/_includes/code/python/best-practices.python.ai 2.py new file mode 100644 index 0000000000..e57866f345 --- /dev/null +++ b/_includes/code/python/best-practices.python.ai 2.py @@ -0,0 +1,617 @@ +""" +=============================================== +Essential Weaviate Python Client Examples (v4.x) +=============================================== + +This document contains key examples for using the Weaviate Python client (v4.x). + +The page also includes comments that indicate to the user where they may find more information. + +If the provided examples are not sufficient as in-context example for code generation, +indicate to the user in comments where they may find more information. +""" + +# ======================== +# 1. 
CLIENT INSTANTIATION +# ======================== +# For more information, see the Python Client library page: https://www.weaviate.io/developers/weaviate/client-libraries/python + +""" +Connecting to Weaviate instances (local, cloud, custom) +""" + +import weaviate +from weaviate.classes.init import Auth +import os + +# Connect to local instance +client = weaviate.connect_to_local() + +# Connect with API key authentication and headers +client = weaviate.connect_to_local( + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Connect to Weaviate Cloud +client = weaviate.connect_to_weaviate_cloud( + cluster_url=os.environ["WEAVIATE_URL"], + auth_credentials=Auth.api_key(os.environ["WEAVIATE_API_KEY"]), + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Custom connection (more control) +client = weaviate.connect_to_custom( + http_host="localhost", + http_port=8080, + http_secure=False, + grpc_host="localhost", + grpc_port=50051, + grpc_secure=False, + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Add inference API keys as needed + } +) + +# Using context manager for automatic connection closing +with weaviate.connect_to_local() as client: + # Client operations go here... + pass # Connection is closed automatically + +# Using try/finally for connection handling +client = weaviate.connect_to_local() +try: + # Client operations go here... + pass +finally: + client.close() # Ensure connection is closed + + +# ========================= +# 2. 
COLLECTION MANAGEMENT +# ========================= +# For more information, see the How-to Manage Collections page: https://weaviate.io/developers/weaviate/manage-data/collections + +""" +Creating, configuring, and managing collections +""" + +from weaviate.classes.config import Configure, Property, DataType + +# Basic collection creation +client.collections.create("Article") + +# Collection with properties +client.collections.create( + "Article", + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with vectorizer +client.collections.create( + "Article", + vectorizer_config=Configure.Vectorizer.text2vec_openai(), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + Property(name="categories", data_type=DataType.TEXT_ARRAY), + Property(name="is_published", data_type=DataType.BOOL), + Property(name="word_count", data_type=DataType.INT), + ] +) + +# Collection with named vectors +client.collections.create( + "ArticleNV", + vectorizer_config=[ + Configure.NamedVectors.text2vec_openai( + name="title", + source_properties=["title"] + ), + Configure.NamedVectors.text2vec_openai( + name="title_body", + source_properties=["title", "body"] + ), + # For user-provided vectors + Configure.NamedVectors.none(name="custom_vector") + ], + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with generative module +client.collections.create( + "Article", + vectorizer_config=Configure.Vectorizer.text2vec_openai(), + generative_config=Configure.Generative.openai( + model="gpt-4" # Optional specific model + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ] +) + +# Collection with references (cross-references) +client.collections.create( + "Author", + properties=[ + 
Property(name="name", data_type=DataType.TEXT), + Property(name="birthday", data_type=DataType.DATE), + Property(name="height_m", data_type=DataType.NUMBER), + ], + references=[ + weaviate.classes.config.ReferenceProperty( + name="wroteArticle", + target_collection="Article" + ) + ] +) + +# Get a collection +collection = client.collections.get("Article") + +# Check if collection exists +exists = client.collections.exists("Article") + +# List all collections +collections = client.collections.list_all() + +# Update a collection +from weaviate.classes.config import Reconfigure + +collection = client.collections.get("Article") +collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + bm25_k1=1.5 + ) +) + +# Add a property to an existing collection +collection.config.add_property( + Property(name="publication_date", data_type=DataType.DATE) +) + +# Delete a collection +client.collections.delete("Article") + + +# ======================== +# 3. DATA OPERATIONS +# ======================== +# For more information, see the How-to Manage Data pages: https://weaviate.io/developers/weaviate/manage-data + +""" +Creating, updating, and retrieving objects +""" + +# Insert a single object +collection = client.collections.get("Article") +uuid = collection.data.insert({ + "title": "My first article", + "body": "This is the body of my first article.", +}) + +# Insert with a specific UUID +from weaviate.util import generate_uuid5 + +properties = { + "title": "My second article", + "body": "This is the body of my second article." +} +uuid = collection.data.insert( + properties=properties, + uuid=generate_uuid5(properties) # Generate a deterministic ID +) + +# Insert with a custom vector +collection.data.insert( + properties={ + "title": "Article with custom vector", + "body": "This article has a custom vector." 
+ }, + vector=[0.1, 0.2, 0.3, 0.4, 0.5] # Your vector values +) + +# Insert with named vectors +collection = client.collections.get("ArticleNV") +collection.data.insert( + properties={ + "title": "Named vector article", + "body": "This article uses named vectors." + }, + vector={ + "title": [0.1, 0.2, 0.3, 0.4, 0.5], # Vector for title + "title_body": [0.5, 0.4, 0.3, 0.2, 0.1] # Vector for title_body + } +) + +# Fetch an object by ID +obj = collection.query.fetch_object_by_id(uuid) +print(obj.properties) + +# Fetch objects with vectors +obj = collection.query.fetch_object_by_id(uuid, include_vector=True) +print(obj.vector) # Access the vector + +# Update an object +collection.data.update( + uuid=uuid, + properties={ + "title": "Updated title" + } +) + +# Replace an object (replaces all properties) +collection.data.replace( + uuid=uuid, + properties={ + "title": "Completely new title", + "body": "Completely new body" + } +) + +# Delete an object +collection.data.delete_by_id(uuid) + +# Working with references +article_uuid = collection.data.insert({"title": "Referenced Article"}) +author_collection = client.collections.get("Author") +author_uuid = author_collection.data.insert({"name": "John Doe"}) + +# Add a reference +author_collection.data.reference_add( + from_uuid=author_uuid, + from_property="wroteArticle", + to=article_uuid +) + + +# ======================== +# 4. 
BATCH OPERATIONS +# ======================== +# For more information, see the How-to batch import data page: https://weaviate.io/developers/weaviate/manage-data/import +# And the Python Client library page: https://weaviate.io/developers/weaviate/client-libraries/python#batch-imports + +""" +Batch import for better performance +""" + +# Fixed size batch (Recommended option) +collection = client.collections.get("Article") +with collection.batch.fixed_size(batch_size=50) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Dynamic batch (adapts to Weaviate load) +with collection.batch.dynamic() as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Rate limited batch +with collection.batch.rate_limit(requests_per_minute=600) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + +# Batch with error handling +with collection.batch.fixed_size(batch_size=50) as batch: + for i in range(100): + batch.add_object( + properties={ + "title": f"Article {i}", + "body": f"This is article {i}" + } + ) + if batch.number_errors > 10: + print("Too many errors, stopping batch") + break + +# Get failed objects after batch completes +failed_objects = collection.batch.failed_objects +if failed_objects: + print(f"Number of failed objects: {len(failed_objects)}") + +# Insert many items at once +from weaviate.classes.data import DataObject + +data_objects = [ + DataObject( + properties={"title": f"Article {i}", "body": f"Body {i}"}, + vector=[0.1] * 5 # Optional vector + ) + for i in range(10) +] + +collection.data.insert_many(data_objects) + + +# ======================== +# 5. 
SEARCH OPERATIONS +# ======================== +# For more information, see the How-to search pages: https://weaviate.io/developers/weaviate/search + +""" +Various search methods (semantic, keyword, hybrid) +""" + +# Basic search (fetch objects) +collection = client.collections.get("Article") +response = collection.query.fetch_objects( + limit=10, + return_properties=["title", "body"] +) + +for obj in response.objects: + print(obj.properties) + +# Semantic search with near_text +response = collection.query.near_text( + query="artificial intelligence applications", + limit=5 +) + +# Search based on vector +vector = [0.1, 0.2, 0.3, 0.4, 0.5] # Your vector here +response = collection.query.near_vector( + near_vector=vector, + limit=5 +) + +# Search based on existing object +response = collection.query.near_object( + near_object="36ddd591-2dee-4e7e-a3cc-eb86d30a4303", # UUID of reference object + limit=5 +) + +# BM25 keyword search +response = collection.query.bm25( + query="artificial intelligence", + query_properties=["title", "body"], + limit=5 +) + +# Hybrid search (combines semantic and keyword) +from weaviate.classes.query import HybridFusion + +response = collection.query.hybrid( + query="artificial intelligence", + alpha=0.5, # Balance between keyword and vector search + fusion_type=HybridFusion.RELATIVE_SCORE, + limit=5 +) + +# Search with filters +from weaviate.classes.query import Filter + +response = collection.query.near_text( + query="artificial intelligence", + filters=Filter.by_property("title").like("*AI*"), + limit=5 +) + +# Complex filtering +response = collection.query.near_text( + query="artificial intelligence", + filters=( + Filter.by_property("title").like("*AI*") & + (Filter.by_property("body").like("*research*") | + Filter.by_property("body").like("*innovation*")) + ), + limit=5 +) + +# Search with groupBy +from weaviate.classes.query import GroupBy + +response = collection.query.near_text( + query="artificial intelligence", + group_by=GroupBy( 
+ prop="category", + objects_per_group=2, + number_of_groups=3 + ), + limit=10 +) + +# For grouped results +for group_name, group_data in response.groups.items(): + print(f"Group: {group_name}, Objects: {group_data.number_of_objects}") + for obj in group_data.objects: + print(obj.properties) + +# Getting metadata with search +from weaviate.classes.query import MetadataQuery + +response = collection.query.near_text( + query="artificial intelligence", + return_metadata=MetadataQuery( + distance=True, # Vector distance + score=True, # Relevance score + creation_time=True # When the object was created + ), + limit=5 +) + +for obj in response.objects: + print(obj.properties) + print(f"Distance: {obj.metadata.distance}") + print(f"Score: {obj.metadata.score}") + print(f"Created: {obj.metadata.creation_time}") + + +# =============================== +# 6. GENERATIVE CAPABILITIES +# =============================== +# For more information, see the How-to generative search page: https://weaviate.io/developers/weaviate/search/generative + +""" +Using generative models with Weaviate +""" + +# Basic generation +collection = client.collections.get("Article") +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt="Summarize this article in one sentence: {title} - {body}", + limit=3 +) + +for obj in response.objects: + print(obj.properties) + print(f"Generated: {obj.generative.text}") + +# Grouped generation +response = collection.generate.near_text( + query="artificial intelligence", + grouped_task="Compare and contrast these AI articles", + limit=3 +) + +print(f"Grouped response: {response.generative.text}") + +# Generation with custom provider +from weaviate.classes.generate import GenerativeConfig + +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt="Summarize this article: {title}", + generative_provider=GenerativeConfig.openai( + model="gpt-4", + temperature=0.7 + ), + limit=3 +) + +# Generation 
with parameters +from weaviate.classes.generate import GenerativeParameters + +prompt = GenerativeParameters.single_prompt( + prompt="Summarize this article: {title}", + metadata=True, # Include metadata in response + debug=True # Include debug info +) + +response = collection.generate.near_text( + query="artificial intelligence", + single_prompt=prompt, + limit=3 +) + +for obj in response.objects: + print(f"Generated: {obj.generative.text}") + print(f"Metadata: {obj.generative.metadata}") + print(f"Debug: {obj.generative.debug}") + + +# ================================= +# 7. MULTI-TENANCY OPERATIONS +# ================================= +# For more information, see the How-to multi-tenancy page: https://weaviate.io/developers/weaviate/manage-data/multi-tenancy +# And the manage tenant data and temperatures page: https://weaviate.io/developers/weaviate/manage-data/tenant-states + +""" +Working with multi-tenant collections +""" + +# Create a multi-tenant collection +client.collections.create( + "MultiTenantArticle", + multi_tenancy_config=Configure.multi_tenancy(enabled=True), + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="body", data_type=DataType.TEXT), + ], + vectorizer_config=Configure.Vectorizer.text2vec_openai() +) + +mt_collection = client.collections.get("MultiTenantArticle") + +# Add tenants +from weaviate.classes.tenants import Tenant + +mt_collection.tenants.create( + tenants=[ + Tenant(name="tenant1"), + Tenant(name="tenant2") + ] +) + +# Get all tenants +tenants = mt_collection.tenants.get() + +# Get specific tenant +tenant = mt_collection.tenants.get_by_name("tenant1") + +# Use a specific tenant +tenant1_collection = mt_collection.with_tenant("tenant1") + +# Add data to a specific tenant +tenant1_collection.data.insert({ + "title": "Tenant 1 Article", + "body": "This belongs to tenant 1" +}) + +# Search within a specific tenant +response = tenant1_collection.query.near_text( + query="article", + limit=5 +) + + +# 
======================== +# 8. ITERATING OVER DATA +# ======================== +# For more information, see the iterator section of the Python Client library page: https://weaviate.io/developers/weaviate/client-libraries/python#collection-iterator-cursor-api + +""" +Iterating over large datasets +""" + +# Basic iteration +collection = client.collections.get("Article") +for article in collection.iterator(): + print(article.properties) + +# Iteration with specific properties +for article in collection.iterator(return_properties=["title"]): + print(article.properties["title"]) + +# Iteration with metadata +from weaviate.classes.query import MetadataQuery + +for article in collection.iterator( + return_metadata=MetadataQuery(creation_time=True) +): + print(article.properties) + print(article.metadata.creation_time) + + +# ======================== +# 9. CLEANUP +# ======================== + +# Don't forget to close the client when done +client.close() diff --git a/_includes/code/python/howto.configure.rbac.oidc.users 2.py b/_includes/code/python/howto.configure.rbac.oidc.users 2.py new file mode 100644 index 0000000000..03fcf75a2e --- /dev/null +++ b/_includes/code/python/howto.configure.rbac.oidc.users 2.py @@ -0,0 +1,51 @@ +# TODO[g-despot]: OIDC testing not yet implemented +from weaviate.classes.rbac import Permissions + +# START AdminClient +import weaviate +from weaviate.classes.init import Auth + +# Connect to Weaviate as root user +client = weaviate.connect_to_local( + # END AdminClient + # Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port=8580, + grpc_port=50551, + # START AdminClient + auth_credentials=Auth.api_key("root-user-key"), +) +# END AdminClient + +from weaviate.classes.rbac import Permissions + +permissions = [ + Permissions.collections( + collection="TargetCollection*", read_config=True, create_collection=True + ), + Permissions.data(collection="TargetCollection*", read=True, create=True), +] + 
+client.roles.delete(role_name="testRole") +client.roles.create(role_name="testRole", permissions=permissions) + +# START AssignOidcUserRole +client.users.oidc.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"]) +# END AssignOidcUserRole +assert "testRole" in client.users.oidc.get_assigned_roles("custom-user") +assert "viewer" in client.users.oidc.get_assigned_roles("custom-user") + +# START ListOidcUserRoles +user_roles = client.users.oidc.get_assigned_roles("custom-user") + +for role in user_roles: + print(role) +# END ListOidcUserRoles +assert "testRole" in user_roles +assert "viewer" in user_roles + +# START RevokeOidcUserRoles +client.users.oidc.revoke_roles(user_id="custom-user", role_names="testRole") +# END RevokeOidcUserRoles +assert "testRole" not in client.users.oidc.get_assigned_roles("custom-user") + +client.close() diff --git a/_includes/code/python/howto.configure.rbac.users 2.py b/_includes/code/python/howto.configure.rbac.users 2.py new file mode 100644 index 0000000000..8c4d4d6b11 --- /dev/null +++ b/_includes/code/python/howto.configure.rbac.users 2.py @@ -0,0 +1,75 @@ +from weaviate.classes.rbac import Permissions + +# START AdminClient +import weaviate +from weaviate.classes.init import Auth + +# Connect to Weaviate as root user +client = weaviate.connect_to_local( + # END AdminClient + # Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port=8580, + grpc_port=50551, + # START AdminClient + auth_credentials=Auth.api_key("root-user-key"), +) +# END AdminClient + +user_api_key = client.users.db.delete(user_id="custom-user") + +# START CreateUser +user_api_key = client.users.db.create(user_id="custom-user") +print(user_api_key) +# END CreateUser +assert len(user_api_key) > 0 + +# START RotateApiKey +new_api_key = client.users.db.rotate_key(user_id="custom-user") +print(new_api_key) +# END RotateApiKey +assert len(new_api_key) > 0 and new_api_key != user_api_key + +from weaviate.classes.rbac 
import Permissions + +permissions = [ + Permissions.collections( + collection="TargetCollection*", read_config=True, create_collection=True + ), + Permissions.data(collection="TargetCollection*", read=True, create=True), +] + +client.roles.delete(role_name="testRole") +client.roles.create(role_name="testRole", permissions=permissions) + +# START AssignRole +client.users.db.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"]) +# END AssignRole +assert "testRole" in client.users.db.get_assigned_roles("custom-user") +assert "viewer" in client.users.db.get_assigned_roles("custom-user") + +# START ListAllUsers +print(client.users.db.list_all()) +# END ListAllUsers + +# START ListUserRoles +user_roles = client.users.db.get_assigned_roles("custom-user") + +for role in user_roles: + print(role) +# END ListUserRoles +assert "testRole" in user_roles +assert "viewer" in user_roles + +# START RevokeRoles +client.users.db.revoke_roles(user_id="custom-user", role_names="testRole") +# END RevokeRoles +assert "testRole" not in client.users.db.get_assigned_roles("custom-user") + +# START DeleteUser +client.users.db.delete(user_id="custom-user") +# END DeleteUser +assert all( + user.user_id != "custom-user" for user in client.users.db.list_all() +), "custom-user not deleted" + +client.close() diff --git a/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts b/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts new file mode 100644 index 0000000000..2dbfc5338c --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.oidc.users 2.ts @@ -0,0 +1,68 @@ +// TODO[g-despot]: OIDC testing not yet implemented +import assert from 'assert' + +// START AdminClient +import weaviate, { type WeaviateClient } from 'weaviate-client' + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 
8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("root-user-key") +}) +// END AdminClient + +const { permissions } = weaviate + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true} + ), +] + +await client.roles.delete("testRole") +await client.roles.create("testRole", collectionPermissions) + +// START AssignOidcUserRole +await client.users.oidc.assignRoles(["testRole", "viewer"], "custom-user",) +// END AssignOidcUserRole +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START ListOidcUserRoles +const userRoles = await client.users.oidc.getAssignedRoles("custom-user") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListOidcUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START RevokeOidcUserRoles +await client.users.oidc.revokeRoles("testRole","custom-user") +// END RevokeOidcUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), false) + +client.close() diff --git a/_includes/code/typescript/howto.configure.rbac.permissions 2.ts b/_includes/code/typescript/howto.configure.rbac.permissions 2.ts new file mode 100644 index 0000000000..8a55a60c6c --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.permissions 2.ts @@ -0,0 +1,157 @@ +import weaviate, { WeaviateClient } from 'weaviate-client' +import 
assert from 'assert' + +const client: WeaviateClient = await weaviate.connectToLocal({ + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + authCredentials: new weaviate.ApiKey("user-a-key") +}) + + +async function resetUser(user: string, client: WeaviateClient) { + // Clean slate + const currentRoles = await client.users.getAssignedRoles(user) // check if user exists + for await (const [role, value] of Object.entries(currentRoles)) { + await client.users.revokeRoles(role, user) // revoke all roles + } +} +// ================================================================= +// =============== EXAMPLE: READ + WRITE PERMISSIONS +// ================================================================= + +// Clean slate +resetUser("user-b", client) +await client.roles.delete("rw_role") // delete if exists + +// START ReadWritePermissionDefinition // START MTPermissionsExample +const { permissions } = weaviate +// END ReadWritePermissionDefinition // END MTPermissionsExample + +// START ReadWritePermissionDefinition + +// Define permissions (example confers read+write rights to collections starting with "TargetCollection") +const allPermissions = [ + // Collection level permissions + permissions.collections({ + collection: "TargetCollection*", + create_collection: true, // Allow creating new collections + read_config: true, // Allow reading collection info/metadata + update_config: true, // Allow updating collection configuration, i.e. 
update schema properties, when inserting data with new properties + delete_collection: true, // Allow deleting collections + } ), + // Collection data level permissions + permissions.data({ + collection: "TargetCollection*", + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: false, // Allow data deletes + }), + permissions.backup({ + collection:"TargetCollection*", + manage: true + }), + permissions.nodes.verbose({ + collection: "TargetCollection*", + read: true + }), + permissions.cluster({ + read: true + }), +] + +// Create a new role +await client.roles.create("rw_role", allPermissions) +// END ReadWritePermissionDefinition + +// START ReadWritePermissionAssignment +// Assign the role to a user +await client.users.assignRoles(["rw_role"], "user-b",) +// END ReadWritePermissionAssignment + +// ===== TEST ===== basic checks to see if the role was created +const userPermissions = await client.users.getAssignedRoles("user-b") + +assert.equal("rw_role", Object.keys(userPermissions)) +assert.equal(userPermissions["rw_role"].collectionsPermissions[0].collection, "TargetCollection*") +assert.equal(userPermissions["rw_role"].name, "rw_role") + +// ================================================================= +// =============== EXAMPLE: VIEWER PERMISSIONS +// ================================================================= + +// Clean slate +await client.roles.delete("viewer_role") // delete if exists + +// START ViewerPermissionDefinition + +// Define permissions (example confers viewer rights to collections starting with "TargetCollection") +const newPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + }), + permissions.data({ + collection: "TargetCollection*", + read: true}), +] + +// Create a new role +await client.roles.create("viewer_role", newPermissions) +// END ViewerPermissionDefinition + +// START ViewerPermissionAssignment 
+// Assign the role to a user +await client.users.assignRoles("user-b", "viewer_role") +// END ViewerPermissionAssignment + +// ================================================================= +// =============== EXAMPLE: VIEWER PERMISSIONS +// ================================================================= + +// Clean slate +client.roles.delete("tenant_manager") + +// START MTPermissionsExample + +const tenantPermissions = [ + permissions.tenants({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow creating new tenants + read: true, // Allow reading tenant info/metadata + update: true, // Allow updating tenant states + delete: true, // Allow deleting tenants + }), + permissions.data({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: true, // Allow data deletes + }) +] + +// Create a new role +await client.roles.create("tenant_manager", tenantPermissions) +// END MTPermissionsExample +// START MTPermissionsAssignment +// Assign the role to a user +client.users.assignRoles("user-b", "tenant_manager") +// END MTPermissionsAssignment + +// ===== TEST ===== basic checks to see if the role was created +const testUserPermissions = await client.users.getAssignedRoles("user-b") + +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "viewer_role" +)), true) +assert.equal( + testUserPermissions["viewer_role"].collectionsPermissions[0].collection + ,"TargetCollection*" +) +assert.equal(testUserPermissions["viewer_role"].name, "viewer_role") + +client.close() diff --git 
a/_includes/code/typescript/howto.configure.rbac.roles 2.ts b/_includes/code/typescript/howto.configure.rbac.roles 2.ts new file mode 100644 index 0000000000..57036af053 --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.roles 2.ts @@ -0,0 +1,339 @@ +import assert from "assert" +// START AdminClient +import weaviate, { WeaviateClient } from 'weaviate-client' + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("user-a-key") +}) +// END AdminClient + +// TODO: Remove if not used +const customUserClient: WeaviateClient = await weaviate.connectToLocal({ + port: 8580, + grpcPort: 50551, + authCredentials: new weaviate.ApiKey("user-b-key") +}) + +const allRolesCheck = await client.roles.listAll() + +for await (const [key, value] of Object.entries(allRolesCheck)) { + if (!["viewer", "root", "admin"].includes(key)) { + await client.roles.delete(key) + } +} + +// Todo: This will be added in upcoming release +// START CreateRole +await client.roles.create("testRole") +// END CreateRole + +// START AddClusterPermission // START AddManageRolesPermission // START AddCollectionsPermission // START AddTenantPermission // START AddDataObjectPermission // START AddBackupPermission // START AddNodesPermission // START AddRoles // START RemovePermissions +const { permissions } = weaviate +// END AddClusterPermission // END AddManageRolesPermission // END AddCollectionsPermission // END AddTenantPermission // END AddDataObjectPermission // END AddBackupPermission // END AddNodesPermission // END AddRoles // END RemovePermissions + +// todo add scope when tommy adds it +// START AddManageRolesPermission + +const rolePermission = permissions.roles({ + role: "testRole", + create: true, + read: true, + update: true, + 
delete: true, +}) + +await client.roles.create("testRole", rolePermission) +// END AddManageRolesPermission + +assert(Object.keys(await client.roles.listAll()).includes('testRole')); + +await client.roles.delete("testRole") + +// START AddManageUsersPermission + +const userPermission = permissions.users({ + user: "testRole", // Applies to all users starting with "testUser" + assignAndRevoke: true, // Allow assigning and revoking roles to and from users + read: true, // Allow reading user info +}) + +await client.roles.create("testRole", userPermission) +// END AddManageUsersPermission + +assert(Object.keys(await client.roles.listAll()).includes('testRole')); + +await client.roles.delete("testRole") + +// START AddCollectionsPermission + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + create_collection: true, // Allow creating new collections + read_config: true, // Allow reading collection info/metadata + update_config: true, // Allow updating collection configuration, i.e. 
update schema properties, when inserting data with new properties + delete_collection: true, // Allow deleting collections + }), +] + +await client.roles.create("testRole", collectionPermissions) + +// END AddCollectionsPermission +const getCollectionPermissions = await client.roles.byName("testRole") + +if (getCollectionPermissions) { + assert.equal((getCollectionPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddTenantPermission + +const AddTenantPermissions = [ + permissions.tenants({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow creating new tenants + read: true, // Allow reading tenant info/metadata + update: true, // Allow updating tenant states + delete: true, // Allow deleting tenants + }), +] + +await client.roles.create("testRole", AddTenantPermissions) +// END AddTenantPermission +const getTenantCollection = await client.roles.byName("testRole") + +if (getTenantCollection) { + assert.equal((getTenantCollection.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +client.roles.delete("testRole") + +// START AddDataObjectPermission + +const dataPermissions = [ + permissions.data({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + tenant: "TargetTenant*", // Applies to all tenants starting with "TargetTenant" + create: true, // Allow data inserts + read: true, // Allow query and fetch operations + update: true, // Allow data updates + delete: false, // Allow data deletes + }), +] + +await client.roles.create("testRole", dataPermissions) + +// END AddDataObjectPermission +const getDataPermissions = await client.roles.byName("testRole") + +if (getDataPermissions) { + 
assert.equal((getDataPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddBackupPermission + +const backupsPermissions = [ + permissions.backup({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + manage: true, // Allow managing backups + }), +] + +await client.roles.create("testRole", backupsPermissions) +// END AddBackupPermission + +const getBackupsPermissions = await client.roles.byName("testRole") + +if (getBackupsPermissions) { + assert.equal((getBackupsPermissions.dataPermissions.some( + permission => permission.collection == "TargetCollection*" + )), true) +} + +await client.roles.delete("testRole") + +// START AddClusterPermission + +const clusterPermissions = [ + permissions.cluster({ + read: true // Allow reading cluster data + }), +] + +await client.roles.create("testRole", clusterPermissions) +// END AddClusterPermission + +const getClusterPermissions = await client.roles.byName("testRole") +// assert permissions.cluster_permissions + +await client.roles.delete("testRole") + +// START AddNodesPermission + +const verboseNodePermissions = [ + permissions.nodes.verbose({ + collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + read: true, // Allow reading node metadata + }), +] + +// The `minimal` verbosity level applies to all collections unlike +// the `verbose` level where you specify the collection name filter +const minimalNodePermissions = [ + permissions.nodes.minimal({ + read: true, // Allow reading node metadata + }), +] + +await client.roles.create("testRole", verboseNodePermissions) // or `minimalNodePermissions` +// END AddNodesPermission + +const getNodePermissions = await client.roles.byName("testRole") + +if (getNodePermissions) { + assert.equal((getNodePermissions.dataPermissions.some( + permission => permission.collection == 
"TargetCollection*" + )), true) +} + + +await client.roles.delete("testRole") + +// This is to add additional permission to below +const dummyPermission = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + }), +] + +await client.roles.create("testRole", dummyPermission) + +// START AddRoles + +const additionalDataPermissions = [ + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: false + }), +] + +client.roles.addPermissions("testRole", additionalDataPermissions) +// END AddRoles + +// START AssignRole +await client.users.assignRoles(["testRole", "viewer"], "user-b") +// END AssignRole +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "viewer*" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "testRole" +)), true) + +// START ListCurrentUserRoles +console.log(await client.users.getMyUser()) +// END ListCurrentUserRoles + +// START ListUserRoles +const userRoles = await client.users.getAssignedRoles("user-b") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListUserRoles + +assert.equal((userRoles["testRole"].collectionsPermissions.some( + permission => permission.collection == "TargetCollection*" +)), true) + +assert.equal((userRoles["testRole"].dataPermissions.some( + permission => permission.collection == "TargetCollection*" +)), true) + +// START CheckRoleExists +console.log(await client.roles.exists("testRole")) // Returns true or false +// END CheckRoleExists + +// START InspectRole +const testRole = await client.roles.byName("testRole") + +console.log(testRole) +console.log(testRole?.collectionsPermissions) +console.log(testRole?.dataPermissions) +// END InspectRole + +// START AssignedUsers +const assignedUsers = await 
client.roles.userAssignments("testRole") + +for (const users of assignedUsers) { + console.log(users) +} +// END AssignedUsers +assert.equal(assignedUsers.some( + role => role.id == "custom-user" +), true) + +// START ListAllRoles +const allRoles = await client.roles.listAll() + +for (const [key, value] of Object.entries(allRoles)) { + console.log(key) +} +// END ListAllRoles + +// START RemovePermissions + +const permissionsToRemove = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true, + delete_collection: true, + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: false + }), +] + +await client.roles.removePermissions("testRole", permissionsToRemove) +// END RemovePermissions + +// START RevokeRoles +await client.users.revokeRoles("user-b", "testRole") +// END RevokeRoles +assert.equal((Object.keys(await client.users.getAssignedRoles("user-b")).some( + role => role == "testRole" +)), false) + +// START DeleteRole +await client.roles.delete("testRole") +// END DeleteRole + +client.close() +customUserClient.close() diff --git a/_includes/code/typescript/howto.configure.rbac.users 2.ts b/_includes/code/typescript/howto.configure.rbac.users 2.ts new file mode 100644 index 0000000000..9242a409d6 --- /dev/null +++ b/_includes/code/typescript/howto.configure.rbac.users 2.ts @@ -0,0 +1,96 @@ +import assert from 'assert' +import weaviate, { type WeaviateClient } from 'weaviate-client' +// START AdminClient + +// Connect to Weaviate as root user +const client: WeaviateClient = await weaviate.connectToLocal({ + // END AdminClient + // Use custom port defined in tests/docker-compose-rbac.yml (without showing the user) + port: 8580, + grpcPort: 50551, + // START AdminClient + authCredentials: new weaviate.ApiKey("root-user-key") +}) +// END AdminClient +// START CreateUser +let userApiKey +// END CreateUser + +userApiKey = await client.users.db.delete("custom-user") + +// START 
CreateUser +userApiKey = await client.users.db.create("custom-user") +console.log(userApiKey) +// END CreateUser +assert.equal((userApiKey.length > 0), true) + +// START RotateApiKey +let newApiKey +newApiKey = await client.users.db.rotateKey("custom-user") +console.log(newApiKey) +// END RotateApiKey +assert.equal( (newApiKey.length > 0) && (newApiKey != userApiKey), true) + +const { permissions } = weaviate + +const collectionPermissions = [ + permissions.collections({ + collection: "TargetCollection*", + read_config: true, + create_collection: true + }), + permissions.data({ + collection: "TargetCollection*", + read: true, + create: true}), +] + +await client.roles.delete("testRole") +await client.roles.create("testRole", collectionPermissions) + +// START AssignRole +await client.users.db.assignRoles(["testRole", "viewer"], "custom-user") +// END AssignRole + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) + +assert.equal((Object.keys(await client.users.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START ListAllUsers +console.log(await client.users.db.listAll()) +// END ListAllUsers + +// START ListUserRoles +let userRoles = await client.users.db.getAssignedRoles("custom-user") + +for (const [role, value] of Object.entries(userRoles)) { + console.log(role) +} +// END ListUserRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), true) +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "viewer" +)), true) + +// START RevokeRoles +await client.users.db.revokeRoles("custom-user", "testRole") +// END RevokeRoles +assert.equal((Object.keys(await client.users.db.getAssignedRoles("custom-user")).some( + role => role == "testRole" +)), false) + +// START DeleteUser +await client.users.db.delete("custom-user") +// END DeleteUser 
+assert( + !(await client.users.db.listAll()).some(user => user.id === "custom-user"), + "custom-user not deleted" + ) + +client.close() diff --git a/_includes/collections-count-limit 2.mdx b/_includes/collections-count-limit 2.mdx new file mode 100644 index 0000000000..94e20f1269 --- /dev/null +++ b/_includes/collections-count-limit 2.mdx @@ -0,0 +1,3 @@ +:::info +To ensure optimal performance, Weaviate **limits the number of collections per instance**. The default limit is `100` collections and it can be adjusted via the [`MAXIMUM_ALLOWED_COLLECTIONS_COUNT`](/developers/weaviate/config-refs/env-vars) environment variable. +::: \ No newline at end of file diff --git a/_includes/configuration/configure-rbac 2.mdx b/_includes/configuration/configure-rbac 2.mdx new file mode 100644 index 0000000000..e709c1b02c --- /dev/null +++ b/_includes/configuration/configure-rbac 2.mdx @@ -0,0 +1,50 @@ +import Link from '@docusaurus/Link'; + +:::tip Follow these general steps to configure RBAC: + +
    +
  1. + + Step 1. Connect to Weaviate with a user possessing{' '} + + + role management permissions + + + . +
  2. +
  3. + + Step 2. Grant permissions to a{' '} + + + new role + + {' '} + or an{' '} + + + existing role + + + . +
  4. +
  5. + + Step 3.{' '} + + + Assign the role to a user + + + . +
  6. +
+ +::: diff --git a/_includes/configuration/dynamic-user-management 2.mdx b/_includes/configuration/dynamic-user-management 2.mdx new file mode 100644 index 0000000000..e564316069 --- /dev/null +++ b/_includes/configuration/dynamic-user-management 2.mdx @@ -0,0 +1,5 @@ +:::tip TIP: User management API available from `v1.30` + +Instead of adding additional users via the `AUTHENTICATION_APIKEY_USERS` environment variable, we suggest using the [user management API](/developers/weaviate/configuration/rbac/manage-users) which you can use to create and delete users, manage their roles and rotate their API keys. + +::: diff --git a/_includes/latest-weaviate-version 2.mdx b/_includes/latest-weaviate-version 2.mdx new file mode 100644 index 0000000000..b32f227abe --- /dev/null +++ b/_includes/latest-weaviate-version 2.mdx @@ -0,0 +1,6 @@ +:::tip TIP: Use the latest Weaviate version! + +When possible, try to use the latest Weaviate version. +New releases include cutting-edge features, performance enhancements, and critical security updates to keep your application safe and up-to-date. + +::: diff --git a/_includes/named-vector-compress 2.mdx b/_includes/named-vector-compress 2.mdx new file mode 100644 index 0000000000..e952ed3e87 --- /dev/null +++ b/_includes/named-vector-compress 2.mdx @@ -0,0 +1,4 @@ +:::info Added in `v1.24` +::: + +Collections can have multiple [named vectors](/developers/weaviate/config-refs/schema/multi-vector). The vectors in a collection can have their own configurations, and compression must be enabled independently for each vector. Every vector is independent and can use [PQ](/developers/weaviate/configuration/compression/pq-compression), [BQ](/developers/weaviate/configuration/compression/bq-compression), [SQ](/developers/weaviate/configuration/compression/sq-compression), or no compression. 
diff --git a/_includes/runtime-generative 2.mdx b/_includes/runtime-generative 2.mdx new file mode 100644 index 0000000000..eb6fc14d6d --- /dev/null +++ b/_includes/runtime-generative 2.mdx @@ -0,0 +1,5 @@ +:::tip + +You can [override the generative integration settings at query time](/developers/weaviate/search/generative#configure-a-generative-model-provider) without updating it in the collection configuration. + +::: diff --git a/_includes/wcs/create-api-keys 2.mdx b/_includes/wcs/create-api-keys 2.mdx new file mode 100644 index 0000000000..d49fe366da --- /dev/null +++ b/_includes/wcs/create-api-keys 2.mdx @@ -0,0 +1,95 @@ +When connecting to a Weaviate Cloud cluster, you need an API key and the REST endpoint URL for authentication. + +If you don't have an existing API key, you'll need to create one. Follow these steps to find the API keys section and create a new key if necessary: + +import Link from '@docusaurus/Link'; +import Register from '/developers/wcs/img/weaviate-cloud-api-key-create.png'; + +
+
+
    +
  1. + Open the{' '} + Weaviate Cloud console{' '} + and{' '} + + select your cluster + + . +
  2. +
  3. + Navigate to the API Keys section, found in the{' '} + Cluster details panel. +
  4. +
  5. + If you need a new API key, click the Create API key{' '} + button (1 in the image below). +
  6. +
+
+
+
+
+ Navigate to the API Keys section +
+
Navigate to the API Keys section.
+
+
+
+
+ +import NewAPIKeyForm from '/developers/wcs/img/weaviate-cloud-api-key-create-form.png'; +import SaveAPIKey from '/developers/wcs/img/weaviate-cloud-api-key-save.png'; + +
+
+
    +
  1. + In the Create API Key form, provide a descriptive name for + your key (1). +
  2. +
  3. + Choose the role for this API key (2). You + can either select an existing role like admin or{' '} + viewer, or{' '} + create a new role with + specific permissions. +
  4. +
  5. + Click the Create button (3). +
  6. +
+
+
+
+
+ Create a new API key +
+
Create a new API key.
+
+
+
+
+ +
+
+
    +
  1. + Important: This is the only time your API key will be + displayed. Make sure to copy it (1) or + download it (2) and store it in a secure + location immediately after creation. You will not be able to retrieve + the full key again. +
  2. +
+
+
+
+
+ Save your API key +
+
Save your API key.
+
+
+
+
diff --git a/_includes/wcs/hostname-warning 2.mdx b/_includes/wcs/hostname-warning 2.mdx new file mode 100644 index 0000000000..788c19d774 --- /dev/null +++ b/_includes/wcs/hostname-warning 2.mdx @@ -0,0 +1,3 @@ +:::caution +This client uses the `hostname` parameter (without the `https` scheme) instead of a complete `URL`. +::: \ No newline at end of file diff --git a/_includes/wcs/retrieve-rest-endpoint 2.mdx b/_includes/wcs/retrieve-rest-endpoint 2.mdx new file mode 100644 index 0000000000..b9018dd43e --- /dev/null +++ b/_includes/wcs/retrieve-rest-endpoint 2.mdx @@ -0,0 +1,37 @@ +import Link from '@docusaurus/Link'; + +This is how you can retrieve your `REST Endpoint`: + +import WCDClusterURL from '/developers/weaviate/quickstart/img/cluster_url.png'; +import WCDClusterAdminKey from '/developers/weaviate/quickstart/img/cluster_admin_key.png'; + +
+
+
    +
  1. + On the Cluster details page or within the{' '} + API Keys section, find the REST Endpoint URL. +
  2. +
  3. + Copy the REST Endpoint URL and store it securely. +
  4. +
+
+
+
+
+
+ Get the (REST) endpoint URL +
+
+ Grab the REST Endpoint URL. +
+
+
+
+
+
+ +:::note REST Endpoint vs gRPC Endpoint +When using an official Weaviate [client library](/developers/weaviate/client-libraries), you need to authenticate using the `REST Endpoint` and your API key. The client will infer the gRPC endpoint automatically and use the more performant gRPC protocol when available. +::: diff --git a/_includes/wcs/weaviate-cloud-edit-organization 2.mdx b/_includes/wcs/weaviate-cloud-edit-organization 2.mdx new file mode 100644 index 0000000000..db0f587ebe --- /dev/null +++ b/_includes/wcs/weaviate-cloud-edit-organization 2.mdx @@ -0,0 +1,32 @@ +import Link from '@docusaurus/Link'; +import OrganizationSettings from '/developers/wcs/img/weaviate-cloud-organization-settings.png'; + +
+
+
    +
  1. + Open the{' '} + Weaviate Cloud console. +
  2. +
  3. + Open the organization dropdown menu (1). +
  4. +
  5. + Click on Organization settings ( + 2). +
  6. +
+
+
+
+
+ Edit an organization in Weaviate Cloud +
+
Edit an organization in Weaviate Cloud.
+
+
+
+
diff --git a/_includes/weaviate-embeddings-models 2.mdx b/_includes/weaviate-embeddings-models 2.mdx new file mode 100644 index 0000000000..b52f313b87 --- /dev/null +++ b/_includes/weaviate-embeddings-models 2.mdx @@ -0,0 +1,21 @@ +### `Snowflake/snowflake-arctic-embed-l-v2.0` (default) {#snowflake-arctic-embed-l-v2.0} + +- A 568M parameter, 1024-dimensional model for multilingual enterprise retrieval tasks. +- Trained with Matryoshka Representation Learning to allow vector truncation with minimal loss. +- Quantization-friendly: Using scalar quantization and 256 dimensions provides 99% of unquantized, full-precision performance. +- Read more at the [Snowflake blog](https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0), and the Hugging Face [model card](https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0) +- Allowable `dimensions`: 1024 (default), 256 + +--- + +### `Snowflake/snowflake-arctic-embed-m-v1.5` {#snowflake-arctic-embed-m-v1.5} + +- A 109M parameter, 768-dimensional model for enterprise retrieval tasks in English. +- Trained with Matryoshka Representation Learning to allow vector truncation with minimal loss. +- Quantization-friendly: Using scalar quantization and 256 dimensions provides 99% of unquantized, full-precision performance. +- Read more at the [Snowflake blog](https://www.snowflake.com/engineering-blog/arctic-embed-m-v1-5-enterprise-retrieval/), and the Hugging Face [model card](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) +- Allowable `dimensions`: 768 (default), 256 + +:::info Input truncation +Currently, input exceeding the model's context windows is truncated from the right (i.e. the end of the input). 
+::: diff --git a/_includes/weaviate-embeddings-requirements 2.mdx b/_includes/weaviate-embeddings-requirements 2.mdx new file mode 100644 index 0000000000..76d83c8590 --- /dev/null +++ b/_includes/weaviate-embeddings-requirements 2.mdx @@ -0,0 +1,7 @@ +To use Weaviate Embeddings, you need: + +- A Weaviate Cloud instance running at least Weaviate version `>=1.27.10`, `>=1.28.3` or `>=1.29.0`. +- A Weaviate client library that supports Weaviate Embeddings: + - Python client version `4.9.5` or higher + - JavaScript/TypeScript client version `3.2.5` or higher + - Go/Java clients are not yet officially supported; you must pass the `X-Weaviate-Api-Key` and `X-Weaviate-Cluster-Url` headers manually upon instantiation as shown below. diff --git a/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx b/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx new file mode 100644 index 0000000000..2688f68c68 --- /dev/null +++ b/_includes/weaviate-embeddings-vectorizer-parameters 2.mdx @@ -0,0 +1,51 @@ + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; +import PyCode from '!!raw-loader!/developers/weaviate/model-providers/_includes/provider.vectorizer.py'; +import TSCode from '!!raw-loader!/developers/weaviate/model-providers/_includes/provider.vectorizer.ts'; +import GoCode from '!!raw-loader!/_includes/code/howto/go/docs/model-providers/2-usage-text/main.go'; +import JavaCode from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddings.java'; +import JavaCode2 from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/UsageWeaviateTextEmbeddingsArcticEmbedLV20.java'; +import JavaImportQueries from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/model_providers/ImportAndQueries.java'; + +- `model` (optional): The name of the model to use for 
embedding generation. +- `dimensions` (optional): The number of dimensions to use for the generated embeddings. +- `base_url` (optional): The base URL for the Weaviate Embeddings service. (Not required in most cases.) + +The following examples show how to configure Weaviate Embeddings-specific options. + + + + + + + + + + + + + + + diff --git a/blog/2025-02-13-agents-simplified/index 2.mdx b/blog/2025-02-13-agents-simplified/index 2.mdx new file mode 100644 index 0000000000..4fa698198c --- /dev/null +++ b/blog/2025-02-13-agents-simplified/index 2.mdx @@ -0,0 +1,206 @@ +--- +title: 'Agents Simplified: What we mean in the context of AI' +slug: ai-agents +authors: [tuana, prajjwal] +date: 2025-02-13 +tags: ['concepts', 'agents'] +image: ./img/hero.png +description: "What is an AI Agent? Learn how AI agents work, the benefits of using AI agents and more" +--- +![Agents Simplified: What we mean in the context of AI](./img/hero.png) + + +If you’re in the AI-osphere, you’ve probably heard the term ‘AI Agents’ being thrown around quite a bit recently. In this article, let’s boil down what we mean when we say ‘Agents’ in the context of large language models (LLMs) and artificial intelligence (AI). + +Before we dive into the topic, one thing to remember is that the term ‘agents’ has existed long before we had todays performant LLMs. We could even say that AI agents have existed for a long time too, just not with today’s generative LLMs as the star. What has changed though is just how good and sophisticated they’ve become. So, in short, you’re not hearing more about agents because they’re a brand new technology. No, you’re hearing more about AI agents because things just got very, very interesting. + +## What is an AI Agent + +At a basic level, an AI agent today is a semi- or fully-autonomous system that uses an LLM as its ‘brain’ for critical decision making and solving complex tasks. 
Think of them as automated decision making engines so that you, the user, only have to come with your query. They operate and use a variety of tools available to them in their environment to get things done for you so that you can sit back and relax while it figures out how to solve your problem. + +Agents autonomously direct their own processes and execution flow, choosing which tools to use based on the task at hand. These tools can include web search engines, databases, APIs, and more, enabling agents to interact with the real world. + +## A Brief History on AI Agents + +AI agents have technically existed for a long time. You can even see the authors of [this recent article on AI agents by Microsoft](https://news.microsoft.com/source/features/ai/ai-agents-what-they-are-and-how-theyll-change-the-way-we-work/) referring to AI agents they’d been working on back in 2005. However, the shape and abilities of our AI agents have significantly changed in the last couple of years largely thanks to the abilities of the latest LLMs. Now, we’re able to use LLMs as a core component when it comes to planning, reasoning and acting. + +> Reader: This section was a pleasure for me (Tuana) to write and geek out on. But if history and research is not your thing, feel free to jump to the next section. I won’t take offense. + +So, with that said, I’d like to highlight a few milestones in our _recent_ history of AI agents, and you can assume that from here on out we are only referring to the AI agents of today (2025). This is of course my own experience on the matter looking back over the last few years. But let’s turn back the clock to just before the release of ChatGPT.
In 2020, there were 2 papers that were published that in my view could be viewed as the beginnings of current day AI agents that make use of LLMs as the core decision making component: + +- [MRKL Systems](https://arxiv.org/abs/2205.00445): Pronounced ‘miracle’ systems, this paper was largely centered around the shortcomings of language and studied the _reason_ as to why we were getting so many hallucinated responses. And in short, they highlighted what we now fully understand: Language models don’t _know_ everything, they’re designed to generate language. Think of it this way, we can’t expect people to know our birthday unless we tell them when it is. This paper introduces a way in which we can provide language models with external knowledge bases which can be referred to extract the relevant information from. +- [ReAct](https://arxiv.org/pdf/2210.03629): Published slightly after MRKL systems, this paper introduced another crucial component into what makes an agent today. This paper introduced a process of prompting that we call “ReAct”, which stands for ‘reason and act’. In short, it highlighted a clever way we can structure our prompts which results in the LLM taking into account the question at hand, reasoning about its options on how to solve it, selecting the correct tools to use to solve the question, and acting on it. To keep things _very_ simple, take the following example. Instead of only asking the question, we’re also telling the model which resources it has access to and asking it to make a plan about how it would solve the query. In short, this paper introduced a way to start thinking about our LLM instructions to make the process of reasoning and acting more reliable: + +![chat](img/chat.png) + +> Note: The actual ReAct prompt recommended in the paper is a lot more involved than this, including instructions on how to generate thought, how to reason and so on. 
+ +In my view, these two papers highlight two very important findings and features that bring us to the AI agents of today: a good instruction, and external tools. That, and thousands of humans who started to tinker around with these LLMs and we’re now in a world where we’ve started to build more and more sophisticated AI agents (that no longer only use the ReAct prompting approach). + +With that, let’s have a look into what makes up an AI agent of today. + +### Core Components of an AI Agent + +Although not every AI agent has to include _all_ of these components, when we build agents they include at least a few of the following components and processes: An LLM, access to tools (via function calling), some level of memory, and reasoning. + +Let’s dive into what they each do: + +- **LLM:** Think of the LLM as the brain of the operation. Although not necessarily for _every step_, when we say ‘agents’ in 2025 a generative model is involved as the orchestrator of the operation to a great degree. Simply put, think of the example scenario in the section above: it’s the LLM that has decided that it’s best to first look up the `user_calendar` followed by looking up the weather. +- **Tools:** A great feature of agents is that they interact with the environment through different tools. One can think of them as ‘add-ons’ that make agents better. These tools let agents go beyond the fixed training knowledge of the LLMs by providing highly relevant and real-time data (like to your personal database) and abilities (like sending emails). With function calling, LLMs can directly interact with a predefined set of tools, expanding the operational scope and efficiency of agents. +- **Memory:** Agents often have some form of memory (both short-term and long-term), which allows them to store logs of their reasoning process, conversation histories, or information collected during different execution steps. 
We need memory both for ongoing conversations with our agents as well as conversations we want to come back to. Memory can be used to personalize the experience or plan future decisions +- **Observation & Reasoning:** The LLM is at the heart of problem solving, task decomposition, planning, and routing. It’s the component that allows the agent to reason about a problem, break it down into smaller steps (if needed), and decide how & when to use the available resources/tools to provide the best solution. However, not every agent is built equally, sometimes we include reasoning as an explicit step of the process when we’re building our agents. + +An important thing to remember is that there are various design patterns that result in an AI agent and these components can be used to varying degrees. The agents we see today exist on a spectrum, and the level of autonomy or ‘agentic’ behavior largely depends on how much decision making authority is delegated to the LLMs. In simpler terms: some agents are designed to operate more independently than others. + +![agents](img/ai_agents.png) + +## How do AI Agents Work? + +Most AI agents we see today use the LLM as the core decision maker/orchestrator of the operation. The level of autonomy this LLM has can of course vary, which we’ll talk more about in the ‘A look into the future’ section of this article. But let’s first start by discussing the basics of how an AI agent that uses an LLM for most of the decisions works. + +Something I notice is that when people discuss LLMs and agents these days, it seems like there’s quite a lot of magic happening. So here, I’ll try to explain what is _actually_ going on behind the scenes of an AI agent that has access to some tools. + +### Define the Prompt + +At the heart of any system that uses an LLM is an instruction (a prompt) that sets the scene for the LLM as to what its core purpose is. 
The ReAct paper also clearly presented this by highlighting a complex prompt that defines a reasoning, thought-generating, observing agent. For example, an LLM could be given the instruction about how it’s a “helpful assistant that has access to my databases in order to answer my queries”. + +### Provide Tools + +Next, we need to provide a list of tools to the LLM. This is by far one of the most popular ways of creating AI agents today, although it’s not always necessary and we can still create agentic functionality without it having to be via tools and function calling. Most model providers today support ‘function calling’ which allows us to set up our interactions with an LLM with a list of tools that it knows it may access at any given time to resolve a query. + +When we provide tools to an LLM, we tell the LLM about a few things. It uses these things to decide whether it’s time to use the tool or not: + +- **The name:** for example, a tool may have the name `technical_documentation_search` +- **The description:** which is probably the most important piece of information the model has access to when reasoning about which tool to use. For example, for the tool `technical_documentation_search` we may provide the description “Useful for when you need to search the Weaviate technical docs for answers” +- **The expected inputs:** Remember that tools are _external_ to the LLM. The LLM knows their name, it has a description for them too, but ultimately the job of a generative large language model is to produce language. So what can it do? Well, what it’s good at! It can probably produce some content which returns the name of a function (a tool), and the expected inputs for it to run. So, we also provide this information when we give a list of tools to an LLM. For example, for our tool `technical_documentation_search` tool, we may tell the LLM that it expects `query: str` to run. 
+ +If you’re interested in what this looks like in reality, you can check out the [Function Definition docs by OpenAI](https://platform.openai.com/docs/guides/function-calling) for example. + +### Use Tools + +So, we have an LLM, it knows that it may access some tools, how to run them, and what they’re useful for. However, an LLM doesn’t have an inherent ability to, for example, run a python script… Or search your documentation. What it can do though is provide a message that _explains_ that it intends to run a tool, and what inputs it wants to run it with. + +Let’s take the following scenario as an example: + +- We have an AI agent using an LLM +- We’ve provided `technical_documentation_search` as a tool with expected input `query: str`. We’ve said it’s “Useful for when you need to search the Weaviate technical docs for answers” +- User asks: “Hey, how can I use Ollama with Weaviate?” + +In this scenario, what actually happens is something like this: + +- The LLM produces a response that boils down to “Run tool `technical_documentation_search` with `query = "Using Ollama"` ”. + +So, in reality, the LLM is making our AI agent application take a step outside of its own world. It instructs our system that there’s an external resource to be referenced. + +### Observe Tool Responses + +If all goes well, by this point your AI agent has run a tool. Remember that this tool could be _anything_. For example, our `technical_documentation_search` tool could in itself be a [RAG application (retrieval augmented generation)](/blog/introduction-to-rag) that in itself uses yet another LLM to generate responses to queries. The point is, at the end of the day we’ve probably run the tool with the query “Using Ollama” and the response is “You can use Ollama by enabling the text2vec-ollama or generative-ollama modules, both for embedding models and generative modules”, or something along those lines. 
But that’s not the end of it, because the original LLM that makes up the core of our AI agent doesn’t know the response yet. + +When a tool runs, the results of that tool are then returned back to the agent’s LLM. This is usually provided as a chat message where the role is set to “function call”. So our LLM knows that the response it’s seeing is not from the user, but a result of the tool it decided to run. The LLM then observes the results of the tool (or tools) to provide the user with the final answer. + +Congratulations! By this point, you’ve learned the basics of what makes an AI agent! Especially those that rely on tools and function calling. The way I like to imagine it is that the LLM that is the core orchestrator of an AI agent is a bit like a wizard with a spell book but no wand. The LLM knows what it can do, and how, but it can do nothing more than say the magic word. The tools still have to run outside the LLM. + +![wizard](img/wizard.png) + +## What is “Agentic” AI + +There’s a lot of new vocabulary to get used to, which can be confusing. But actually, when it comes to what’s “agentic AI” versus what an “AI agent” is, we can make our lives a lot easier. An AI agent is inherently _agentic_, but an AI agent usually refers to an end application designed for a specific task. For example, an AI agent might be a documentation search assistant, or a personal assistant that has access to your email and Slack. + +When we say ‘Agentic AI’ however, we’re usually referring to a system that is designed with elements of agentic components such as a decision making LLM, a reasoning step, maybe some tools, self-reflection, and so on. For something to be deemed agentic, it doesn’t need to have all of these components. Rather, it often showcases the features of some of them.
+ +## Tools for Building AI Agents + +Building an AI agent requires integrating many components and tools to create a system capable of autonomous or semi-autonomous decision-making, interaction, and task execution. While advanced agents can be highly complex, even the simplest ones need a few essential elements. Below are some resources that can help you get started with building your own AI agents: + +### 1. Language Model Providers: + +The foundation of an AI agent is an LLM, which powers its entire reasoning. It allows the agent to understand different inputs and plan its actions effectively. It is also essential to look for an LLM that has built-in function-calling support so that we can connect it to external tools and APIs. Popular LLM providers include: + +- [OpenAI](https://platform.openai.com/docs/models): GPT 4o, o3-mini +- [Anthropic](https://docs.anthropic.com/en/docs/about-claude/models): Claude 3.5 Sonnet, Claude 3.5 Haiku +- [Google](https://ai.google.dev/gemini-api/docs/models/gemini): Gemini 2.0 Pro, Gemini 2.0 Flash +- [Mistral](https://docs.mistral.ai/getting-started/models/models_overview/): Mistral Large, Mistral Small 3 +- Open-source models using [Hugging Face](https://huggingface.co/models) or [Ollama](https://ollama.com/search) + +### 2. Memory and Storage: + +Agents need some kind of persistent memory to retain context over time. The memory can be of two types: + +- Short-term Memory: To keep track of current conversation or the task at hand. +- Long-term Memory: To remember past conversations, personalization, and experiences over time. + +There are currently many variations and implementations of both types of memory for agents today, and we’re likely to see more as the technology progresses. For example, for short-term memory, we see implementations as simple as providing “conversation summaries” to the LLM at each iteration or message, so as to navigate context length limits. 
For long-term memory, we may choose to use a database to back up conversations. This may even start changing the role of vector databases like Weaviate, where they start being used as long-term memory which the AI agent can extract most relevant bits of prior conversation from. + +### 3. Frameworks for AI Agent Orchestration: + +Orchestration frameworks act as smart conductors, coordinating all components of an AI agent and even managing multiple agents in a multi-agent setup. They abstract away most of the complexities, handle errors/retries cycles, and ensure that the language model, external tools/APIs, and memory systems all work together smoothly. + +There are several frameworks available that simplify the development of AI agents: + +- [Langgraph](https://www.langchain.com/langgraph): Provides a structured framework for defining, coordinating, and executing multiple agents. +- [LlamaIndex](https://www.llamaindex.ai/): Enables the creation of complex, agentic systems with varying degrees of complexity. +- [CrewAI](https://www.crewai.com/): Multi-agent framework for orchestrating autonomous AI agents having specific roles, tools, and goals. +- [Hugging Face smolagents](https://huggingface.co/docs/smolagents/en/index): Library that enables you to run powerful agents in just a few lines of code. +- [Haystack](https://haystack.deepset.ai/): End-to-end framework that allows you to build AI applications like agents, powered by LLMs. +- [OpenAI Swarm](https://github.com/openai/swarm):An educational framework exploring ergonomic, lightweight multi-agent orchestration. +- [AgentKit](https://agentkit.inngest.com/overview): A TypeScript library to create and orchestrate AI Agents. + +### 4. Tools and APIs: + +An agent is only as powerful as the tools it can access. By connecting to various APIs and tools, the agent can interact with its environment and perform tasks such as web browsing, data retrieval, database queries, data extraction & analysis, code execution, etc. 
+ +Frameworks like LlamaIndex offer pre-made tool integrations like data loaders for PDFs, websites, and databases, as well as for apps like Slack, and Google Drive via [LlamaHub](https://llamahub.ai/). Similarly, [Langchain](https://python.langchain.com/docs/integrations/tools/) offers a wide range of similar tools that agents can readily use. Also, developers can always build custom tools as per their needs by wrapping APIs to introduce entire new functionalities. Recent works like [Querying Databases with Function Calling](https://arxiv.org/abs/2502.00032) even hint at the promise of function calling for database queries. + +In a nutshell, building AI agents is a lot like assembling pieces of a puzzle. You start off with a good language model, add the right set of tools and APIs, and then add in memory so that the agent remembers what’s important. An orchestration framework can be used to make things simpler and tie things together, making sure every piece plays its part perfectly. + +## A look into the future of AI Agents: challenges and advances + +The great thing about AI agents and agentic AI in general is that it’s still evolving every day. Although there’s a lot we didn’t discuss here from the challenges we see, to other core components of actually building AI agents for production, like observability, there are a few things that are probably worth highlighting when it comes to the future of AI agents. + +For example, you may have already noticed that unless we take some time to intentionally design our agentic applications, it may seem that a lot (too much?) relies on an LLM making the right call, if you will. And in the case that the agent has access to search tools, or knowledge bases, maybe that’s ok. But what happens when the tool has access to your bank account and the agent can now buy you a very expensive one way ticket to Hawaii?
+ +A debate I’ve really been enjoying listening to is whether the use of AI agents is mostly as “research assistants” or as the “executors of our will”. Which is a simple, but important debate, and probably one on which our opinions change over time as LLMs get better, and we have better regulations and guard rails in the field of AI in general. + +### Levels of Autonomy & Human in the Loop + +Now you understand how an AI agent in its most basic form operates. But it’s not _necessary_ (or advisable) to have the LLM be the orchestrator of _everything_. We’re already seeing more and more agents that delegate the process to simpler, more deterministic systems. And in some cases, to humans. For example, we’ll probably see more and more of the scenario in which a human is supposed to approve an action before it can take place. + +We’re even seeing tools like [Gorilla](https://github.com/ShishirPatil/gorilla) implement agents with “undo” functionality that allows a human to decide whether an action should be back tracked, adding a layer of human intervention into the process. + +### Multi-modal AI Agents + +Multi-modality refers to the ability to make use of more than one modality, i.e. the ability to go beyond just language (text) and incorporate images, videos, audio and so on. In a way, the technology is for the most part there. So, we will probably start seeing more and more AI agents that can interact with a variety of mediums, either as part of their tooling, or inherently if they make use of a multi-modal LLM. Think of an AI agent which you can ask to “create a cute cat video and forward it to my email”! + +### The role of vector databases + +Another interesting topic, especially for us at Weaviate, is the potential for the role of [vector databases](/blog/what-is-a-vector-database) in AI to expand. So far, we’ve mostly been seeing vector databases used as knowledge sources which an agent can have access to. 
However, it’s not difficult to imagine a future in which we’re making use of vector databases, as well as other types of databases, as memory resources for our agent interactions. + +## Examples and Use Cases of AI agents + +AI agents are reshaping the way we work and this change is already visible across multiple industries. They shine brightest when we need a perfect blend of conversation with action. By automating repetitive tasks they not only increase the work efficiency but also improve the overall user experience. Here are some real-world examples of AI agents in action: + +### AI Research Agent + +AI research agents, or research assistants simplify the process of analyzing large amounts of data, spotting trends, and generating hypotheses. Today, we can already see people in academia or professionals at work using ChatGPT as a companion to help them gather information, to help them structure their thoughts and provide the first step in many tasks. In a way, ChatGPT in its bare form is in itself a research assistant agent. These types of agents are also sometimes referred to as [“Agentic RAG”](/blog/what-is-agentic-rag), where an AI agent has access to multiple RAG tools, each accessing different knowledge bases. + +### Customer Service Agent + +AI customer service agents provide 24/7 support, handling inquiries, troubleshooting, and offering personalized interactions. They reduce wait times and let human agents take on more complex tasks. They can both act as research assistants for customers, getting answers to their queries quicker, as well as completing tasks for them. + +### Marketing & Sales Agent + +These agents optimize marketing campaigns and sales processes by analyzing customer data, personalizing outreach, and automating repetitive tasks like lead qualification and email follow-ups. + +### Code Assistant Agent + +These agents help developers by suggesting code, debugging errors, resolving tickets/issues, and even building new features.
This enables developers to save time and focus on creative problem-solving. Examples of this are already out there with Cursor and Copilot. + +## Summary + +This article gave a high level overview of what we mean when we say ‘AI agents’ in 2025, as well as giving a simple look into how they work. Although we did not go into all the technical details of different ‘agentic workflows’, another blog going into more technical detail is coming soon! We go through the components that help with the basic understanding of AI agents, such as prompts, tools, observing tool responses and reasoning about the final answer. Finally, we look into the future of AI agents, discuss the current short-comings and the advancements we could expect. + +A lot of the historical overview mentioned in this blog was also my (Tuana’s) subjective view looking over the past few years. If you do think I’m missing a curcial step, do let me know (DMs open on [X](https://x.com/tuanacelik)) + +import WhatsNext from '/_includes/what-next.mdx' + + \ No newline at end of file diff --git a/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx b/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx new file mode 100644 index 0000000000..53169923a2 --- /dev/null +++ b/blog/2025-02-25-accelerate-enterprise-ai/index 2.mdx @@ -0,0 +1,82 @@ +--- +title: 'Accelerate Enterprise AI: 94% Faster Search, Simplified Embedding Creation, and Dedicated Azure Deployment' +slug: accelerate-enterprise-ai +authors: [alvin, nenand] +date: 2025-02-25 +tags: ['release'] +image: ./img/hero.png +description: "Weaviate Embeddings is Now Generally Available, and Weaviate 1.29 is Officially Here! Read more about it in our launch announcement." +--- +![hero](./img/hero.png) + +**TL;DR** + +* **Weaviate Embeddings General Availability (GA)**: Now natively hosted in Weaviate Cloud, featuring Snowflake’s Arctic Embed 1.5 and 2.0 for multilingual, high-performance vector embeddings. 
+* **94% Faster Keyword Search**: BlockMax WAND slashes BM25 query latency by up to 94%, making large-scale hybrid search more efficient. +* **Enterprise Security with RBAC GA**: Our fully supported Role-Based Access Control is now generally available, giving enterprises the granular access controls they need. +* **Multi-Vector Embeddings Preview:** Now enables both queries and objects to be represented by multiple vectors, unlocking more nuanced data understanding and improved search relevance. +* **Dedicated Enterprise Deployment on Azure**: Simplifies deployments for customers deploying on Microsoft Azure, offering private clusters, IP whitelisting, and seamless cloud management. +* **NVIDIA integrations:** Leverage NVIDIA's inference services and models directly within Weaviate. +* **March 5 Release Event**: Join the Weaviate 1.29 deep dive for a live walkthrough of new features—recording available if you can’t attend live. + +## What’s New in Weaviate? + +We’re excited to share major enhancements across both Weaviate Cloud and the newly released Weaviate 1.29—our latest product launch packed with features that accelerate enterprise AI adoption without steep learning curves or costly overhead. + +From boosting hybrid search performance to simplifying the creation of **vector embeddings**, Weaviate continues to blend innovation with practicality for teams building production-grade AI solutions. Have stringent security requirements? With **Role-Based Access Controls (RBAC)** and dedicated deployment in Microsoft **Azure,** organizations can more easily comply with enterprise standards. Below, you’ll find the highlights that make Weaviate a compelling choice for those looking to push the boundaries of AI-powered search, data ingestion, and security. + +## 94% Faster BM25 Keyword Search with BlockMax WAND + +Weaviate has proven billion-scale vector search with low latency, it now delivers the same high-performance level for BM25 keyword search. 
**BlockMax WAND** implementation supercharges Weaviate’s BM25 keyword search with up to a **94% reduction** in search latency**[^1]**, making large-scale keyword lookups faster and more efficient. + +By organizing parts of the keyword index with pre-computed statistics, it can skip irrelevant documents and compress data far more effectively. For enterprises handling billions of records, this translates to dramatically lower latency and significantly reduced storage requirements—**elevenfold compression in some tests[^2]**. Whether you’re searching across product catalogs, customer communications, or internal knowledge bases, the new algorithm helps your teams find relevant insights faster **without extra hardware or infrastructure overhead**. + +Best of all, once a user opts in, these performance gains work behind the scenes–applying only to data ingested after enabling–so there’s no need for developers to revamp existing applications. With **BlockMax WAND** in the **Weaviate 1.29** release, users gain a robust, future-proof search solution that scales to enterprise needs. For further insights into BlockMax WAND, explore our blog, "[BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand).” + +## Weaviate Embeddings: Now Generally Available + +**Weaviate Embeddings** is now generally available in **Weaviate Cloud**, taking a major leap forward from our initial [Preview](/blog/introducing-weaviate-embeddings). Designed to eliminate friction in AI data pipelines, it combines vector creation and storage into a single service. No more juggling external providers or hitting rate limits mid-project. You can ingest millions of data points at high throughput, all while co-locating models and vector indexes in Weaviate Cloud for optimal performance. 
+ +A key addition to this release is **Snowflake’s Arctic Embed 2.0**—an open-source text embedding model that goes beyond its 1.5 predecessor with better multilingual support and **impressive benchmark results** (including [MTEB](https://arxiv.org/html/2412.04506v1)). Simply put, it handles large-scale, high-fidelity document ingestion while delivering more accurate semantic search. By running these embeddings natively in Weaviate Cloud, developers can focus on building next-gen AI applications instead of wrestling with model hosting or unwieldy data pipelines. + +For a deeper look at **Arctic Embed 2.0**’s journey, check out the latest [**Weaviate Podcast**](https://www.youtube.com/watch?v=Kjqv4uk3RCs&ab_channel=Weaviate%E2%80%A2VectorDatabase) featuring its creators from Snowflake. If you’re ready to get hands-on, visit our [**Quickstart tutorial**](/developers/wcs/embeddings#get-started), or get started with a free trial of [**Weaviate Cloud**](https://console.weaviate.cloud/). + +## Raising the Bar on Enterprise Security & Compliance + +Compliance is table stakes for enterprises, and it’s more vital than ever as AI-driven breaches are alarmingly on the rise. As industries like banking, insurance, and healthcare ramp up their AI use cases, the regulations around data privacy and security continue to tighten. That’s why Weaviate now provides **RBAC** as a fully supported, enterprise-grade feature in the **1.29 release**. + +**RBAC** gives you granular permission settings to secure your collections and tenants—ensuring that sensitive data, such as customer records or key business information, is accessible only to authorized users without cumbersome workarounds or bottlenecks. With custom or predefined roles, security and compliance teams can ensure that every user gets the right level of access, no more, no less. 
This approach not only reduces the risk of unauthorized data access but also streamlines compliance reporting and auditing, eliminating costly manual procedures. + +Executives, IT teams, and developers can now have more confidence to keep innovating, without compromising on compliance or risking data integrity. + +## Dedicated Deployment on Microsoft Azure + +Many enterprises rely on **Microsoft Azure** for their core infrastructure. To meet these users where they are, Weaviate now offers an Azure Enterprise Provisioner for easy cloud deployment. This enables customers to have their Weaviate Cloud deployment (managed by Weaviate) run in an isolated Azure environment. + +For businesses prioritizing security and compliance, the provisioner ensures full resource isolation via a private cluster and IP whitelisting, plus deep integration with Azure’s security and **role-based access controls**. It also manages essential infrastructure, like backups, logs, and metrics, so teams spend less time on DevOps overhead and more time building AI-driven applications. + +For those operating on Azure, this opens the door for simpler procurement, reduced operational friction, and dedicated resource environments that match the strict requirements of heavily regulated sectors. + +## Enhanced AI Performance: Multi-Vector Embeddings, NVIDIA Integrations, and Asynchronous Replication + +With Weaviate 1.29, we’re taking innovation even further. **Multi-vector embeddings** now enable both queries and objects to be represented by multiple vectors, unlocking more nuanced data understanding and improved search relevance—especially for dense content like medical data or academic papers. Our integration with the **Jina AI ColBERT vectorizer** exemplifies this approach, supporting “late interaction” search techniques that deliver superior precision. 
+ +In addition, Weaviate 1.29 introduces robust **NVIDIA integrations** – including text2vec-nvidia, multi2vec-nvidia, generative-nvidia, and reranker-nvidia – that seamlessly bring NVIDIA’s powerful inference engine into Weaviate. These modules simplify embedding creation, semantic search, and Retrieval Augmented Generation (RAG), empowering you to build sophisticated AI applications faster and more efficiently. + +Complementing these innovations, our new **asynchronous replication** feature seamlessly synchronizes data across nodes, further bolstering system reliability and performance in large-scale deployments. + +Together, these enhancements further elevate Weaviate’s position as a leading enterprise-grade vector database. For a deeper dive into the technical details, check out the [Weaviate 1.29 Release Highlights](/blog/weaviate-1-29-release) blog. + +## What’s Next + +That wraps up our look at the **new additions to Weaviate Cloud and the 1.29 release**—but the excitement doesn’t stop here. [Mark your calendars for **March 5**](https://link.mail.beehiiv.com/ss/c/u001.7ph1bOQkPnwamO4cv9f9A8jeYYCfFv91aBjGm1l9ezWQ7PVH1EkZELJKK7fxa04bAOWbwJ2WYaaD136yiCTURqFbSQBejldEJqvIuDUo_6B-tD8pmJCXebuYJUx0NpviZQxynvPP4vitQpuZ-FAOu-PKwROV8cNcnbceaxbz3yeDwP7eJ970d5IHv583qMine1EFPLJmWl2sido3qgWBFTMhCOUEyAYu4cGxr9A2HCc3T9hBBRkgPiIYpFO7yTd7RvVKZlMIxCGrcG3E8tCgaQ/4dz/v3dtMN9ZSJaBYAUu0KBytA/h20/h001.TrgKKrLVHWSDrE7WMueG26GAoMG46biDXDrp0keR2fU), where our experts will deliver a deep dive into new features like **multi-valued vectors**, **RBAC**, and more. If you can’t make the live session, don’t worry—we’ll share a recording so you won’t miss a thing. + +In the meantime, if you’re eager to get hands-on, check out the [**Quickstart tutorial**](/developers/wcs/embeddings#get-started), or explore [**Weaviate Cloud with a free trial**](https://console.weaviate.cloud/)—and experience for yourself how Weaviate can supercharge your AI applications. We’re excited to see what you build\! 
+ +[^1]: [BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand) (Fever dataset–5.4M documents): reduced search time from 517ms to 33ms. + +[^2]: [BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](/blog/blockmax-wand) (MS Marco dataset–8.6M documents): from 10.53 GB to 0.92 GB, 77% fewer documents scored, 79% fewer blocks decompressed. + +import WhatsNext from '/_includes/what-next.mdx' + + \ No newline at end of file diff --git a/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx b/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx new file mode 100644 index 0000000000..2e39b6a5d5 --- /dev/null +++ b/blog/2025-02-25-weaviate-1-29-release/_core-1-29-include 2.mdx @@ -0,0 +1,100 @@ +Weaviate `1.29` brings a host of new features and improvements. It introduces multi-vector embedding support (preview) and new NVIDIA model support. Weaviate's role-based access control (RBAC) and async replication are now generally available. We've also made further improvements to the BlockMax WAND algorithm to speed up keyword and hybrid searches, among other enhancements. + +Here are the release ⭐️*highlights*⭐️! + +![Weaviate 1.29](./img/hero.png) + +- [Multi-vector embedding support (Preview)](#multi-vector-embedding-support-preview) +- [NVIDIA model support](#nvidia-model-support) +- [Role-based access control (RBAC) in GA](#role-based-access-control-rbac-in-ga) +- [BlockMax WAND (Technical Preview)](#blockmax-wand-technical-preview) +- [Async replication in GA](#async-replication-in-ga) + +## Multi-vector embedding support (Preview) + +:::caution 🚧 Technical Preview +Multi-vector embedding support is added `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. 
**We do not recommend using this feature in production environments at this time.** +::: + +Weaviate now supports multi-vector embeddings, allowing you to store and query using multi-vector embeddings such as ColBERT, ColPali and ColQwen. + +![Single vs Multi-vector embedding comparison visualization](../../developers/weaviate/tutorials/_includes/single_multi_vector_comparison_light.png#gh-light-mode-only "Single vs Multi-vector embedding comparison visualization") +![Single vs Multi-vector embedding comparison visualization](../../developers/weaviate/tutorials/_includes/single_multi_vector_comparison_dark.png#gh-dark-mode-only "Single vs Multi-vector embedding comparison visualization") + +This approach enables more precise searching through "late interaction" - a technique that matches individual parts of texts rather than comparing them as whole units. + +Using multi-vector embeddings can improve the quality of search results, especially for long texts or complex queries. + +The following visualization shows how late interaction works in a ColBERT model, in comparison to a single-vector model. + +![ColBERT late interaction vs single-vector visualization](../../developers/weaviate/tutorials/_includes/colbert_late_interaction_light.png#gh-light-mode-only "ColBERT late interaction vs single-vector visualization") +![ColBERT late interaction vs single-vector visualization](../../developers/weaviate/tutorials/_includes/colbert_late_interaction_dark.png#gh-dark-mode-only "ColBERT late interaction vs single-vector visualization") + +This feature is available as a technical preview in `1.29`, so we're excited to hear your feedback and suggestions for further improvements. 
+ +If you would like to try out multi-vector embeddings in Weaviate, check out the [Multi-vector embeddings tutorial](/developers/weaviate/tutorials/multi-vector-embeddings) which will take you end-to-end, for both: + +- [Using Jina AI ColBERT model integration](/developers/weaviate/tutorials/multi-vector-embeddings#option-1-colbert-model-integration), or +- [Using user-provided multi-vector embeddings](/developers/weaviate/tutorials/multi-vector-embeddings#option-2-user-provided-embeddings). + +## NVIDIA model support + +Weaviate's suite of [model integrations](/developers/weaviate/model-providers/) now includes support for NVIDIA's NIM inference service. + +![Embedding integration illustration](../../developers/weaviate/model-providers/_includes/integration_nvidia_embedding.png) + +Weaviate users can now use NVIDIA model integration to [create text embeddings](/developers/weaviate/model-providers/nvidia/embeddings), [create multi-modal embeddings](/developers/weaviate/model-providers/nvidia/embeddings-multimodal), and use [generative AI models](/developers/weaviate/model-providers/nvidia/generative). (Reranker model support coming soon) + +These model integration pages provide detailed instructions on how to configure Weaviate with NVIDIA models and start using them in your applications. + +## Role-based access control (RBAC) in GA + +Role-based access control (RBAC) is now generally available in Weaviate `1.29`, offering more granular control over user permissions. + +The RBAC feature allows you to define roles and assign permissions to users based on their roles. This enables you to control who can access, read, write, or delete data in Weaviate. + +There have been a number of changes to the RBAC API in `1.29` from the preview API in `1.28`, some of which are breaking changes. + +This was done to make the API more consistent and easier to use, and to introduce new features. 
Also keep in mind that the RBAC feature is still in development, and we have plans to add more features in the future. + +Refer to the [RBAC documentation](/developers/weaviate/configuration/rbac) for more information. + +## BlockMax WAND (Technical Preview) + +:::caution 🚧 Technical Preview +BlockMax WAND algorithm is available in `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. **We do not recommend using this feature in production environments at this time.** +::: + +The BlockMax WAND algorithm continues to evolve in Weaviate `1.29` with further improvements to speed up BM25 and hybrid searches. + +It organizes the inverted index in blocks to enable skipping over blocks that are not relevant to the query. This can significantly reduce the number of documents that need to be scored, improving search performance. + +In our internal testing, we have seen up to a 10x speedup in keyword searches due to BlockMax WAND. + +If you are experiencing slow BM25 (or hybrid) searches, try enabling BlockMax WAND to see if it improves performance. + +To read more about BlockMax WAND, and to try it out, refer to the [Indexing page](/developers/weaviate/concepts/indexing#blockmax-wand-algorithm). + +**To use BlockMax WAND in Weaviate `v1.29`, it must be enabled prior to collection creation.** As of this version, Weaviate will not migrate existing collections to use BlockMax WAND. + +## Async replication in GA + +For those of you using Weaviate in a distributed environment, async replication is now generally available in `1.29`. + +When each shard is replicated across multiple nodes, async replication guarantees that all nodes holding copies of the same data remain in sync by periodically comparing and propagating data. + +Async replication supplements the existing repair-on-read mechanism. 
If a node becomes inconsistent between sync checks, the repair-on-read mechanism catches the problem at read time. + +To activate async replication, set `asyncEnabled` to true in the [`replicationConfig` section of your collection definition](/developers/weaviate/manage-data/collections#replication-settings). Visit the [How-to: Replication](/developers/weaviate/configuration/replication#async-replication-settings) page to learn more about the available async replication settings, and [Concepts: Replication/Consistency](/developers/weaviate/concepts/replication-architecture/consistency) for more information on how async replication works. + +## Summary + +Ready to Get Started? + +Enjoy the new features and improvements in Weaviate `1.29`. The release is available open-source as always [on GitHub](https://github.com/weaviate/weaviate/releases/tag/v1.29.0), and will be available for new Sandboxes on [Weaviate Cloud](https://console.weaviate.cloud/) very shortly. + +For those of you upgrading a self-hosted version, please check the [migration guide](/developers/weaviate/more-resources/migration#general-upgrade-instructions) for detailed instructions. + +It will be available for Serverless clusters on Weaviate Cloud soon as well. + +Thanks for reading, see you next time 👋! diff --git a/blog/2025-02-25-weaviate-1-29-release/index 2.mdx b/blog/2025-02-25-weaviate-1-29-release/index 2.mdx new file mode 100644 index 0000000000..112e15b260 --- /dev/null +++ b/blog/2025-02-25-weaviate-1-29-release/index 2.mdx @@ -0,0 +1,18 @@ +--- +title: Weaviate 1.29 Release +slug: weaviate-1-29-release +authors: [jp] +date: 2025-02-25 +image: ./img/hero.png +tags: ['release', 'engineering'] +description: "Read about multi-vector embedding support, improved keyword/hybrid searches, role-based access control and async replication going GA, new NVIDIA modules, and more." 
+ +--- + +import Core129 from './_core-1-29-include.mdx'; + + + +import WhatsNext from '/_includes/what-next.mdx' + + diff --git a/blog/2025-02-26-blockmax-wand/index 2.mdx b/blog/2025-02-26-blockmax-wand/index 2.mdx new file mode 100644 index 0000000000..59b8780cef --- /dev/null +++ b/blog/2025-02-26-blockmax-wand/index 2.mdx @@ -0,0 +1,255 @@ +--- +title: 'BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search' +slug: blockmax-wand +authors: [amourao, jp] +date: 2025-02-26 +tags: ['search', 'concepts', 'engineering'] +image: ./img/hero.png +description: "How Weaviate achieved 10x Faster Keyword Search and 90% index compression" +--- +![BlockMax WAND: How Weaviate Achieved 10x Faster Keyword Search](./img/hero.png) + + +Keyword search is an integral part of Weaviate’s [hybrid search](/blog/hybrid-search-explained), designed to return [best of both](/blog/hybrid-search-fusion-algorithms) vector and keyword search. +Hybrid search as a tool for [RAG](/blog/introduction-to-rag) and [Agentic AI](/blog/ai-agents) increases the breadth and depth of information that can be retrieved from a dataset, but it comes with its own challenges. + +As the text corpora size becomes larger and larger, keyword searches can take long times to execute compared to vector searches. +In this blog post, you will learn how we improved Weaviate’s inverted index, how we avoid scoring all documents that have the query terms, how we compress the inverted index, and more about the improvements from using BlockMax WAND in Weaviate. + +:::caution 🚧 Technical Preview +BlockMax WAND algorithm is available in `v1.29` as a **technical preview**. This means that the feature is still under development and may change in future releases, including potential breaking changes. 
**We do not recommend using this feature in production environments at this time.** +[Instructions on how to enable it are available here.](/developers/weaviate/concepts/indexing#blockmax-wand-algorithm) +::: + + +## Inverted Index and Tokenization + +Keyword search works by comparing the terms in your queries to the terms in your database documents, giving higher scores to rarer terms and terms that show up more often in your documents. +In keyword search context, these `terms` are called `tokens`, but we'll use the terms interchangeably in this blog post. + +Keyword search requires us to first define what terms we want to search. +A big part of this process is [tokenization](/developers/academy/py/tokenization/basics). It splits the input documents into [tokens](https://en.wikipedia.org/wiki/Lexical_analysis#Token) (i.e. terms, numbers or anything else one would consider important to be searched individually). +In this simple example, consider a dataset made of three documents with a single property, title, composed of a single sentence and a *whitespace* tokenizer that lowercases the documents and splits them by whitespace. + +| Doc Id | Document Title | Tokenized Title | +| :---- | :---- | :---- | +| 1 | A Web Developer's Guide to Hybrid Search | \[“a”, “web”, “developer”, “s”, “guide”, “to”, “hybrid”, “search”\] | +| 2 | Unlocking the Power of Hybrid Search | \[“unlocking”, “the”, “power”, “of”, “hybrid”, “search”\] | +| 3 | Vector Library versus Vector Database | \[“vector”, “library”, “versus”, “vector”, “database”\] | + +**Table**: Example dataset with tokenized titles. + +Now we have turned documents into a [bag-of-words](https://en.wikipedia.org/wiki/Bag-of-words_model) model, where we can search for individual query terms in the sentences. But having to go through all documents to find the ones that have query terms isn't efficient. 
+ +This is where the *inverted* part of the [inverted index](https://en.wikipedia.org/wiki/Inverted_index) comes from: instead of going document \-\> term, create an index that goes from term \-\> documents. +It works like the indexes at the end of books, but instead of mapping terms to book pages, we map terms to documents using posting lists. + +A posting list is a list of "postings", which contain the information needed to score documents: +* doc id: to identify which documents have the term. +* [term frequency (tf)](https://en.wikipedia.org/wiki/Tf%E2%80%93idf), which represents the number of times the term is part of the property. For example, `tf("vector")` for doc 3 is 2, as it shows up twice. + +| Term | Posting List | +| :---- | :---- | +| hybrid | (Doc 1, tf: 1); (Doc 2, tf: 1) | +| search | (Doc 1, tf: 1); (Doc 2, tf: 1) | +| vector | (Doc 3, tf: 2) | + +**Table**: Posting lists for terms `hybrid`, `search`, and `vector` in the example dataset. + +When a user searches for a query, we tokenize the query in the same way we tokenized the documents. +Thus, if you want to search for `"Hybrid search or Vector search"`, we get the tokens `["hybrid", "search", "or", "vector"]`. +Inspecting the posting lists for each token, we can see that documents 1, 2, and 3 have at least one of the query tokens. +But we still need to score them to see which ones are the most relevant to the query. + +## tf-idf and BM25 + +Not all terms are created equal. In our examples, words like "hybrid," "vector," and "database" are more informative than "a," "to," or "the." To rank results meaningfully, we need to score documents based on how important each term is. +[idf (Inverse Document Frequency)](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) is a measure of this importance, based on the number of documents a term appears in compared to the total number of documents. Higher values mean rarer terms that will contribute more to the score of a document. 
Combined with the tf, it becomes the cornerstone of keyword search, [tf-idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf). + +[BM25](/blog/hybrid-search-explained#bm25) further refines tf-idf by applying property length and frequency saturation normalization. + +## Exhaustive Search + +The exhaustive way of computing the [BM25](/blog/hybrid-search-explained#bm25) scores would be to check all the documents that have at least one of the query terms and score them. + +But this is quite resource intensive; most searches are for the top 10-100 results, and even with pagination, at most, only about 100 documents end up being shown to the user for each search. +This means that if 100 000 documents have at least one of the query terms (normal for queries with common words in databases with 1 million documents), this is 0.1% of the documents, many of which are completely irrelevant to the query, wasting a lot of CPU and I/O resources. + +## WAND + +[WAND (Weak AND)](https://dl.acm.org/doi/abs/10.1145/956863.956944) takes the inverted index and idf to greatly reduce the number of documents we need to inspect when searching for the top-*k* documents that match a query. +It relies on a two-step search to avoid ranking all documents for top-*k* search. + +* Approximate evaluation over query term postings in parallel to identify candidate docs with max impact heuristics (based on idf); +* Promising candidates are fully evaluated, their exact scores are computed and they are added to the top-*k* results if the scores are higher than the lowest score. + +Max impact is the maximum score a term can contribute to the score. +Its upper bound is the idf, e.g. a document with only the term *vector* will have a max impact equal to vector’s idf. 
+ +* As we start to rank, we add enough documents to fill the top-*k* list; +* When we get *k* candidates, the list is full and we have a **lower bound** to beat, which is the score of the lowest ranked document; +* As we move forward, we can then start skipping documents where the sum of the max impacts of its terms, is lower than the lower bound. + +WAND is what currently powers keyword search at Weaviate. +The following section will show why we are excited to introduce BlockMax WAND. + +## BlockMax WAND + +While WAND works well and is already able to greatly reduce the number of documents that we need to inspect, it still has some limitations: it relies on a single global value of idf for all documents in a term, which relies on the assumption that there may be a single document that just has that term. +[BlockMax WAND (BMW)](https://dl.acm.org/doi/10.1145/2009916.2010048) is WAND on steroids: +* Divides posting lists into blocks with local max impact; +* Skips and avoids decoding doc ids in blocks. + +BMW can be viewed as a *meta*-WAND, where we perform document skips at block level using max impact (shallow advances), and use the combination of max doc ids from blocks to even avoid loading whole blocks from disk. + +This table shows an example posting, as output by BlockMax WAND. You may notice that this includes some additional elements compared to postings for WAND shown above. + +![BlockMax WAND Block example](./img/block_example.png) + +A block is a mini posting list (list of doc ids and tfs) with its own metadata: +* **Max doc id**: highest doc id that shows up in the block; +* **Max impact**: maximum possible score for a document in the block; for tf-idf, this represents the maximum tf of a document within the block (norm tf equals tf/prop length). 
+ + +| Dataset | WAND | BlockMax WAND | +| :---- | -----: | ----: | +| MS Marco (8.6M docs) | 15.1% | 6.7% (-56%) | +| Fever (5.4M docs) | 20.8% | 8.4% (-60%) | +| Climate Fever (5.4M docs) | 29.3% | 12.2% (-58%) | + +**Table**: Average % of doc query terms scored Weaviate `v1.29.0` on standard [beir datasets](https://github.com/beir-cellar/beir) without stopword removal. +Exhaustive search always needs to examine **100%** of the document query terms with at least one query term. + +The experimental results show that BlockMax WAND is able to further halve the number of documents inspected from the already remarkable **15-30%** number of terms scored to **5-15%**. +But how does BlockMax WAND work in practice? + +### BlockMax WAND Demo + +At Weaviate, we like to show, not just tell. That's why we've created a **demo** to show you exactly how BlockMax WAND works in practice! +* **Input your documents, queries, and search parameters** and see exactly how Exhaustive search, WAND, and BlockMax WAND work; +* **Get the metrics on the number of documents and blocks scored**, and see the improvements of BlockMax WAND vs. WAND and Exhaustive search; +* **Share your dataset and queries** with others to show the improvements! + +