diff --git a/.azdo/pipelines/azure-dev.yml b/.azdo/pipelines/azure-dev.yml
index b500d40e03..752556f709 100644
--- a/.azdo/pipelines/azure-dev.yml
+++ b/.azdo/pipelines/azure-dev.yml
@@ -60,6 +60,7 @@ steps:
       AZURE_SEARCH_QUERY_SPELLER: $(AZURE_SEARCH_QUERY_SPELLER)
       AZURE_SEARCH_SEMANTIC_RANKER: $(AZURE_SEARCH_SEMANTIC_RANKER)
       AZURE_SEARCH_QUERY_REWRITING: $(AZURE_SEARCH_QUERY_REWRITING)
+      AZURE_SEARCH_FIELD_NAME_EMBEDDING: $(AZURE_SEARCH_FIELD_NAME_EMBEDDING)
       AZURE_STORAGE_ACCOUNT: $(AZURE_STORAGE_ACCOUNT)
       AZURE_STORAGE_RESOURCE_GROUP: $(AZURE_STORAGE_RESOURCE_GROUP)
       AZURE_STORAGE_SKU: $(AZURE_STORAGE_SKU)
diff --git a/.github/workflows/azure-dev.yml b/.github/workflows/azure-dev.yml
index d20cc20f90..fa99f45a9e 100644
--- a/.github/workflows/azure-dev.yml
+++ b/.github/workflows/azure-dev.yml
@@ -50,6 +50,7 @@ jobs:
       AZURE_SEARCH_QUERY_SPELLER: ${{ vars.AZURE_SEARCH_QUERY_SPELLER }}
      AZURE_SEARCH_SEMANTIC_RANKER: ${{ vars.AZURE_SEARCH_SEMANTIC_RANKER }}
      AZURE_SEARCH_QUERY_REWRITING: ${{ vars.AZURE_SEARCH_QUERY_REWRITING }}
+     AZURE_SEARCH_FIELD_NAME_EMBEDDING: ${{ vars.AZURE_SEARCH_FIELD_NAME_EMBEDDING }}
      AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
      AZURE_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_STORAGE_RESOURCE_GROUP }}
      AZURE_STORAGE_SKU: ${{ vars.AZURE_STORAGE_SKU }}
diff --git a/app/backend/app.py b/app/backend/app.py
index 263fcf06a6..9a8c5c9864 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -464,6 +464,8 @@ async def setup_clients():
     AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER") or "lexicon"
     AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()
     AZURE_SEARCH_QUERY_REWRITING = os.getenv("AZURE_SEARCH_QUERY_REWRITING", "false").lower()
+    # This defaults to the previous field name "embedding", for backwards compatibility
+    AZURE_SEARCH_FIELD_NAME_EMBEDDING = os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING", "embedding")
     AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
     AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
@@ -580,7 +582,10 @@ async def setup_clients():
             disable_vectors=os.getenv("USE_VECTORS", "").lower() == "false",
         )
         ingester = UploadUserFileStrategy(
-            search_info=search_info, embeddings=text_embeddings_service, file_processors=file_processors
+            search_info=search_info,
+            embeddings=text_embeddings_service,
+            file_processors=file_processors,
+            search_field_name_embedding=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
         )
         current_app.config[CONFIG_INGESTER] = ingester
@@ -677,6 +682,7 @@ async def setup_clients():
             embedding_model=OPENAI_EMB_MODEL,
             embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
             embedding_dimensions=OPENAI_EMB_DIMENSIONS,
+            embedding_field=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
             sourcepage_field=KB_FIELDS_SOURCEPAGE,
             content_field=KB_FIELDS_CONTENT,
             query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -695,6 +701,7 @@ async def setup_clients():
             embedding_model=OPENAI_EMB_MODEL,
             embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
             embedding_dimensions=OPENAI_EMB_DIMENSIONS,
+            embedding_field=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
             sourcepage_field=KB_FIELDS_SOURCEPAGE,
             content_field=KB_FIELDS_CONTENT,
             query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -734,6 +741,7 @@ async def setup_clients():
             embedding_model=OPENAI_EMB_MODEL,
             embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
             embedding_dimensions=OPENAI_EMB_DIMENSIONS,
+            embedding_field=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
             sourcepage_field=KB_FIELDS_SOURCEPAGE,
             content_field=KB_FIELDS_CONTENT,
             query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -755,6 +763,7 @@ async def setup_clients():
             embedding_model=OPENAI_EMB_MODEL,
             embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
             embedding_dimensions=OPENAI_EMB_DIMENSIONS,
+            embedding_field=AZURE_SEARCH_FIELD_NAME_EMBEDDING,
             sourcepage_field=KB_FIELDS_SOURCEPAGE,
             content_field=KB_FIELDS_CONTENT,
             query_language=AZURE_SEARCH_QUERY_LANGUAGE,
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
index 59f1909a54..734e606690 100644
--- a/app/backend/approaches/approach.py
+++ b/app/backend/approaches/approach.py
@@ -38,8 +38,6 @@ class Document:
     id: Optional[str]
     content: Optional[str]
-    embedding: Optional[list[float]]
-    image_embedding: Optional[list[float]]
     category: Optional[str]
     sourcepage: Optional[str]
     sourcefile: Optional[str]
@@ -50,11 +48,9 @@ class Document:
     reranker_score: Optional[float] = None

     def serialize_for_results(self) -> dict[str, Any]:
-        return {
+        result_dict = {
             "id": self.id,
             "content": self.content,
-            "embedding": Document.trim_embedding(self.embedding),
-            "imageEmbedding": Document.trim_embedding(self.image_embedding),
             "category": self.category,
             "sourcepage": self.sourcepage,
             "sourcefile": self.sourcefile,
@@ -75,18 +71,7 @@ def serialize_for_results(self) -> dict[str, Any]:
             "score": self.score,
             "reranker_score": self.reranker_score,
         }
-
-    @classmethod
-    def trim_embedding(cls, embedding: Optional[list[float]]) -> Optional[str]:
-        """Returns a trimmed list of floats from the vector embedding."""
-        if embedding:
-            if len(embedding) > 2:
-                # Format the embedding list to show the first 2 items followed by the count of the remaining items."""
-                return f"[{embedding[0]}, {embedding[1]} ...+{len(embedding) - 2} more]"
-            else:
-                return str(embedding)
-
-        return None
+        return result_dict

 @dataclass
@@ -159,6 +144,7 @@ def __init__(
         embedding_deployment: Optional[str],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
         embedding_model: str,
         embedding_dimensions: int,
+        embedding_field: str,
         openai_host: str,
         vision_endpoint: str,
         vision_token_provider: Callable[[], Awaitable[str]],
@@ -173,6 +159,7 @@ def __init__(
         self.embedding_deployment = embedding_deployment
         self.embedding_model = embedding_model
         self.embedding_dimensions = embedding_dimensions
+        self.embedding_field = embedding_field
         self.openai_host = openai_host
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
@@ -238,8 +225,6 @@ async def search(
                 Document(
                     id=document.get("id"),
                     content=document.get("content"),
-                    embedding=document.get("embedding"),
-                    image_embedding=document.get("imageEmbedding"),
                     category=document.get("category"),
                     sourcepage=document.get("sourcepage"),
                     sourcefile=document.get("sourcefile"),
@@ -314,12 +299,14 @@ class ExtraArgs(TypedDict, total=False):
             **dimensions_args,
         )
         query_vector = embedding.data[0].embedding
-        return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields="embedding")
+        # Oversampling is configured in the search index setup,
+        # so we do not need to pass an explicit oversampling parameter here
+        return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields=self.embedding_field)

     async def compute_image_embedding(self, q: str):
         endpoint = urljoin(self.vision_endpoint, "computervision/retrieval:vectorizeText")
         headers = {"Content-Type": "application/json"}
-        params = {"api-version": "2023-02-01-preview", "modelVersion": "latest"}
+        params = {"api-version": "2024-02-01", "model-version": "2023-04-15"}
         data
= {"text": q} headers["Authorization"] = "Bearer " + await self.vision_token_provider() diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 249c7247b2..d795d6573e 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -35,6 +35,7 @@ def __init__( embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_model: str, embedding_dimensions: int, + embedding_field: str, sourcepage_field: str, content_field: str, query_language: str, @@ -50,6 +51,7 @@ def __init__( self.embedding_deployment = embedding_deployment self.embedding_model = embedding_model self.embedding_dimensions = embedding_dimensions + self.embedding_field = embedding_field self.sourcepage_field = sourcepage_field self.content_field = content_field self.query_language = query_language diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py index b56d773a6f..f8aaf3c37d 100644 --- a/app/backend/approaches/chatreadretrievereadvision.py +++ b/app/backend/approaches/chatreadretrievereadvision.py @@ -39,6 +39,7 @@ def __init__( embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_model: str, embedding_dimensions: int, + embedding_field: str, sourcepage_field: str, content_field: str, query_language: str, @@ -58,6 +59,7 @@ def __init__( self.embedding_deployment = embedding_deployment self.embedding_model = embedding_model self.embedding_dimensions = embedding_dimensions + self.embedding_field = embedding_field self.sourcepage_field = sourcepage_field self.content_field = content_field self.query_language = query_language @@ -89,7 +91,7 @@ async def run_until_final_call( minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) filter = self.build_filter(overrides, auth_claims) - vector_fields = overrides.get("vector_fields", ["embedding"]) + vector_fields = overrides.get("vector_fields", "textAndImageEmbeddings") send_text_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "texts", None] send_images_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "images", None] @@ -122,13 +124,10 @@ async def run_until_final_call( # If retrieval mode includes vectors, compute an embedding for the query vectors = [] if use_vector_search: - for field in vector_fields: - vector = ( - await self.compute_text_embedding(query_text) - if field == "embedding" - else await self.compute_image_embedding(query_text) - ) - vectors.append(vector) + if vector_fields == "textEmbeddingOnly" or vector_fields == "textAndImageEmbeddings": + vectors.append(await self.compute_text_embedding(query_text)) + if vector_fields == "imageEmbeddingOnly" or vector_fields == "textAndImageEmbeddings": + vectors.append(await self.compute_image_embedding(query_text)) results = await self.search( top, diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py index 8bdbb9785e..f842002e1e 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -28,6 +28,7 @@ def __init__( embedding_model: str, embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_dimensions: int, + embedding_field: str, sourcepage_field: str, content_field: str, query_language: str, @@ -44,6 +45,7 @@ def __init__( self.embedding_dimensions 
= embedding_dimensions self.chatgpt_deployment = chatgpt_deployment self.embedding_deployment = embedding_deployment + self.embedding_field = embedding_field self.sourcepage_field = sourcepage_field self.content_field = content_field self.query_language = query_language diff --git a/app/backend/approaches/retrievethenreadvision.py b/app/backend/approaches/retrievethenreadvision.py index a556fd8b6c..a021537c52 100644 --- a/app/backend/approaches/retrievethenreadvision.py +++ b/app/backend/approaches/retrievethenreadvision.py @@ -33,6 +33,7 @@ def __init__( embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text" embedding_model: str, embedding_dimensions: int, + embedding_field: str, sourcepage_field: str, content_field: str, query_language: str, @@ -48,6 +49,7 @@ def __init__( self.embedding_model = embedding_model self.embedding_deployment = embedding_deployment self.embedding_dimensions = embedding_dimensions + self.embedding_field = embedding_field self.sourcepage_field = sourcepage_field self.content_field = content_field self.gpt4v_deployment = gpt4v_deployment @@ -84,20 +86,17 @@ async def run( minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) filter = self.build_filter(overrides, auth_claims) - vector_fields = overrides.get("vector_fields", ["embedding"]) + vector_fields = overrides.get("vector_fields", "textAndImageEmbeddings") send_text_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "texts", None] send_images_to_gptvision = overrides.get("gpt4v_input") in ["textAndImages", "images", None] # If retrieval mode includes vectors, compute an embedding for the query vectors = [] if use_vector_search: - for field in vector_fields: - vector = ( - await self.compute_text_embedding(q) - if field == "embedding" - else await self.compute_image_embedding(q) - ) - vectors.append(vector) + if vector_fields == "textEmbeddingOnly" or vector_fields == "textAndImageEmbeddings": + vectors.append(await self.compute_text_embedding(q)) + if vector_fields == "imageEmbeddingOnly" or vector_fields == "textAndImageEmbeddings": + vectors.append(await self.compute_image_embedding(q)) results = await self.search( top, diff --git a/app/backend/prepdocs.py b/app/backend/prepdocs.py index a0f02a0b21..f05e1e426b 100644 --- a/app/backend/prepdocs.py +++ b/app/backend/prepdocs.py @@ -398,6 +398,7 @@ async def main(strategy: Strategy, setup_index: bool = True): blob_manager=blob_manager, document_action=document_action, embeddings=openai_embeddings_service, + search_field_name_embedding=os.environ["AZURE_SEARCH_FIELD_NAME_EMBEDDING"], subscription_id=os.environ["AZURE_SUBSCRIPTION_ID"], search_service_user_assigned_id=args.searchserviceassignedid, search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"), @@ -430,6 +431,8 @@ async def main(strategy: Strategy, setup_index: bool = True): embeddings=openai_embeddings_service, image_embeddings=image_embeddings_service, search_analyzer_name=os.getenv("AZURE_SEARCH_ANALYZER_NAME"), + # Default to the previous field names for backward compatibility + search_field_name_embedding=os.getenv("AZURE_SEARCH_FIELD_NAME_EMBEDDING", "embedding"), use_acls=use_acls, category=args.category, use_content_understanding=use_content_understanding, diff --git a/app/backend/prepdocslib/embeddings.py b/app/backend/prepdocslib/embeddings.py index 57af39c5ab..df56f39c08 100644 --- a/app/backend/prepdocslib/embeddings.py +++ b/app/backend/prepdocslib/embeddings.py @@ -239,7 +239,7 @@ def __init__(self, endpoint: 
str, token_provider: Callable[[], Awaitable[str]]): async def create_embeddings(self, blob_urls: list[str]) -> list[list[float]]: endpoint = urljoin(self.endpoint, "computervision/retrieval:vectorizeImage") headers = {"Content-Type": "application/json"} - params = {"api-version": "2023-02-01-preview", "modelVersion": "latest"} + params = {"api-version": "2024-02-01", "model-version": "2023-04-15"} headers["Authorization"] = "Bearer " + await self.token_provider() embeddings: list[list[float]] = [] diff --git a/app/backend/prepdocslib/filestrategy.py b/app/backend/prepdocslib/filestrategy.py index 2bc7dc84aa..37f399cf4b 100644 --- a/app/backend/prepdocslib/filestrategy.py +++ b/app/backend/prepdocslib/filestrategy.py @@ -51,6 +51,7 @@ def __init__( embeddings: Optional[OpenAIEmbeddings] = None, image_embeddings: Optional[ImageEmbeddings] = None, search_analyzer_name: Optional[str] = None, + search_field_name_embedding: Optional[str] = None, use_acls: bool = False, category: Optional[str] = None, use_content_understanding: bool = False, @@ -63,22 +64,27 @@ def __init__( self.embeddings = embeddings self.image_embeddings = image_embeddings self.search_analyzer_name = search_analyzer_name + self.search_field_name_embedding = search_field_name_embedding self.search_info = search_info self.use_acls = use_acls self.category = category self.use_content_understanding = use_content_understanding self.content_understanding_endpoint = content_understanding_endpoint - async def setup(self): - search_manager = SearchManager( + def setup_search_manager(self): + self.search_manager = SearchManager( self.search_info, self.search_analyzer_name, self.use_acls, False, self.embeddings, + field_name_embedding=self.search_field_name_embedding, search_images=self.image_embeddings is not None, ) - await search_manager.create_index() + + async def setup(self): + self.setup_search_manager() + await self.search_manager.create_index() if self.use_content_understanding: if self.content_understanding_endpoint is None: @@ -91,9 +97,7 @@ async def setup(self): await cu_manager.create_analyzer() async def run(self): - search_manager = SearchManager( - self.search_info, self.search_analyzer_name, self.use_acls, False, self.embeddings - ) + self.setup_search_manager() if self.document_action == DocumentAction.Add: files = self.list_file_strategy.list() async for file in files: @@ -104,7 +108,7 @@ async def run(self): blob_image_embeddings: Optional[list[list[float]]] = None if self.image_embeddings and blob_sas_uris: blob_image_embeddings = await self.image_embeddings.create_embeddings(blob_sas_uris) - await search_manager.update_content(sections, blob_image_embeddings, url=file.url) + await self.search_manager.update_content(sections, blob_image_embeddings, url=file.url) finally: if file: file.close() @@ -112,10 +116,10 @@ async def run(self): paths = self.list_file_strategy.list_paths() async for path in paths: await self.blob_manager.remove_blob(path) - await search_manager.remove_content(path) + await self.search_manager.remove_content(path) elif self.document_action == DocumentAction.RemoveAll: await self.blob_manager.remove_blob() - await search_manager.remove_content() + await self.search_manager.remove_content() class UploadUserFileStrategy: @@ -129,12 +133,22 @@ def __init__( file_processors: dict[str, FileProcessor], embeddings: Optional[OpenAIEmbeddings] = None, image_embeddings: Optional[ImageEmbeddings] = None, + search_field_name_embedding: Optional[str] = None, ): self.file_processors = file_processors 
self.embeddings = embeddings self.image_embeddings = image_embeddings self.search_info = search_info - self.search_manager = SearchManager(self.search_info, None, True, False, self.embeddings) + self.search_manager = SearchManager( + search_info=self.search_info, + search_analyzer_name=None, + use_acls=True, + use_int_vectorization=False, + embeddings=self.embeddings, + field_name_embedding=search_field_name_embedding, + search_images=False, + ) + self.search_field_name_embedding = search_field_name_embedding async def add_file(self, file: File): if self.image_embeddings: diff --git a/app/backend/prepdocslib/integratedvectorizerstrategy.py b/app/backend/prepdocslib/integratedvectorizerstrategy.py index 66e8e4a346..9e89facc4c 100644 --- a/app/backend/prepdocslib/integratedvectorizerstrategy.py +++ b/app/backend/prepdocslib/integratedvectorizerstrategy.py @@ -6,7 +6,6 @@ ) from azure.search.documents.indexes.models import ( AzureOpenAIEmbeddingSkill, - FieldMapping, IndexProjectionMode, InputFieldMappingEntry, OutputFieldMappingEntry, @@ -41,6 +40,7 @@ def __init__( blob_manager: BlobManager, search_info: SearchInfo, embeddings: AzureOpenAIEmbeddingService, + search_field_name_embedding: str, subscription_id: str, search_service_user_assigned_id: str, document_action: DocumentAction = DocumentAction.Add, @@ -53,18 +53,25 @@ def __init__( self.blob_manager = blob_manager self.document_action = document_action self.embeddings = embeddings + self.search_field_name_embedding = search_field_name_embedding self.subscription_id = subscription_id self.search_user_assigned_identity = search_service_user_assigned_id self.search_analyzer_name = search_analyzer_name self.use_acls = use_acls self.category = category self.search_info = search_info + prefix = f"{self.search_info.index_name}-{self.search_field_name_embedding}" + self.skillset_name = f"{prefix}-skillset" + self.indexer_name = f"{prefix}-indexer" + self.data_source_name = f"{prefix}-blob" - async def create_embedding_skill(self, index_name: str): - skillset_name = f"{index_name}-skillset" + async def create_embedding_skill(self, index_name: str) -> SearchIndexerSkillset: + """ + Create a skillset for the indexer to chunk documents and generate embeddings + """ split_skill = SplitSkill( - name=f"{index_name}-split-skill", + name="split-skill", description="Split skill to chunk documents", text_split_mode="pages", context="/document", @@ -77,7 +84,7 @@ async def create_embedding_skill(self, index_name: str): ) embedding_skill = AzureOpenAIEmbeddingSkill( - name=f"{index_name}-embedding-skill", + name="embedding-skill", description="Skill to generate embeddings via Azure OpenAI", context="/document/pages/*", resource_url=f"https://{self.embeddings.open_ai_service}.openai.azure.com", @@ -98,8 +105,12 @@ async def create_embedding_skill(self, index_name: str): source_context="/document/pages/*", mappings=[ InputFieldMappingEntry(name="content", source="/document/pages/*"), - InputFieldMappingEntry(name="embedding", source="/document/pages/*/vector"), InputFieldMappingEntry(name="sourcepage", source="/document/metadata_storage_name"), + InputFieldMappingEntry(name="sourcefile", source="/document/metadata_storage_name"), + InputFieldMappingEntry(name="storageUrl", source="/document/metadata_storage_path"), + InputFieldMappingEntry( + name=self.search_field_name_embedding, source="/document/pages/*/vector" + ), ], ), ], @@ -109,7 +120,7 @@ async def create_embedding_skill(self, index_name: str): ) skillset = SearchIndexerSkillset( - 
name=skillset_name, + name=self.skillset_name, description="Skillset to chunk documents and generate embeddings", skills=[split_skill, embedding_skill], index_projection=index_projection, @@ -125,6 +136,7 @@ async def setup(self): use_acls=self.use_acls, use_int_vectorization=True, embeddings=self.embeddings, + field_name_embedding=self.search_field_name_embedding, search_images=False, ) @@ -133,7 +145,7 @@ async def setup(self): ds_client = self.search_info.create_search_indexer_client() ds_container = SearchIndexerDataContainer(name=self.blob_manager.container) data_source_connection = SearchIndexerDataSourceConnection( - name=f"{self.search_info.index_name}-blob", + name=self.data_source_name, type=SearchIndexerDataSourceType.AZURE_BLOB, connection_string=self.blob_manager.get_managedidentity_connectionstring(), container=ds_container, @@ -163,23 +175,19 @@ async def run(self): await self.blob_manager.remove_blob() # Create an indexer - indexer_name = f"{self.search_info.index_name}-indexer" - indexer = SearchIndexer( - name=indexer_name, + name=self.indexer_name, description="Indexer to index documents and generate embeddings", - skillset_name=f"{self.search_info.index_name}-skillset", + skillset_name=self.skillset_name, target_index_name=self.search_info.index_name, - data_source_name=f"{self.search_info.index_name}-blob", - # Map the metadata_storage_name field to the title field in the index to display the PDF title in the search results - field_mappings=[FieldMapping(source_field_name="metadata_storage_name", target_field_name="title")], + data_source_name=self.data_source_name, ) indexer_client = self.search_info.create_search_indexer_client() indexer_result = await indexer_client.create_or_update_indexer(indexer) # Run the indexer - await indexer_client.run_indexer(indexer_name) + await indexer_client.run_indexer(self.indexer_name) await indexer_client.close() logger.info( diff --git a/app/backend/prepdocslib/searchmanager.py b/app/backend/prepdocslib/searchmanager.py index b55047a058..b544bbf52a 100644 --- a/app/backend/prepdocslib/searchmanager.py +++ b/app/backend/prepdocslib/searchmanager.py @@ -6,8 +6,10 @@ from azure.search.documents.indexes.models import ( AzureOpenAIVectorizer, AzureOpenAIVectorizerParameters, + BinaryQuantizationCompression, HnswAlgorithmConfiguration, HnswParameters, + RescoringOptions, SearchableField, SearchField, SearchFieldDataType, @@ -18,6 +20,9 @@ SemanticSearch, SimpleField, VectorSearch, + VectorSearchAlgorithmConfiguration, + VectorSearchCompression, + VectorSearchCompressionRescoreStorageMethod, VectorSearchProfile, VectorSearchVectorizer, ) @@ -55,6 +60,7 @@ def __init__( use_acls: bool = False, use_int_vectorization: bool = False, embeddings: Optional[OpenAIEmbeddings] = None, + field_name_embedding: Optional[str] = None, search_images: bool = False, ): self.search_info = search_info @@ -62,15 +68,101 @@ def __init__( self.use_acls = use_acls self.use_int_vectorization = use_int_vectorization self.embeddings = embeddings - # Integrated vectorization uses the ada-002 model with 1536 dimensions - self.embedding_dimensions = self.embeddings.open_ai_dimensions if self.embeddings else 1536 + self.embedding_dimensions = self.embeddings.open_ai_dimensions if self.embeddings else None + self.field_name_embedding = field_name_embedding self.search_images = search_images - async def create_index(self, vectorizers: Optional[list[VectorSearchVectorizer]] = None): + async def create_index(self): logger.info("Checking whether search index %s exists...", 
self.search_info.index_name) async with self.search_info.create_search_index_client() as search_index_client: + embedding_field = None + image_embedding_field = None + text_vector_search_profile = None + text_vector_algorithm = None + text_vector_compression = None + image_vector_search_profile = None + image_vector_algorithm = None + + if self.embeddings: + if self.embedding_dimensions is None: + raise ValueError( + "Embedding dimensions must be set in order to add an embedding field to the search index" + ) + if self.field_name_embedding is None: + raise ValueError( + "Embedding field must be set in order to add an embedding field to the search index" + ) + + text_vectorizer = None + if isinstance(self.embeddings, AzureOpenAIEmbeddingService): + text_vectorizer = AzureOpenAIVectorizer( + vectorizer_name=f"{self.embeddings.open_ai_model_name}-vectorizer", + parameters=AzureOpenAIVectorizerParameters( + resource_url=self.embeddings.open_ai_endpoint, + deployment_name=self.embeddings.open_ai_deployment, + model_name=self.embeddings.open_ai_model_name, + ), + ) + + text_vector_algorithm = HnswAlgorithmConfiguration( + name="hnsw_config", + parameters=HnswParameters(metric="cosine"), + ) + text_vector_compression = BinaryQuantizationCompression( + compression_name=f"{self.field_name_embedding}-compression", + truncation_dimension=1024, # should this be a parameter? maybe not yet? + rescoring_options=RescoringOptions( + enable_rescoring=True, + default_oversampling=10, + rescore_storage_method=VectorSearchCompressionRescoreStorageMethod.PRESERVE_ORIGINALS, + ), + # Explicitly set deprecated parameters to None + rerank_with_original_vectors=None, + default_oversampling=None, + ) + text_vector_search_profile = VectorSearchProfile( + name=f"{self.field_name_embedding}-profile", + algorithm_configuration_name=text_vector_algorithm.name, + compression_name=text_vector_compression.compression_name, + **({"vectorizer_name": text_vectorizer.vectorizer_name if text_vectorizer else None}), + ) + + embedding_field = SearchField( + name=self.field_name_embedding, + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + hidden=True, + searchable=True, + filterable=False, + sortable=False, + facetable=False, + vector_search_dimensions=self.embedding_dimensions, + vector_search_profile_name=f"{self.field_name_embedding}-profile", + stored=False, + ) + + if self.search_images: + image_vector_algorithm = HnswAlgorithmConfiguration( + name="image_hnsw_config", + parameters=HnswParameters(metric="cosine"), + ) + image_vector_search_profile = VectorSearchProfile( + name="imageEmbedding-profile", + algorithm_configuration_name=image_vector_algorithm.name, + ) + image_embedding_field = SearchField( + name="imageEmbedding", + type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + hidden=False, + searchable=True, + filterable=False, + sortable=False, + facetable=False, + vector_search_dimensions=1024, + vector_search_profile_name=image_vector_search_profile.name, + ) + if self.search_info.index_name not in [name async for name in search_index_client.list_index_names()]: logger.info("Creating new search index %s", self.search_info.index_name) fields = [ @@ -92,17 +184,6 @@ async def create_index(self, vectorizers: Optional[list[VectorSearchVectorizer]] type="Edm.String", analyzer_name=self.search_analyzer_name, ), - SearchField( - name="embedding", - type=SearchFieldDataType.Collection(SearchFieldDataType.Single), - hidden=False, - searchable=True, - filterable=False, - sortable=False, - 
facetable=False, - vector_search_dimensions=self.embedding_dimensions, - vector_search_profile_name="embedding_config", - ), SimpleField(name="category", type="Edm.String", filterable=True, facetable=True), SimpleField( name="sourcepage", @@ -138,47 +219,37 @@ async def create_index(self, vectorizers: Optional[list[VectorSearchVectorizer]] filterable=True, ) ) + if self.use_int_vectorization: - logger.info("Including parent_id field in new index %s", self.search_info.index_name) + logger.info("Including parent_id field for integrated vectorization support in new index") fields.append(SearchableField(name="parent_id", type="Edm.String", filterable=True)) - if self.search_images: - logger.info("Including imageEmbedding field in new index %s", self.search_info.index_name) - fields.append( - SearchField( - name="imageEmbedding", - type=SearchFieldDataType.Collection(SearchFieldDataType.Single), - hidden=False, - searchable=True, - filterable=False, - sortable=False, - facetable=False, - vector_search_dimensions=1024, - vector_search_profile_name="embedding_config", - ), - ) - vectorizers = [] - if self.embeddings and isinstance(self.embeddings, AzureOpenAIEmbeddingService): - logger.info( - "Including vectorizer for search index %s, using Azure OpenAI service %s", - self.search_info.index_name, - self.embeddings.open_ai_service, - ) - vectorizers.append( - AzureOpenAIVectorizer( - vectorizer_name=f"{self.search_info.index_name}-vectorizer", - parameters=AzureOpenAIVectorizerParameters( - resource_url=self.embeddings.open_ai_endpoint, - deployment_name=self.embeddings.open_ai_deployment, - model_name=self.embeddings.open_ai_model_name, - ), - ) - ) - else: - logger.info( - "Not including vectorizer for search index %s, no Azure OpenAI service found", - self.search_info.index_name, - ) + vectorizers: list[VectorSearchVectorizer] = [] + vector_search_profiles = [] + vector_algorithms: list[VectorSearchAlgorithmConfiguration] = [] + vector_compressions: list[VectorSearchCompression] = [] + if embedding_field: + logger.info("Including %s field for text vectors in new index", embedding_field.name) + fields.append(embedding_field) + if text_vectorizer is not None: + vectorizers.append(text_vectorizer) + if ( + text_vector_search_profile is None + or text_vector_algorithm is None + or text_vector_compression is None + ): + raise ValueError("Text vector search profile, algorithm and compression must be set") + vector_search_profiles.append(text_vector_search_profile) + vector_algorithms.append(text_vector_algorithm) + vector_compressions.append(text_vector_compression) + + if image_embedding_field: + logger.info("Including %s field for image vectors in new index", image_embedding_field.name) + fields.append(image_embedding_field) + if image_vector_search_profile is None or image_vector_algorithm is None: + raise ValueError("Image search profile and algorithm must be set") + vector_search_profiles.append(image_vector_search_profile) + vector_algorithms.append(image_vector_algorithm) index = SearchIndex( name=self.search_info.index_name, @@ -194,21 +265,9 @@ async def create_index(self, vectorizers: Optional[list[VectorSearchVectorizer]] ] ), vector_search=VectorSearch( - algorithms=[ - HnswAlgorithmConfiguration( - name="hnsw_config", - parameters=HnswParameters(metric="cosine"), - ) - ], - profiles=[ - VectorSearchProfile( - name="embedding_config", - algorithm_configuration_name="hnsw_config", - vectorizer_name=( - f"{self.search_info.index_name}-vectorizer" if self.use_int_vectorization else None - 
), - ), - ], + profiles=vector_search_profiles, + algorithms=vector_algorithms, + compressions=vector_compressions, vectorizers=vectorizers, ), ) @@ -229,28 +288,48 @@ async def create_index(self, vectorizers: Optional[list[VectorSearchVectorizer]] ) await search_index_client.create_or_update_index(existing_index) - if existing_index.vector_search is not None and ( - existing_index.vector_search.vectorizers is None - or len(existing_index.vector_search.vectorizers) == 0 + if embedding_field and not any( + field.name == self.field_name_embedding for field in existing_index.fields ): - if self.embeddings is not None and isinstance(self.embeddings, AzureOpenAIEmbeddingService): - logger.info("Adding vectorizer to search index %s", self.search_info.index_name) - existing_index.vector_search.vectorizers = [ - AzureOpenAIVectorizer( - vectorizer_name=f"{self.search_info.index_name}-vectorizer", - parameters=AzureOpenAIVectorizerParameters( - resource_url=self.embeddings.open_ai_endpoint, - deployment_name=self.embeddings.open_ai_deployment, - model_name=self.embeddings.open_ai_model_name, - ), - ) - ] - await search_index_client.create_or_update_index(existing_index) - else: - logger.info( - "Can't add vectorizer to search index %s since no Azure OpenAI embeddings service is defined", - self.search_info, - ) + logger.info("Adding %s field for text embeddings", self.field_name_embedding) + existing_index.fields.append(embedding_field) + if existing_index.vector_search is None: + raise ValueError("Vector search is not enabled for the existing index") + if text_vectorizer is not None: + if existing_index.vector_search.vectorizers is None: + existing_index.vector_search.vectorizers = [] + existing_index.vector_search.vectorizers.append(text_vectorizer) + if ( + text_vector_search_profile is None + or text_vector_algorithm is None + or text_vector_compression is None + ): + raise ValueError("Text vector search profile, algorithm and compression must be set") + if existing_index.vector_search.profiles is None: + existing_index.vector_search.profiles = [] + existing_index.vector_search.profiles.append(text_vector_search_profile) + if existing_index.vector_search.algorithms is None: + existing_index.vector_search.algorithms = [] + existing_index.vector_search.algorithms.append(text_vector_algorithm) + if existing_index.vector_search.compressions is None: + existing_index.vector_search.compressions = [] + existing_index.vector_search.compressions.append(text_vector_compression) + await search_index_client.create_or_update_index(existing_index) + + if image_embedding_field and not any(field.name == "imageEmbedding" for field in existing_index.fields): + logger.info("Adding %s field for image embeddings", image_embedding_field.name) + existing_index.fields.append(image_embedding_field) + if image_vector_search_profile is None or image_vector_algorithm is None: + raise ValueError("Image vector search profile and algorithm must be set") + if existing_index.vector_search is None: + raise ValueError("Image vector search is not enabled for the existing index") + if existing_index.vector_search.profiles is None: + existing_index.vector_search.profiles = [] + existing_index.vector_search.profiles.append(image_vector_search_profile) + if existing_index.vector_search.algorithms is None: + existing_index.vector_search.algorithms = [] + existing_index.vector_search.algorithms.append(image_vector_algorithm) + await search_index_client.create_or_update_index(existing_index) async def update_content( self, sections: 
list[Section], image_embeddings: Optional[list[list[float]]] = None, url: Optional[str] = None @@ -285,11 +364,13 @@ async def update_content( for document in documents: document["storageUrl"] = url if self.embeddings: + if self.field_name_embedding is None: + raise ValueError("Embedding field name must be set") embeddings = await self.embeddings.create_embeddings( texts=[section.split_page.text for section in batch] ) for i, document in enumerate(documents): - document["embedding"] = embeddings[i] + document[self.field_name_embedding] = embeddings[i] if image_embeddings: for i, (document, section) in enumerate(zip(documents, batch)): document["imageEmbedding"] = image_embeddings[section.split_page.page_num] diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 3556cd7213..6948289a96 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -53,7 +53,7 @@ azure-monitor-opentelemetry==1.6.1 # via -r requirements.in azure-monitor-opentelemetry-exporter==1.0.0b32 # via azure-monitor-opentelemetry -azure-search-documents==11.6.0b9 +azure-search-documents==11.6.0b11 # via -r requirements.in azure-storage-blob==12.22.0 # via diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index c915a19ee5..8cd37f7b7e 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -10,10 +10,10 @@ export const enum GPT4VInput { Texts = "texts" } -export const enum VectorFieldOptions { - Embedding = "embedding", - ImageEmbedding = "imageEmbedding", - Both = "both" +export const enum VectorFields { + Embedding = "textEmbeddingOnly", + ImageEmbedding = "imageEmbeddingOnly", + TextAndImageEmbeddings = "textAndImageEmbeddings" } export type ChatAppRequestOverrides = { @@ -37,7 +37,7 @@ export type ChatAppRequestOverrides = { use_groups_security_filter?: boolean; use_gpt4v?: boolean; gpt4v_input?: GPT4VInput; - vector_fields: VectorFieldOptions[]; + vector_fields: VectorFields; language: string; }; diff --git a/app/frontend/src/components/Settings/Settings.tsx b/app/frontend/src/components/Settings/Settings.tsx index b16beb0246..1b5fc9e410 100644 --- a/app/frontend/src/components/Settings/Settings.tsx +++ b/app/frontend/src/components/Settings/Settings.tsx @@ -4,7 +4,7 @@ import { TextField, ITextFieldProps, Checkbox, ICheckboxProps, Dropdown, IDropdo import { HelpCallout } from "../HelpCallout"; import { GPT4VSettings } from "../GPT4VSettings"; import { VectorSettings } from "../VectorSettings"; -import { RetrievalMode, VectorFieldOptions, GPT4VInput } from "../../api"; +import { RetrievalMode, VectorFields, GPT4VInput } from "../../api"; import styles from "./Settings.module.css"; // Add type for onRenderLabel @@ -26,7 +26,7 @@ export interface SettingsProps { retrievalMode: RetrievalMode; useGPT4V: boolean; gpt4vInput: GPT4VInput; - vectorFieldList: VectorFieldOptions[]; + vectorFields: VectorFields; showSemanticRankerOption: boolean; showQueryRewritingOption: boolean; showReasoningEffortOption: boolean; @@ -63,7 +63,7 @@ export const Settings = ({ retrievalMode, useGPT4V, gpt4vInput, - vectorFieldList, + vectorFields, showSemanticRankerOption, showQueryRewritingOption, showReasoningEffortOption, @@ -323,8 +323,9 @@ export const Settings = ({ {showVectorOption && ( onChange("vectorFieldList", val)} + updateVectorFields={val => onChange("vectorFields", val)} updateRetrievalMode={val => onChange("retrievalMode", val)} /> )} diff --git a/app/frontend/src/components/VectorSettings/VectorSettings.tsx 
b/app/frontend/src/components/VectorSettings/VectorSettings.tsx index 7463d0cd46..2010eeb50c 100644 --- a/app/frontend/src/components/VectorSettings/VectorSettings.tsx +++ b/app/frontend/src/components/VectorSettings/VectorSettings.tsx @@ -5,34 +5,42 @@ import { useTranslation } from "react-i18next"; import styles from "./VectorSettings.module.css"; import { HelpCallout } from "../../components/HelpCallout"; -import { RetrievalMode, VectorFieldOptions } from "../../api"; +import { RetrievalMode, VectorFields } from "../../api"; interface Props { showImageOptions?: boolean; defaultRetrievalMode: RetrievalMode; + defaultVectorFields?: VectorFields; updateRetrievalMode: (retrievalMode: RetrievalMode) => void; - updateVectorFields: (options: VectorFieldOptions[]) => void; + updateVectorFields: (vectorFields: VectorFields) => void; } -export const VectorSettings = ({ updateRetrievalMode, updateVectorFields, showImageOptions, defaultRetrievalMode }: Props) => { - const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); - const [vectorFieldOption, setVectorFieldOption] = useState(VectorFieldOptions.Both); +export const VectorSettings = ({ updateRetrievalMode, updateVectorFields, showImageOptions, defaultRetrievalMode, defaultVectorFields }: Props) => { + const [retrievalMode, setRetrievalMode] = useState(defaultRetrievalMode || RetrievalMode.Hybrid); + const [vectorFields, setVectorFields] = useState(defaultVectorFields || VectorFields.TextAndImageEmbeddings); const onRetrievalModeChange = (_ev: React.FormEvent, option?: IDropdownOption | undefined) => { setRetrievalMode(option?.data || RetrievalMode.Hybrid); updateRetrievalMode(option?.data || RetrievalMode.Hybrid); }; - const onVectorFieldsChange = (_ev: React.FormEvent, option?: IDropdownOption | undefined) => { - setVectorFieldOption(option?.key as VectorFieldOptions); - updateVectorFields([option?.key as VectorFieldOptions]); + const onVectorFieldsChange = (_ev: React.FormEvent, option?: IDropdownOption | undefined) => { + setVectorFields(option?.data || VectorFields.TextAndImageEmbeddings); + updateVectorFields(option?.data || VectorFields.TextAndImageEmbeddings); }; + // Only run if showImageOptions changes from true to false or false to true useEffect(() => { - showImageOptions - ? 
updateVectorFields([VectorFieldOptions.Embedding, VectorFieldOptions.ImageEmbedding]) - : updateVectorFields([VectorFieldOptions.Embedding]); - }, [showImageOptions]); + if (!showImageOptions) { + // If images are disabled, we must force to text-only embeddings + setVectorFields(VectorFields.Embedding); + updateVectorFields(VectorFields.Embedding); + } else { + // When image options become available, reset to default or use TextAndImageEmbeddings + setVectorFields(defaultVectorFields || VectorFields.TextAndImageEmbeddings); + updateVectorFields(defaultVectorFields || VectorFields.TextAndImageEmbeddings); + } + }, [showImageOptions, updateVectorFields, defaultVectorFields]); const retrievalModeId = useId("retrievalMode"); const retrievalModeFieldId = useId("retrievalModeField"); @@ -45,7 +53,7 @@ export const VectorSettings = ({ updateRetrievalMode, updateVectorFields, showIm (""); const [excludeCategory, setExcludeCategory] = useState(""); const [question, setQuestion] = useState(""); - const [vectorFieldList, setVectorFieldList] = useState([VectorFieldOptions.Embedding, VectorFieldOptions.ImageEmbedding]); + const [vectorFields, setVectorFields] = useState(VectorFields.TextAndImageEmbeddings); const [useOidSecurityFilter, setUseOidSecurityFilter] = useState(false); const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState(false); const [showGPT4VOptions, setShowGPT4VOptions] = useState(false); @@ -140,7 +140,7 @@ export function Component(): JSX.Element { reasoning_effort: reasoningEffort, use_oid_security_filter: useOidSecurityFilter, use_groups_security_filter: useGroupsSecurityFilter, - vector_fields: vectorFieldList, + vector_fields: vectorFields, use_gpt4v: useGPT4V, gpt4v_input: gpt4vInput, language: i18n.language, @@ -216,8 +216,8 @@ export function Component(): JSX.Element { case "gpt4vInput": setGPT4VInput(value); break; - case "vectorFieldList": - setVectorFieldList(value); + case "vectorFields": + setVectorFields(value); break; case "retrievalMode": setRetrievalMode(value); @@ -346,7 +346,7 @@ export function Component(): JSX.Element { retrievalMode={retrievalMode} useGPT4V={useGPT4V} gpt4vInput={gpt4vInput} - vectorFieldList={vectorFieldList} + vectorFields={vectorFields} showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} showReasoningEffortOption={showReasoningEffortOption} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 5d00c2c914..d379b2a693 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -15,7 +15,7 @@ import { ChatAppResponseOrError, ChatAppRequest, ResponseMessage, - VectorFieldOptions, + VectorFields, GPT4VInput, SpeechConfig } from "../../api"; @@ -56,7 +56,7 @@ const Chat = () => { const [includeCategory, setIncludeCategory] = useState(""); const [excludeCategory, setExcludeCategory] = useState(""); const [useSuggestFollowupQuestions, setUseSuggestFollowupQuestions] = useState(false); - const [vectorFieldList, setVectorFieldList] = useState([VectorFieldOptions.Embedding]); + const [vectorFields, setVectorFields] = useState(VectorFields.TextAndImageEmbeddings); const [useOidSecurityFilter, setUseOidSecurityFilter] = useState(false); const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState(false); const [gpt4vInput, setGPT4VInput] = useState(GPT4VInput.TextAndImages); @@ -103,6 +103,9 @@ const Chat = () => { const getConfig = async () => { configApi().then(config => { 
setShowGPT4VOptions(config.showGPT4VOptions); + if (config.showGPT4VOptions) { + setUseGPT4V(true); + } setUseSemanticRanker(config.showSemanticRankerOption); setShowSemanticRankerOption(config.showSemanticRankerOption); setUseQueryRewriting(config.showQueryRewritingOption); @@ -217,7 +220,7 @@ const Chat = () => { suggest_followup_questions: useSuggestFollowupQuestions, use_oid_security_filter: useOidSecurityFilter, use_groups_security_filter: useGroupsSecurityFilter, - vector_fields: vectorFieldList, + vector_fields: vectorFields, use_gpt4v: useGPT4V, gpt4v_input: gpt4vInput, language: i18n.language, @@ -335,8 +338,8 @@ const Chat = () => { case "gpt4vInput": setGPT4VInput(value); break; - case "vectorFieldList": - setVectorFieldList(value); + case "vectorFields": + setVectorFields(value); break; case "retrievalMode": setRetrievalMode(value); @@ -530,7 +533,7 @@ const Chat = () => { retrievalMode={retrievalMode} useGPT4V={useGPT4V} gpt4vInput={gpt4vInput} - vectorFieldList={vectorFieldList} + vectorFields={vectorFields} showSemanticRankerOption={showSemanticRankerOption} showQueryRewritingOption={showQueryRewritingOption} showReasoningEffortOption={showReasoningEffortOption} diff --git a/azure.yaml b/azure.yaml index 0793545f3a..f77bfb5828 100644 --- a/azure.yaml +++ b/azure.yaml @@ -57,6 +57,7 @@ pipeline: - AZURE_SEARCH_QUERY_SPELLER - AZURE_SEARCH_SEMANTIC_RANKER - AZURE_SEARCH_QUERY_REWRITING + - AZURE_SEARCH_FIELD_NAME_EMBEDDING - AZURE_STORAGE_ACCOUNT - AZURE_STORAGE_RESOURCE_GROUP - AZURE_STORAGE_SKU diff --git a/docs/deploy_existing.md b/docs/deploy_existing.md index 62a0321ca6..667bf767d4 100644 --- a/docs/deploy_existing.md +++ b/docs/deploy_existing.md @@ -31,8 +31,8 @@ You should set these values before running `azd up`. Once you've set them, retur 1. Run `azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION {Version string for existing chat deployment}`. Only needed if your chat deployment model version is not the default '2024-07-18'. You definitely need to change this if you changed the model. 1. Run `azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU {Name of SKU for existing chat deployment}`. Only needed if your chat deployment SKU is not the default 'Standard', like if it is 'GlobalStandard' instead. 1. Run `azd env set AZURE_OPENAI_EMB_DEPLOYMENT {Name of existing embedding deployment}`. Only needed if your embeddings deployment is not the default 'embedding'. -1. Run `azd env set AZURE_OPENAI_EMB_MODEL_NAME {Model name of existing embedding deployment}`. Only needed if your embeddings model is not the default 'text-embedding-ada-002'. -1. Run `azd env set AZURE_OPENAI_EMB_DIMENSIONS {Dimensions for existing embedding deployment}`. Only needed if your embeddings model is not the default 'text-embedding-ada-002'. +1. Run `azd env set AZURE_OPENAI_EMB_MODEL_NAME {Model name of existing embedding deployment}`. Only needed if your embeddings model is not the default 'text-embedding-3-large'. +1. Run `azd env set AZURE_OPENAI_EMB_DIMENSIONS {Dimensions for existing embedding deployment}`. Only needed if your embeddings model is not the default 'text-embedding-3-large'. 1. Run `azd env set AZURE_OPENAI_EMB_DEPLOYMENT_VERSION {Version string for existing embedding deployment}`. If your embeddings deployment is one of the 'text-embedding-3' models, set this to the number 1. 1. This project does *not* use keys when authenticating to Azure OpenAI. 
However, if your Azure OpenAI service must have key access enabled for some reason (like for use by other projects), then run `azd env set AZURE_OPENAI_DISABLE_KEYS false`. The default value is `true` so you should only run the command if you need key access. diff --git a/docs/deploy_features.md b/docs/deploy_features.md index 5da19ddc37..c402868932 100644 --- a/docs/deploy_features.md +++ b/docs/deploy_features.md @@ -5,8 +5,8 @@ You should typically enable these features before running `azd up`. Once you've * [Using different chat completion models](#using-different-chat-completion-models) * [Using reasoning models](#using-reasoning-models) -* [Using text-embedding-3 models](#using-text-embedding-3-models) -* [Enabling GPT-4 Turbo with Vision](#enabling-gpt-4-turbo-with-vision) +* [Using different embedding models](#using-different-embedding-models) +* [Enabling GPT vision feature](#enabling-gpt-vision-feature) * [Enabling media description with Azure Content Understanding](#enabling-media-description-with-azure-content-understanding) * [Enabling client-side chat history](#enabling-client-side-chat-history) * [Enabling persistent chat history with Azure Cosmos DB](#enabling-persistent-chat-history-with-azure-cosmos-db) @@ -128,12 +128,16 @@ This process does *not* delete your previous model deployment. If you want to de This feature allows you to use reasoning models to generate responses based on retrieved content. These models spend more time processing and understanding the user's request. To enable reasoning models, follow the steps in [the reasoning models guide](./reasoning.md). -## Using text-embedding-3 models +## Using different embedding models -By default, the deployed Azure web app uses the `text-embedding-ada-002` embedding model. If you want to use one of the text-embedding-3 models, you can do so by following these steps: +By default, the deployed Azure web app uses the `text-embedding-3-large` embedding model. If you want to use a different embedding model, you can do so by following these steps: 1. Run one of the following commands to set the desired model: + ```shell + azd env set AZURE_OPENAI_EMB_MODEL_NAME text-embedding-ada-002 + ``` + ```shell azd env set AZURE_OPENAI_EMB_MODEL_NAME text-embedding-3-small ``` @@ -144,32 +148,64 @@ By default, the deployed Azure web app uses the `text-embedding-ada-002` embeddi 2. Specify the desired dimensions of the model: (from 256-3072, model dependent) + Default dimensions for text-embedding-ada-002 + + ```shell + azd env set AZURE_OPENAI_EMB_DIMENSIONS 1536 + ``` + + Default dimensions for text-embedding-3-small + ```shell - azd env set AZURE_OPENAI_EMB_DIMENSIONS 256 + azd env set AZURE_OPENAI_EMB_DIMENSIONS 1536 ``` -3. Set the model version to "1" (the only version as of March 2024): + Default dimensions for text-embedding-3-large + + ```shell + azd env set AZURE_OPENAI_EMB_DIMENSIONS 3072 + ``` + +3. Set the model version, depending on the model you are using: + + For text-embedding-ada-002: + + ```shell + azd env set AZURE_OPENAI_EMB_DEPLOYMENT_VERSION 2 + ``` + + For text-embedding-3-small and text-embedding-3-large: ```shell azd env set AZURE_OPENAI_EMB_DEPLOYMENT_VERSION 1 ``` -4. When prompted during `azd up`, make sure to select a region for the OpenAI resource group location that supports the text-embedding-3 models. There are [limited regions available](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#embeddings-models). +4. 
To set the embedding model deployment SKU name, run this command with [the desired SKU name](https://learn.microsoft.com/azure/ai-services/openai/how-to/deployment-types#deployment-types). -If you have already deployed: + For GlobalStandard: + + ```bash + azd env set AZURE_OPENAI_EMB_DEPLOYMENT_SKU GlobalStandard + ``` -* You'll need to change the deployment name by running `azd env set AZURE_OPENAI_EMB_DEPLOYMENT ` -* You'll need to create a new index, and re-index all of the data using the new model. You can either delete the current index in the Azure Portal, or create an index with a different name by running `azd env set AZURE_SEARCH_INDEX new-index-name`. When you next run `azd up`, the new index will be created and the data will be re-indexed. -* If your OpenAI resource is not in one of the supported regions, you should delete `openAiResourceGroupLocation` from `.azure/YOUR-ENV-NAME/config.json`. When running `azd up`, you will be prompted to select a new region. + For Standard: + + ```bash + azd env set AZURE_OPENAI_EMB_DEPLOYMENT_SKU Standard + ``` + +5. When prompted during `azd up`, make sure to select a region for the OpenAI resource group location that supports the desired embedding model and deployment SKU. There are [limited regions available](https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#models-by-deployment-type). + +If you have already deployed: -> ![NOTE] -> The text-embedding-3 models are not currently supported by the integrated vectorization feature. +* You'll need to change the deployment name by running the appropriate commands for the model above. +* You'll need to create a new index, and re-index all of the data using the new model. You can either delete the current index in the Azure Portal, or create an index with a different name by running `azd env set AZURE_SEARCH_INDEX new-index-name`. When you next run `azd up`, the new index will be created. See the [data ingestion guide](./data_ingestion.md) for more details. -## Enabling GPT-4 Turbo with Vision +## Enabling GPT vision feature ⚠️ This feature is not currently compatible with [integrated vectorization](#enabling-integrated-vectorization). -This section covers the integration of GPT-4 Vision with Azure AI Search. Learn how to enhance your search capabilities with the power of image and text indexing, enabling advanced search functionalities over diverse document types. For a detailed guide on setup and usage, visit our [Enabling GPT-4 Turbo with Vision](gpt4v.md) page. +This section covers the integration of GPT vision models with Azure AI Search. Learn how to enhance your search capabilities with the power of image and text indexing, enabling advanced search functionalities over diverse document types. For a detailed guide on setup and usage, visit our page on [Using GPT vision model with RAG approach](gpt4v.md). ## Enabling media description with Azure Content Understanding diff --git a/docs/deploy_troubleshooting.md b/docs/deploy_troubleshooting.md index 658c200c79..782f48f3b3 100644 --- a/docs/deploy_troubleshooting.md +++ b/docs/deploy_troubleshooting.md @@ -8,6 +8,6 @@ If you are experiencing an error when deploying the RAG chat solution using the 1. You're getting "same resource name not allowed" conflicts. That's likely because you've run the sample multiple times and deleted the resources you've been creating each time, but are forgetting to purge them. Azure keeps resources for 48 hours unless you purge from soft delete. 
See [this article on purging resources](https://learn.microsoft.com/azure/cognitive-services/manage-resources?tabs=azure-portal#purge-a-deleted-resource). -1. You see `CERTIFICATE_VERIFY_FAILED` when the `prepdocs.py` script runs. That's typically due to incorrect SSL certificates setup on your machine. Try the suggestions in this [StackOverflow answer](https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3/43855394#43855394). +1. You see `CERTIFICATE_VERIFY_FAILED` when the `prepdocs.py` script runs. That's typically due to incorrect SSL certificates setup on your machine. Try the suggestions in this [StackOverflow answer](https://stackoverflow.com/a/43855394). 1. After running `azd up` and visiting the website, you see a '404 Not Found' in the browser. Wait 10 minutes and try again, as it might be still starting up. Then try running `azd deploy` and wait again. If you still encounter errors with the deployed app and are deploying to App Service, consult the [guide on debugging App Service deployments](/docs/appservice.md). Please file an issue if the logs don't help you resolve the error. diff --git a/docs/gpt4v.md b/docs/gpt4v.md index 8825bc2742..7bb4890a17 100644 --- a/docs/gpt4v.md +++ b/docs/gpt4v.md @@ -21,10 +21,10 @@ For more details on how this feature works, read [this blog post](https://techco * Create a [AI Vision account in Azure Portal first](https://ms.portal.azure.com/#create/Microsoft.CognitiveServicesComputerVision), so that you can agree to the Responsible AI terms for that resource. You can delete that account after agreeing. * The ability to deploy a gpt-4o model in the [supported regions](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability). If you're not sure, try to create a gpt-4o deployment from your Azure OpenAI deployments page. -* Ensure that you can deploy the Azure OpenAI resource group in [a region where all required components are available](https://learn.microsoft.com/azure/cognitive-services/openai/concepts/models#model-summary-table-and-region-availability): +* Ensure that you can deploy the Azure OpenAI resource group in [a region and deployment SKU where all required components are available](https://learn.microsoft.com/azure/cognitive-services/openai/concepts/models#model-summary-table-and-region-availability): * Azure OpenAI models * gpt-4o-mini - * text-embedding-ada-002 + * text-embedding-3-large * gpt-4o (for vision/evaluation features) * [Azure AI Vision](https://learn.microsoft.com/azure/ai-services/computer-vision/) diff --git a/infra/main.bicep b/infra/main.bicep index ca165b27f1..b63f0a10a4 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -27,6 +27,7 @@ param searchIndexName string // Set in main.parameters.json param searchQueryLanguage string // Set in main.parameters.json param searchQuerySpeller string // Set in main.parameters.json param searchServiceSemanticRankerLevel string // Set in main.parameters.json +param searchFieldNameEmbedding string // Set in main.parameters.json var actualSearchServiceSemanticRankerLevel = (searchServiceSkuName == 'free') ? 
  'disabled' : searchServiceSemanticRankerLevel
@@ -76,14 +77,30 @@ param chatHistoryDatabaseName string = 'chat-database'
param chatHistoryContainerName string = 'chat-history-v2'
param chatHistoryVersion string = 'cosmosdb-v2'
-// https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=standard%2Cstandard-chat-completions#models-by-deployment-type
+// https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#models-by-deployment-type
@description('Location for the OpenAI resource group')
@allowed([
+ 'australiaeast'
+ 'brazilsouth'
+ 'canadaeast'
  'eastus'
  'eastus2'
+ 'francecentral'
+ 'germanywestcentral'
+ 'japaneast'
+ 'koreacentral'
  'northcentralus'
+ 'norwayeast'
+ 'polandcentral'
+ 'southafricanorth'
  'southcentralus'
+ 'southindia'
+ 'spaincentral'
  'swedencentral'
+ 'switzerlandnorth'
+ 'uaenorth'
+ 'uksouth'
+ 'westeurope'
  'westus'
  'westus3'
])
@@ -134,7 +151,7 @@ var chatGpt = {
  modelName: !empty(chatGptModelName) ? chatGptModelName : 'gpt-4o-mini'
  deploymentName: !empty(chatGptDeploymentName) ? chatGptDeploymentName : 'gpt-4o-mini'
  deploymentVersion: !empty(chatGptDeploymentVersion) ? chatGptDeploymentVersion : '2024-07-18'
- deploymentSkuName: !empty(chatGptDeploymentSkuName) ? chatGptDeploymentSkuName : 'Standard'
+ deploymentSkuName: !empty(chatGptDeploymentSkuName) ? chatGptDeploymentSkuName : 'GlobalStandard' // Not backward-compatible
  deploymentCapacity: chatGptDeploymentCapacity != 0 ? chatGptDeploymentCapacity : 30
}
@@ -145,12 +162,12 @@ param embeddingDeploymentSkuName string = ''
param embeddingDeploymentCapacity int = 0
param embeddingDimensions int = 0
var embedding = {
- modelName: !empty(embeddingModelName) ? embeddingModelName : 'text-embedding-ada-002'
- deploymentName: !empty(embeddingDeploymentName) ? embeddingDeploymentName : 'embedding'
- deploymentVersion: !empty(embeddingDeploymentVersion) ? embeddingDeploymentVersion : '2'
- deploymentSkuName: !empty(embeddingDeploymentSkuName) ? embeddingDeploymentSkuName : 'Standard'
+ modelName: !empty(embeddingModelName) ? embeddingModelName : 'text-embedding-3-large'
+ deploymentName: !empty(embeddingDeploymentName) ? embeddingDeploymentName : 'text-embedding-3-large'
+ deploymentVersion: !empty(embeddingDeploymentVersion) ? embeddingDeploymentVersion : (embeddingModelName == 'text-embedding-ada-002' ? '2' : '1')
+ deploymentSkuName: !empty(embeddingDeploymentSkuName) ? embeddingDeploymentSkuName : (embeddingModelName == 'text-embedding-ada-002' ? 'Standard' : 'GlobalStandard')
  deploymentCapacity: embeddingDeploymentCapacity != 0 ? embeddingDeploymentCapacity : 30
- dimensions: embeddingDimensions != 0 ? embeddingDimensions : 1536
+ dimensions: embeddingDimensions != 0 ? embeddingDimensions : 3072
}
param gpt4vModelName string = ''
@@ -162,7 +179,7 @@ var gpt4v = {
  modelName: !empty(gpt4vModelName) ? gpt4vModelName : 'gpt-4o'
  deploymentName: !empty(gpt4vDeploymentName) ? gpt4vDeploymentName : 'gpt-4o'
  deploymentVersion: !empty(gpt4vModelVersion) ? gpt4vModelVersion : '2024-08-06'
- deploymentSkuName: !empty(gpt4vDeploymentSkuName) ? gpt4vDeploymentSkuName : 'Standard'
+ deploymentSkuName: !empty(gpt4vDeploymentSkuName) ? gpt4vDeploymentSkuName : 'GlobalStandard' // Not-backward compatible
  deploymentCapacity: gpt4vDeploymentCapacity != 0 ? gpt4vDeploymentCapacity : 10
}
@@ -175,7 +192,7 @@ var eval = {
  modelName: !empty(evalModelName) ? evalModelName : 'gpt-4o'
  deploymentName: !empty(evalDeploymentName) ?
evalDeploymentName : 'gpt-4o' deploymentVersion: !empty(evalModelVersion) ? evalModelVersion : '2024-08-06' - deploymentSkuName: !empty(evalDeploymentSkuName) ? evalDeploymentSkuName : 'Standard' + deploymentSkuName: !empty(evalDeploymentSkuName) ? evalDeploymentSkuName : 'GlobalStandard' // Not backward-compatible deploymentCapacity: evalDeploymentCapacity != 0 ? evalDeploymentCapacity : 30 } @@ -376,6 +393,7 @@ var appEnvVariables = { AZURE_VISION_ENDPOINT: useGPT4V ? computerVision.outputs.endpoint : '' AZURE_SEARCH_QUERY_LANGUAGE: searchQueryLanguage AZURE_SEARCH_QUERY_SPELLER: searchQuerySpeller + AZURE_SEARCH_FIELD_NAME_EMBEDDING: searchFieldNameEmbedding APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' @@ -1236,6 +1254,7 @@ output AZURE_RESOURCE_GROUP string = resourceGroup.name // Shared by all OpenAI deployments output OPENAI_HOST string = openAiHost output AZURE_OPENAI_EMB_MODEL_NAME string = embedding.modelName +output AZURE_OPENAI_EMB_DIMENSIONS int = embedding.dimensions output AZURE_OPENAI_CHATGPT_MODEL string = chatGpt.modelName output AZURE_OPENAI_GPT4V_MODEL string = gpt4v.modelName @@ -1244,9 +1263,17 @@ output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost && deployAzureOpenAi ? op output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' +output AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION string = isAzureOpenAiHost ? chatGpt.deploymentVersion : '' +output AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU string = isAzureOpenAiHost ? chatGpt.deploymentSkuName : '' output AZURE_OPENAI_EMB_DEPLOYMENT string = isAzureOpenAiHost ? embedding.deploymentName : '' +output AZURE_OPENAI_EMB_DEPLOYMENT_VERSION string = isAzureOpenAiHost ? embedding.deploymentVersion : '' +output AZURE_OPENAI_EMB_DEPLOYMENT_SKU string = isAzureOpenAiHost ? embedding.deploymentSkuName : '' output AZURE_OPENAI_GPT4V_DEPLOYMENT string = isAzureOpenAiHost && useGPT4V ? gpt4v.deploymentName : '' +output AZURE_OPENAI_GPT4V_DEPLOYMENT_VERSION string = isAzureOpenAiHost && useGPT4V ? gpt4v.deploymentVersion : '' +output AZURE_OPENAI_GPT4V_DEPLOYMENT_SKU string = isAzureOpenAiHost && useGPT4V ? gpt4v.deploymentSkuName : '' output AZURE_OPENAI_EVAL_DEPLOYMENT string = isAzureOpenAiHost && useEval ? eval.deploymentName : '' +output AZURE_OPENAI_EVAL_DEPLOYMENT_VERSION string = isAzureOpenAiHost && useEval ? eval.deploymentVersion : '' +output AZURE_OPENAI_EVAL_DEPLOYMENT_SKU string = isAzureOpenAiHost && useEval ? eval.deploymentSkuName : '' output AZURE_OPENAI_EVAL_MODEL string = isAzureOpenAiHost && useEval ? eval.modelName : '' output AZURE_OPENAI_REASONING_EFFORT string = defaultReasoningEffort output AZURE_SPEECH_SERVICE_ID string = useSpeechOutputAzure ? speech.outputs.resourceId : '' @@ -1263,6 +1290,7 @@ output AZURE_SEARCH_SERVICE string = searchService.outputs.name output AZURE_SEARCH_SERVICE_RESOURCE_GROUP string = searchServiceResourceGroup.name output AZURE_SEARCH_SEMANTIC_RANKER string = actualSearchServiceSemanticRankerLevel output AZURE_SEARCH_SERVICE_ASSIGNED_USERID string = searchService.outputs.principalId +output AZURE_SEARCH_FIELD_NAME_EMBEDDING string = searchFieldNameEmbedding output AZURE_COSMOSDB_ACCOUNT string = (useAuthentication && useChatHistoryCosmos) ? 
cosmosDb.outputs.name : '' output AZURE_CHAT_HISTORY_DATABASE string = chatHistoryDatabaseName diff --git a/infra/main.parameters.json b/infra/main.parameters.json index b75b9c4210..84303f198f 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -83,6 +83,9 @@ "searchServiceQueryRewriting": { "value": "${AZURE_SEARCH_QUERY_REWRITING=false}" }, + "searchFieldNameEmbedding": { + "value": "${AZURE_SEARCH_FIELD_NAME_EMBEDDING=embedding3}" + }, "defaultReasoningEffort": { "value": "${AZURE_OPENAI_REASONING_EFFORT=medium}" }, diff --git a/locustfile.py b/locustfile.py index 2bcc443759..b41b9bd372 100644 --- a/locustfile.py +++ b/locustfile.py @@ -102,7 +102,7 @@ def ask_question(self): "suggest_followup_questions": False, "use_oid_security_filter": False, "use_groups_security_filter": False, - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", "use_gpt4v": True, "gpt4v_input": "textAndImages", } @@ -129,7 +129,7 @@ def ask_question(self): "suggest_followup_questions": False, "use_oid_security_filter": False, "use_groups_security_filter": False, - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", "use_gpt4v": True, "gpt4v_input": "textAndImages", } diff --git a/tests/conftest.py b/tests/conftest.py index b458b5d453..4516c564ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ async def mock_acreate(*args, **kwargs): object="embedding", ) ], - model="text-embedding-ada-002", + model="text-embedding-3-large", usage=Usage(prompt_tokens=8, total_tokens=8), ) @@ -271,12 +271,16 @@ def mock_blob_container_client(monkeypatch): "OPENAI_HOST": "openai", "OPENAI_API_KEY": "secretkey", "OPENAI_ORGANIZATION": "organization", + "AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large", + "AZURE_OPENAI_EMB_DIMENSIONS": "3072", }, { "OPENAI_HOST": "azure", "AZURE_OPENAI_SERVICE": "test-openai-service", "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt", "AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada", + "AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large", + "AZURE_OPENAI_EMB_DIMENSIONS": "3072", "USE_GPT4V": "true", "AZURE_OPENAI_GPT4V_MODEL": "gpt-4", "VISION_ENDPOINT": "https://testvision.cognitiveservices.azure.com/", @@ -289,6 +293,8 @@ def mock_blob_container_client(monkeypatch): "AZURE_OPENAI_SERVICE": "test-openai-service", "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt", "AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada", + "AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large", + "AZURE_OPENAI_EMB_DIMENSIONS": "3072", "AZURE_USE_AUTHENTICATION": "true", "AZURE_USER_STORAGE_ACCOUNT": "test-user-storage-account", "AZURE_USER_STORAGE_CONTAINER": "test-user-storage-container", @@ -305,6 +311,8 @@ def mock_blob_container_client(monkeypatch): "AZURE_OPENAI_SERVICE": "test-openai-service", "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "test-chatgpt", "AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada", + "AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large", + "AZURE_OPENAI_EMB_DIMENSIONS": "3072", "AZURE_USE_AUTHENTICATION": "true", "AZURE_ENABLE_GLOBAL_DOCUMENT_ACCESS": "true", "AZURE_ENABLE_UNAUTHENTICATED_ACCESS": "true", diff --git a/tests/e2e.py b/tests/e2e.py index 117c298f64..00e30c4375 100644 --- a/tests/e2e.py +++ b/tests/e2e.py @@ -57,6 +57,8 @@ def run_server(port: int): "AZURE_SPEECH_SERVICE_LOCATION": "eastus", "AZURE_OPENAI_SERVICE": "test-openai-service", "AZURE_OPENAI_CHATGPT_MODEL": "gpt-4o-mini", + "AZURE_OPENAI_EMB_MODEL_NAME": "text-embedding-3-large", + "AZURE_OPENAI_EMB_DIMENSIONS": 
"3072", }, clear=True, ): @@ -216,7 +218,7 @@ def handle_chat(route: Route): overrides = route.request.post_data_json["context"]["overrides"] assert overrides["gpt4v_input"] == "images" assert overrides["use_gpt4v"] is True - assert overrides["vector_fields"] == ["imageEmbedding"] + assert overrides["vector_fields"] == "imageEmbeddingOnly" # Read the JSON from our snapshot results and return as the response f = open("tests/snapshots/test_app/test_chat_text/client0/result.json") @@ -247,7 +249,9 @@ def handle_config(route: Route): # Customize the GPT-4-vision settings page.get_by_role("button", name="Developer settings").click() - page.get_by_text("Use GPT vision model").click() + # Check that "Use GPT vision model" is visible and selected + expect(page.get_by_text("Use GPT vision model")).to_be_visible() + expect(page.get_by_role("checkbox", name="Use GPT vision model")).to_be_checked() page.get_by_text("Images and text").click() page.get_by_role("option", name="Images", exact=True).click() page.get_by_text("Text and Image embeddings").click() diff --git a/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json b/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json index 29bf5c4b32..3f41dc08d3 100644 --- a/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json +++ b/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json b/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json index cdb33e9ff1..aa7163fe2a 100644 --- a/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json +++ b/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json b/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json index 9dac59bde7..a895c0f723 100644 --- a/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json +++ b/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json b/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json index 9976288c77..a4b1ea96bd 100644 --- a/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json +++ b/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": 
"file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json index 30ddcddf68..d31d6dbde9 100644 --- a/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json index dbf67f961a..14462a67bd 100644 --- a/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json index fd52926eca..672910c02f 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json index 751b64760d..f3168c117a 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json index c4617a9651..3226e2c741 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json 
b/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json index cb68ea0377..9bf2a82bc9 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json index ece30dbcc8..3a9c7d6936 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json index 950a125e97..2db882b50a 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json index 4b76175c16..3e76befe60 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json index 0afea366fa..371e79bf5e 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_vision/client0/result.json 
b/tests/snapshots/test_app/test_ask_vision/client0/result.json index 7d11ee12eb..e9d4bfdca2 100644 --- a/tests/snapshots/test_app/test_ask_vision/client0/result.json +++ b/tests/snapshots/test_app/test_ask_vision/client0/result.json @@ -33,10 +33,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_ask_vision/client1/result.json b/tests/snapshots/test_app/test_ask_vision/client1/result.json index 38fe9c5b82..d130c6c6e6 100644 --- a/tests/snapshots/test_app/test_ask_vision/client1/result.json +++ b/tests/snapshots/test_app/test_ask_vision/client1/result.json @@ -20,10 +20,7 @@ "use_semantic_ranker": false, "use_text_search": true, "use_vector_search": true, - "vector_fields": [ - "embedding", - "imageEmbedding" - ] + "vector_fields": "textAndImageEmbeddings" }, "title": "Search using user query" }, @@ -33,10 +30,8 @@ "captions": [], "category": null, "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", - "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "groups": null, "id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", - "imageEmbedding": null, "oids": null, "reranker_score": 3.1704962253570557, "score": 0.04972677677869797, diff --git a/tests/snapshots/test_app/test_ask_vision/client1/result.jsonlines b/tests/snapshots/test_app/test_ask_vision/client1/result.jsonlines index 2aa365591e..e285f0bdd6 100644 --- a/tests/snapshots/test_app/test_ask_vision/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_ask_vision/client1/result.jsonlines @@ -1 +1 @@ -{"choices":[{"context":{"data_points":{"images":[{"detail":"auto","url":"data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII="}],"text":["Benefit_Options-2.pdf: There is a whistleblower policy."]},"thoughts":[{"description":"Are interest rates high?","props":{"semanticCaptions":false,"vector_fields":["embedding"]},"title":"Search Query"},{"description":[{"captions":[{"additional_properties":{},"highlights":[],"text":"Caption: A whistleblower policy."}],"category":null,"content":"There is a whistleblower 
policy.","embedding":null,"groups":null,"id":"file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2","imageEmbedding":null,"oids":null,"sourcefile":"Benefit_Options.pdf","sourcepage":"Benefit_Options-2.pdf"}],"props":null,"title":"Results"},{"description":["{'role': 'system', 'content': \"You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images. Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName: Each text source starts in a new line and has the file name followed by colon and the actual information Always include the source name from the image or text for each fact you use in the response in the format: [filename] Answer the following question using only the data provided in the sources below. For tabular information return it as an html table. Do not return markdown format. The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts \"}","{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': 'Benefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"],"props":null,"title":"Prompt"}]},"finish_reason":"stop","index":0,"message":{"content":"From the provided sources, the impact of interest rates and GDP growth on financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]","function_call":null,"role":"assistant","tool_calls":null},"session_state":null}],"created":0,"id":"test-123","model":"test-model","object":"chat.completion","system_fingerprint":null,"usage":null} +{"choices":[{"context":{"data_points":{"images":[{"detail":"auto","url":"data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII="}],"text":["Benefit_Options-2.pdf: There is a whistleblower policy."]},"thoughts":[{"description":"Are interest rates high?","props":{"semanticCaptions":false,"vector_fields":"textEmbeddingOnly"},"title":"Search Query"},{"description":[{"captions":[{"additional_properties":{},"highlights":[],"text":"Caption: A whistleblower policy."}],"category":null,"content":"There is a whistleblower policy.","embedding":null,"groups":null,"id":"file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2","imageEmbedding":null,"oids":null,"sourcefile":"Benefit_Options.pdf","sourcepage":"Benefit_Options-2.pdf"}],"props":null,"title":"Results"},{"description":["{'role': 'system', 'content': \"You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images. Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName: Each text source starts in a new line and has the file name followed by colon and the actual information Always include the source name from the image or text for each fact you use in the response in the format: [filename] Answer the following question using only the data provided in the sources below. 
For tabular information return it as an html table. Do not return markdown format. The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts \"}","{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': 'Benefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"],"props":null,"title":"Prompt"}]},"finish_reason":"stop","index":0,"message":{"content":"From the provided sources, the impact of interest rates and GDP growth on financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]","function_call":null,"role":"assistant","tool_calls":null},"session_state":null}],"created":0,"id":"test-123","model":"test-model","object":"chat.completion","system_fingerprint":null,"usage":null} diff --git a/tests/snapshots/test_app/test_chat_followup/client0/result.json b/tests/snapshots/test_app/test_chat_followup/client0/result.json index 7f4fa26166..a87b79ac06 100644 --- a/tests/snapshots/test_app/test_chat_followup/client0/result.json +++ b/tests/snapshots/test_app/test_chat_followup/client0/result.json @@ -73,10 +73,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_followup/client1/result.json b/tests/snapshots/test_app/test_chat_followup/client1/result.json index cf31b9483e..d918ed2af2 100644 --- a/tests/snapshots/test_app/test_chat_followup/client1/result.json +++ b/tests/snapshots/test_app/test_chat_followup/client1/result.json @@ -74,10 +74,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid/client0/result.json index 0fdac0a03d..6868aafff7 100644 --- a/tests/snapshots/test_app/test_chat_hybrid/client0/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid/client1/result.json index c6db9307c9..c093b39071 100644 --- a/tests/snapshots/test_app/test_chat_hybrid/client1/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": 
"file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json index 6d47c7cad0..6a53d78bf3 100644 --- a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json index f89b711cda..2e53c6b9b4 100644 --- a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json index 1fa0ba1dd2..2bb5784a58 100644 --- a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json index 0ff86af91b..bc8ef7155d 100644 --- a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json +++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json b/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json index 7c526c680b..03d211fa4b 100644 --- a/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json +++ b/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 
3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json b/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json index 5bcd584284..98279625db 100644 --- a/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json +++ b/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json b/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json index 16d9bc346a..cf0c5f3580 100644 --- a/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json +++ b/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json b/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json index 43a36decc0..8aa6a80f2a 100644 --- a/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json +++ b/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_seed/client0/result.json b/tests/snapshots/test_app/test_chat_seed/client0/result.json index 0fdac0a03d..6868aafff7 100644 --- a/tests/snapshots/test_app/test_chat_seed/client0/result.json +++ b/tests/snapshots/test_app/test_chat_seed/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_seed/client1/result.json b/tests/snapshots/test_app/test_chat_seed/client1/result.json index c6db9307c9..c093b39071 100644 --- a/tests/snapshots/test_app/test_chat_seed/client1/result.json +++ b/tests/snapshots/test_app/test_chat_seed/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json b/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json index 92a71a0331..9cddd24798 100644 --- 
a/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json +++ b/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json b/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json index c32515de71..36b1d61f56 100644 --- a/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json +++ b/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines index 53eee9c9f2..eeca662510 100644 --- a/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines @@ -1,5 +1,5 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. 
Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf]. ", "role": "assistant"}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} -{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. 
Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. 
Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}} diff --git a/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines index 422c3bcf83..d8215d797d 100644 --- a/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines @@ -1,5 +1,5 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, 
and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf]. 
", "role": "assistant"}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. 
Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} -{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. 
Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<>\n<>\n<>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}} diff --git a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines index f458d12065..750691cb87 100644 --- a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} diff --git a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines index 5ae6b512b3..dea22c846e 100644 --- a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. 
Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. 
Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}} diff --git a/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines index 9e578d110a..decd3d54ab 100644 --- a/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines index c1ea935fa6..d98838102e 100644 --- a/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. 
If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines index d8beed42bc..954e063ff0 100644 --- a/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. 
[Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines index 06a45f2747..5a907cf72b 100644 --- a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. 
Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. 
Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. 
If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null, "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. 
If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null, "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines index d7adb3e45a..346e8de618 100644 --- a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines index d2b7cd3347..4b1b337845 100644 --- a/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines @@ -1,4 +1,4 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}} -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. 
Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. 
Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null} diff --git a/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines index 5f639076f1..d31541b4d7 100644 --- a/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines @@ -1,3 +1,3 @@ -{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023-6.png: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. 
-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "vector_fields": ["embedding", "imageEmbedding"], "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\nEach image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\nEach text source starts in a new line and has the file name followed by colon and the actual information\nAlways include the source name from the image or text for each fact you use in the response in the format: [filename]\nAnswer the following question using only the data provided in the sources below.\nIf asking a clarifying question to the user would help, ask the question.\nBe brief in your answers.\nThe text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\nIf you cannot answer using the sources below, say you don't know. Return just the answer without any input texts."}, {"role": "user", "content": [{"type": "text", "text": "Are interest rates high?"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="}}, {"type": "text", "text": "Sources:\n\nFinancial Market Analysis Report 2023-6.png: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. 
-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}]}], "props": {"model": "gpt-4"}}], "followup_questions": null}, "session_state": null} +{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023-6.png: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "vector_fields": "textAndImageEmbeddings", "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. 
This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\nEach image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\nEach text source starts in a new line and has the file name followed by colon and the actual information\nAlways include the source name from the image or text for each fact you use in the response in the format: [filename]\nAnswer the following question using only the data provided in the sources below.\nIf asking a clarifying question to the user would help, ask the question.\nBe brief in your answers.\nThe text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\nIf you cannot answer using the sources below, say you don't know. Return just the answer without any input texts."}, {"role": "user", "content": [{"type": "text", "text": "Are interest rates high?"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="}}, {"type": "text", "text": "Sources:\n\nFinancial Market Analysis Report 2023-6.png: 31 Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. 
-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}]}], "props": {"model": "gpt-4"}}], "followup_questions": null}, "session_state": null} {"delta": {"content": null, "role": "assistant"}} {"delta": {"content": "From the provided sources, the impact of interest rates and GDP growth on financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]", "role": null}} diff --git a/tests/snapshots/test_app/test_chat_text/client0/result.json b/tests/snapshots/test_app/test_chat_text/client0/result.json index 1451d011ba..5d42b2f04d 100644 --- a/tests/snapshots/test_app/test_chat_text/client0/result.json +++ b/tests/snapshots/test_app/test_chat_text/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text/client1/result.json b/tests/snapshots/test_app/test_chat_text/client1/result.json index 55c42ce819..8d81c0e2e0 100644 --- a/tests/snapshots/test_app/test_chat_text/client1/result.json +++ b/tests/snapshots/test_app/test_chat_text/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json b/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json index 8de34fcda6..c20697d2f5 100644 --- a/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json b/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json index 23d76162ed..201fb9fcc8 100644 --- a/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json index 4b6538d397..0634d404ad 100644 
--- a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json @@ -73,10 +73,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json index 205768aaa9..431258417f 100644 --- a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json +++ b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json @@ -73,10 +73,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json index 80bc43104d..6740ce36cb 100644 --- a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json index 969d01f46c..78d46952af 100644 --- a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json +++ b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json index 317f577d61..694304c70d 100644 --- a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json index 2e3999cf6a..51ceabdab8 100644 --- a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json +++ 
b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json b/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json index 80bc43104d..6740ce36cb 100644 --- a/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json b/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json index 969d01f46c..78d46952af 100644 --- a/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json +++ b/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vector/client0/result.json b/tests/snapshots/test_app/test_chat_vector/client0/result.json index e7b84204c1..e07471425e 100644 --- a/tests/snapshots/test_app/test_chat_vector/client0/result.json +++ b/tests/snapshots/test_app/test_chat_vector/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vector/client1/result.json b/tests/snapshots/test_app/test_chat_vector/client1/result.json index c568dbe297..049906c697 100644 --- a/tests/snapshots/test_app/test_chat_vector/client1/result.json +++ b/tests/snapshots/test_app/test_chat_vector/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json index 43834ab3b5..2ff04cc1df 100644 --- a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json +++ b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": 
"file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json index 866a2ac2b8..0d1d59e299 100644 --- a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json +++ b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json @@ -72,10 +72,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vision/client0/result.json b/tests/snapshots/test_app/test_chat_vision/client0/result.json index 5592597666..70e52d24b0 100644 --- a/tests/snapshots/test_app/test_chat_vision/client0/result.json +++ b/tests/snapshots/test_app/test_chat_vision/client0/result.json @@ -65,10 +65,8 @@ "captions": [], "category": null, "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", - "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "groups": null, "id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", - "imageEmbedding": null, "oids": null, "reranker_score": 3.1704962253570557, "score": 0.04972677677869797, diff --git a/tests/snapshots/test_app/test_chat_vision/client1/result.json b/tests/snapshots/test_app/test_chat_vision/client1/result.json index 9a5514bf24..bbbd57898c 100644 --- a/tests/snapshots/test_app/test_chat_vision/client1/result.json +++ b/tests/snapshots/test_app/test_chat_vision/client1/result.json @@ -52,10 +52,7 @@ "use_semantic_captions": false, "use_semantic_ranker": false, "use_text_search": true, - "vector_fields": [ - "embedding", - "imageEmbedding" - ] + "vector_fields": "textAndImageEmbeddings" }, "title": "Search using generated search query" }, @@ -65,10 +62,8 @@ "captions": [], "category": null, "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. 
This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", - "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "groups": null, "id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", - "imageEmbedding": null, "oids": null, "reranker_score": 3.1704962253570557, "score": 0.04972677677869797, diff --git a/tests/snapshots/test_app/test_chat_vision/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_vision/client1/result.jsonlines index c854668fc6..4ac90fdfbf 100644 --- a/tests/snapshots/test_app/test_chat_vision/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_vision/client1/result.jsonlines @@ -1,3 +1,3 @@ -{"choices": [{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": [{"url": "data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=", "detail": "auto"}]}, "thoughts": [{"title": "Original user query", "description": "Are interest rates high?", "props": null}, {"title": "Generated search query", "description": "The capital of France is Paris. 
[Benefit_Options-2.pdf].", "props": {"semanticCaptions": false, "vector_fields": ["embedding"]}}, {"title": "Results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}]}], "props": null}, {"title": "Prompt", "description": ["{'role': 'system', 'content': \"\\n You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\\n Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\\n Each text source starts in a new line and has the file name followed by colon and the actual information\\n Always include the source name from the image or text for each fact you use in the response in the format: [filename]\\n Answer the following question using only the data provided in the sources below.\\n If asking a clarifying question to the user would help, ask the question.\\n Be brief in your answers.\\n For tabular information return it as an html table. Do not return markdown format.\\n The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\\n If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts.\\n \\n \\n \"}", "{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': '\\n\\nSources:\\nBenefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"], "props": null}]}, "session_state": null, "finish_reason": null, "index": 0}], "object": "chat.completion.chunk"} +{"choices": [{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": [{"url": "data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=", "detail": "auto"}]}, "thoughts": [{"title": "Original user query", "description": "Are interest rates high?", "props": null}, {"title": "Generated search query", "description": "The capital of France is Paris. 
[Benefit_Options-2.pdf].", "props": {"semanticCaptions": false, "vector_fields": "textAndImageEmbeddings"}}, {"title": "Results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}]}], "props": null}, {"title": "Prompt", "description": ["{'role': 'system', 'content': \"\\n You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\\n Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\\n Each text source starts in a new line and has the file name followed by colon and the actual information\\n Always include the source name from the image or text for each fact you use in the response in the format: [filename]\\n Answer the following question using only the data provided in the sources below.\\n If asking a clarifying question to the user would help, ask the question.\\n Be brief in your answers.\\n For tabular information return it as an html table. Do not return markdown format.\\n The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\\n If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts.\\n \\n \\n \"}", "{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': '\\n\\nSources:\\nBenefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"], "props": null}]}, "session_state": null, "finish_reason": null, "index": 0}], "object": "chat.completion.chunk"} {"id": "test-id", "choices": [{"delta": {"content": null, "function_call": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0}], "created": 1, "model": "gpt-4o-mini", "object": "chat.completion.chunk", "system_fingerprint": null} {"id": "test-id", "choices": [{"delta": {"content": "The capital of France is Paris. 
[Benefit_Options-2.pdf].", "function_call": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0}], "created": 1, "model": "gpt-4o-mini", "object": "chat.completion.chunk", "system_fingerprint": null} diff --git a/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json b/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json index 7adcb574bb..3ed411512b 100644 --- a/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json +++ b/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json @@ -71,10 +71,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json index 5527109145..1c91558a9d 100644 --- a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json +++ b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json @@ -52,10 +52,7 @@ "use_semantic_captions": false, "use_semantic_ranker": false, "use_text_search": false, - "vector_fields": [ - "embedding", - "imageEmbedding" - ] + "vector_fields": "textAndImageEmbeddings" }, "title": "Search using generated search query" }, @@ -65,10 +62,8 @@ "captions": [], "category": null, "content": "31\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", - "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "groups": null, "id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", - "imageEmbedding": null, "oids": null, "reranker_score": 3.1704962253570557, "score": 0.04972677677869797, diff --git a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.jsonlines index c362a6d628..064a1c8b4a 100644 --- a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.jsonlines @@ -1,3 +1,3 @@ -{"choices": [{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": [{"url": "data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=", "detail": "auto"}]}, "thoughts": [{"title": "Original user query", "description": "Are interest rates high?", "props": null}, {"title": "Generated search query", "description": null, "props": {"semanticCaptions": false, "vector_fields": ["embedding"]}}, {"title": "Results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}]}], "props": null}, {"title": "Prompt", "description": ["{'role': 'system', 'content': \"\\n You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\\n Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\\n Each text source starts in a new line and has the file name followed by colon and the actual information\\n Always include the source name from the image or text for each fact you use in the response in the format: [filename]\\n Answer the following question using only the data provided in the sources below.\\n If asking a clarifying question to the user would help, ask the question.\\n Be brief in your answers.\\n For tabular information return it as an html table. Do not return markdown format.\\n The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\\n If you cannot answer using the sources below, say you don't know. 
Return just the answer without any input texts.\\n \\n \\n \"}", "{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': '\\n\\nSources:\\nBenefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"], "props": null}]}, "session_state": null, "finish_reason": null, "index": 0}], "object": "chat.completion.chunk"} +{"choices": [{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": [{"url": "data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=", "detail": "auto"}]}, "thoughts": [{"title": "Original user query", "description": "Are interest rates high?", "props": null}, {"title": "Generated search query", "description": null, "props": {"semanticCaptions": false, "vector_fields": "textAndImageEmbeddings"}}, {"title": "Results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}]}], "props": null}, {"title": "Prompt", "description": ["{'role': 'system', 'content': \"\\n You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\\n Each image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:\\n Each text source starts in a new line and has the file name followed by colon and the actual information\\n Always include the source name from the image or text for each fact you use in the response in the format: [filename]\\n Answer the following question using only the data provided in the sources below.\\n If asking a clarifying question to the user would help, ask the question.\\n Be brief in your answers.\\n For tabular information return it as an html table. Do not return markdown format.\\n The text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\\n If you cannot answer using the sources below, say you don't know. Return just the answer without any input texts.\\n \\n \\n \"}", "{'role': 'user', 'content': [{'text': 'Are interest rates high?', 'type': 'text'}, {'text': '\\n\\nSources:\\nBenefit_Options-2.pdf: There is a whistleblower policy.', 'type': 'text'}, {'image_url': {'url': 'data:image/png;base64,iVBOR1BORw0KGgoAAAANSUhEUgAAAAEAAAABAQAAAAA3bvkkAAAACklEQVR4nGMAAQAABQABDQ0tuhsAAAAASUVORK5CYII=', 'detail': 'auto'}, 'type': 'image_url'}]}"], "props": null}]}, "session_state": null, "finish_reason": null, "index": 0}], "object": "chat.completion.chunk"} {"id": "test-id", "choices": [{"delta": {"content": null, "function_call": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0}], "created": 1, "model": "gpt-4o-mini", "object": "chat.completion.chunk", "system_fingerprint": null} {"id": "test-id", "choices": [{"delta": {"content": "The capital of France is Paris. 
[Benefit_Options-2.pdf].", "function_call": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0}], "created": 1, "model": "gpt-4o-mini", "object": "chat.completion.chunk", "system_fingerprint": null} diff --git a/tests/snapshots/test_app/test_chat_with_history/client0/result.json b/tests/snapshots/test_app/test_chat_with_history/client0/result.json index 071e3aa95b..aa0c192e34 100644 --- a/tests/snapshots/test_app/test_chat_with_history/client0/result.json +++ b/tests/snapshots/test_app/test_chat_with_history/client0/result.json @@ -79,10 +79,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/snapshots/test_app/test_chat_with_history/client1/result.json b/tests/snapshots/test_app/test_chat_with_history/client1/result.json index 195dcb1587..0b2eda9f3e 100644 --- a/tests/snapshots/test_app/test_chat_with_history/client1/result.json +++ b/tests/snapshots/test_app/test_chat_with_history/client1/result.json @@ -80,10 +80,8 @@ ], "category": null, "content": "There is a whistleblower policy.", - "embedding": null, "groups": null, "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", - "imageEmbedding": null, "oids": null, "reranker_score": 3.4577205181121826, "score": 0.03279569745063782, diff --git a/tests/test_app.py b/tests/test_app.py index f3bbeaccd1..1d06e00734 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -935,7 +935,7 @@ async def test_chat_vision(client, snapshot): "overrides": { "use_gpt4v": True, "gpt4v_input": "textAndImages", - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", }, }, }, @@ -955,7 +955,7 @@ async def test_chat_stream_vision(client, snapshot): "overrides": { "use_gpt4v": True, "gpt4v_input": "textAndImages", - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", }, }, }, @@ -975,7 +975,7 @@ async def test_chat_vision_vectors(client, snapshot): "overrides": { "use_gpt4v": True, "gpt4v_input": "textAndImages", - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", "retrieval_mode": "vectors", }, }, @@ -996,7 +996,7 @@ async def test_ask_vision(client, snapshot): "overrides": { "use_gpt4v": True, "gpt4v_input": "textAndImages", - "vector_fields": ["embedding", "imageEmbedding"], + "vector_fields": "textAndImageEmbeddings", }, }, }, diff --git a/tests/test_app_config.py b/tests/test_app_config.py index bb5a595217..ed440ca75a 100644 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -16,6 +16,8 @@ def minimal_env(monkeypatch): monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service") monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service") monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-4o-mini") + monkeypatch.setenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-3-large") + monkeypatch.setenv("AZURE_OPENAI_EMB_DIMENSIONS", "3072") yield diff --git a/tests/test_chatapproach.py b/tests/test_chatapproach.py index a12c3e4147..9900ae88bc 100644 --- a/tests/test_chatapproach.py +++ b/tests/test_chatapproach.py @@ -30,6 +30,7 @@ def chat_approach(): embedding_deployment="embeddings", embedding_model=MOCK_EMBEDDING_MODEL_NAME, embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS, + 
embedding_field="embedding3", sourcepage_field="", content_field="", query_language="en-us", @@ -176,6 +177,7 @@ async def test_search_results_filtering_by_scores( embedding_deployment="embeddings", embedding_model=MOCK_EMBEDDING_MODEL_NAME, embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS, + embedding_field="embedding3", sourcepage_field="", content_field="", query_language="en-us", @@ -214,6 +216,7 @@ async def test_search_results_query_rewriting(monkeypatch): embedding_deployment="embeddings", embedding_model=MOCK_EMBEDDING_MODEL_NAME, embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS, + embedding_field="embedding3", sourcepage_field="", content_field="", query_language="en-us", diff --git a/tests/test_chatvisionapproach.py b/tests/test_chatvisionapproach.py index d2c450efca..7039cae395 100644 --- a/tests/test_chatvisionapproach.py +++ b/tests/test_chatvisionapproach.py @@ -60,6 +60,7 @@ def chat_approach(openai_client, mock_confidential_client_success): embedding_deployment="embeddings", embedding_model=MOCK_EMBEDDING_MODEL_NAME, embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS, + embedding_field="embedding3", sourcepage_field="", content_field="", query_language="en-us", @@ -149,4 +150,4 @@ async def test_compute_text_embedding(chat_approach, openai_client, mock_openai_ assert isinstance(result, VectorizedQuery) assert result.vector == [0.0023064255, -0.009327292, -0.0028842222] assert result.k_nearest_neighbors == 50 - assert result.fields == "embedding" + assert result.fields == "embedding3" diff --git a/tests/test_fetch_image.py b/tests/test_fetch_image.py index 73d951ea49..5f3421b91e 100644 --- a/tests/test_fetch_image.py +++ b/tests/test_fetch_image.py @@ -78,8 +78,6 @@ async def close(self): test_document = Document( id="test", content="test content", - embedding=[1, 2, 3], - image_embedding=[4, 5, 6], oids=[], groups=[], captions=[], diff --git a/tests/test_prepdocs.py b/tests/test_prepdocs.py index 79489c93ff..2d598dc4c0 100644 --- a/tests/test_prepdocs.py +++ b/tests/test_prepdocs.py @@ -51,7 +51,7 @@ async def mock_create_client(*args, **kwargs): object="embedding", ) ], - model="text-embedding-ada-002", + model="text-embedding-3-large", usage=Usage(prompt_tokens=8, total_tokens=8), ) ) diff --git a/tests/test_searchmanager.py b/tests/test_searchmanager.py index cc8403bb51..9689509bf1 100644 --- a/tests/test_searchmanager.py +++ b/tests/test_searchmanager.py @@ -50,11 +50,11 @@ async def mock_list_index_names(self): monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index) monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names) - manager = SearchManager(search_info) + manager = SearchManager(search_info, use_int_vectorization=False, field_name_embedding="embedding") await manager.create_index() assert len(indexes) == 1, "It should have created one index" assert indexes[0].name == "test" - assert len(indexes[0].fields) == 7 + assert len(indexes[0].fields) == 6 @pytest.mark.asyncio @@ -71,11 +71,15 @@ async def mock_list_index_names(self): monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index) monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names) - manager = SearchManager(search_info, use_int_vectorization=True) + manager = SearchManager( + search_info, + use_int_vectorization=True, + field_name_embedding="embedding", + ) await manager.create_index() assert len(indexes) == 1, "It should have created one index" assert indexes[0].name == "test" - assert len(indexes[0].fields) == 8 + assert 
len(indexes[0].fields) == 7 @pytest.mark.asyncio @@ -165,11 +169,12 @@ async def mock_list_index_names(self): manager = SearchManager( search_info, use_acls=True, + field_name_embedding="embedding", ) await manager.create_index() assert len(indexes) == 1, "It should have created one index" assert indexes[0].name == "test" - assert len(indexes[0].fields) == 9 + assert len(indexes[0].fields) == 8 @pytest.mark.asyncio @@ -258,7 +263,7 @@ async def mock_create_client(*args, **kwargs): object="embedding", ) ], - model="text-embedding-ada-002", + model="text-embedding-3-large", usage=Usage(prompt_tokens=8, total_tokens=8), ) ) @@ -283,6 +288,7 @@ async def mock_upload_documents(self, documents): manager = SearchManager( search_info, embeddings=embeddings, + field_name_embedding="embedding3", ) test_io = io.BytesIO(b"test content") @@ -303,7 +309,7 @@ async def mock_upload_documents(self, documents): ) assert len(documents_uploaded) == 1, "It should have uploaded one document" - assert documents_uploaded[0]["embedding"] == [ + assert documents_uploaded[0]["embedding3"] == [ 0.0023064255, -0.009327292, -0.0028842222, diff --git a/tests/test_upload.py b/tests/test_upload.py index 0a9b1ef34a..9a758c1f0a 100644 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -75,7 +75,7 @@ async def mock_create_client(self, *args, **kwargs): object="embedding", ) ], - model="text-embedding-ada-002", + model="text-embedding-3-large", usage=Usage(prompt_tokens=8, total_tokens=8), ) )
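
The test diffs above exercise the newly configurable embedding field name: the chat approaches receive an embedding_field argument (set to "embedding3" in the tests), compute_text_embedding is expected to return a VectorizedQuery targeting that field, and SearchManager uploads document vectors under the same key. The snippet below is a minimal sketch, not the repository's code, of how such a configurable field name can be threaded into the vector query those assertions check; the build_vector_query helper name is illustrative, while the sample vector, k_nearest_neighbors=50, and the "embedding3" field name come from the assertions in the test_chatvisionapproach.py and test_searchmanager.py hunks above.

from azure.search.documents.models import VectorizedQuery

def build_vector_query(query_vector: list[float], embedding_field: str = "embedding") -> VectorizedQuery:
    # Target the configured vector field instead of a hard-coded "embedding",
    # so an index that stores vectors under e.g. "embedding3" still works.
    return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields=embedding_field)

# Mirrors the expectations in the updated tests: the query carries the vector,
# the neighbor count, and the configured field name.
query = build_vector_query([0.0023064255, -0.009327292, -0.0028842222], embedding_field="embedding3")
assert query.fields == "embedding3"
assert query.k_nearest_neighbors == 50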