Skip to content

Commit

Permalink
fix ai search (#119)
Browse files Browse the repository at this point in the history
* fix ai search

* allow names to be specified for indexer, skillset, and datasource; set defaults based on index name
  • Loading branch information
robch authored Nov 14, 2023
1 parent 6166c6c commit 59ea450
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 14 deletions.
2 changes: 2 additions & 0 deletions src/ai/.x/help/search.index.update
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ USAGE: ai index update [...]
AZURE SEARCH DATA SOURCE
--data-source-connection NAME (see: ai help search data source connection)
--blob-container ENDPOINT/NAME (see: ai help search data source blob container)
--indexer-name NAME (see: ai help search indexer name)
--skillset-name NAME (see: ai help search skillset name)
--id-field NAME (see: ai help search id field name)
--content-field NAME (see: ai help search content field name)
--vector-field NAME (see: ai help search vector field name)
Expand Down
1 change: 1 addition & 0 deletions src/ai/commands/parsers/search_command_parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ public CommonSearchNamedValueTokenParsers() : base(

BlobContainerToken.Parser(),
SearchIndexerDataSourceConnectionNameToken.Parser(),
SearchIndexerSkillsetNameToken.Parser(),
IndexIdFieldNameToken.Parser(),
IndexContentFieldNameToken.Parser(),
IndexVectorFieldNameToken.Parser(),
Expand Down
28 changes: 14 additions & 14 deletions src/ai/commands/search_command.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,15 @@ private void DoIndexUpdate()
var embeddingsEndpoint = DemandEmbeddingsEndpointUri(action, command);
var embeddingsApiKey = DemandEmbeddingsApiKey(action, command);
var embeddingModelDeployment = SearchEmbeddingModelDeploymentNameToken.Data().Demand(_values, action, command, checkConfig: "embedding.model.deployment.name");
var dataSourceConnectionName = SearchIndexerDataSourceConnectionNameToken.Data().GetOrDefault(_values, "datasource");
var dataSourceConnectionName = SearchIndexerDataSourceConnectionNameToken.Data().GetOrDefault(_values, $"{searchIndexName}-datasource");
var skillsetName = SearchIndexerSkillsetNameToken.Data().GetOrDefault(_values, $"{searchIndexName}-skillset");
var indexerName = SearchIndexerNameToken.Data().GetOrDefault(_values, $"{searchIndexName}-indexer");

var idFieldName = IndexIdFieldNameToken.Data().GetOrDefault(_values, "id");
var contentFieldName = IndexContentFieldNameToken.Data().GetOrDefault(_values, "content");
var vectorFieldName = IndexVectorFieldNameToken.Data().GetOrDefault(_values, "contentVector");

output = DoIndexUpdateWithAISearch(aiServicesApiKey, searchEndpoint, searchApiKey, embeddingsEndpoint, embeddingModelDeployment, embeddingsApiKey, searchIndexName, dataSourceConnectionName, blobContainer, pattern, idFieldName, contentFieldName, vectorFieldName).Result;
output = DoIndexUpdateWithAISearch(aiServicesApiKey, searchEndpoint, searchApiKey, embeddingsEndpoint, embeddingModelDeployment, embeddingsApiKey, searchIndexName, dataSourceConnectionName, blobContainer, pattern, skillsetName, indexerName, idFieldName, contentFieldName, vectorFieldName).Result;
}
else if (useSK)
{
Expand Down Expand Up @@ -162,15 +164,15 @@ private string DoIndexUpdateWithGenAi(string subscription, string groupName, str
return PythonSDKWrapper.UpdateMLIndex(_values, subscription, groupName, projectName, indexName, embeddingModelDeployment, embeddingModelName, dataFiles, externalSourceUrl);
}

private async Task<string> DoIndexUpdateWithAISearch(string aiServicesApiKey, string searchEndpoint, string searchApiKey, string embeddingsEndpoint, string embeddingsDeployment, string embeddingsApiKey, string searchIndexName, string dataSourceConnectionName, string blobContainer, string pattern, string idFieldName, string contentFieldName, string vectorFieldName)
private async Task<string> DoIndexUpdateWithAISearch(string aiServicesApiKey, string searchEndpoint, string searchApiKey, string embeddingsEndpoint, string embeddingsDeployment, string embeddingsApiKey, string searchIndexName, string dataSourceConnectionName, string blobContainer, string pattern, string skillsetName, string indexerName, string idFieldName, string contentFieldName, string vectorFieldName)
{
var (connectionString, containerName) = await UploadFilesToBlobContainer(blobContainer, pattern);

Console.WriteLine("Connecting to Search ...");
var datasourceIndex = PrepGetSearchIndex(embeddingsEndpoint, embeddingsDeployment, embeddingsApiKey, searchIndexName, idFieldName, contentFieldName, vectorFieldName);
var dataSource = PrepGetDataSourceConnection(dataSourceConnectionName, connectionString, containerName);
var skillset = PrepGetSkillset(aiServicesApiKey, embeddingsEndpoint, embeddingsDeployment, embeddingsApiKey, idFieldName, contentFieldName, vectorFieldName, datasourceIndex);
var indexer = PrepGetIndexer(datasourceIndex, dataSource, skillset);
var skillset = PrepGetSkillset(skillsetName, aiServicesApiKey, embeddingsEndpoint, embeddingsDeployment, embeddingsApiKey, idFieldName, contentFieldName, vectorFieldName, datasourceIndex);
var indexer = PrepGetIndexer(indexerName, datasourceIndex, dataSource, skillset);

Uri endpoint = new Uri(searchEndpoint);
AzureKeyCredential credential = new AzureKeyCredential(searchApiKey);
Expand Down Expand Up @@ -365,7 +367,7 @@ private static SearchIndexerDataSourceConnection PrepGetDataSourceConnection(str
new SearchIndexerDataContainer(dataSourceContainerName));
}

private static SearchIndexerSkillset PrepGetSkillset(string aiServicesApiKey, string embeddingsEndpoint, string embeddingsDeployment, string embeddingsApiKey, string idFieldName, string contentFieldName, string vectorFieldName, SearchIndex datasourceIndex)
private static SearchIndexerSkillset PrepGetSkillset(string skillsetName, string aiServicesApiKey, string embeddingsEndpoint, string embeddingsDeployment, string embeddingsApiKey, string idFieldName, string contentFieldName, string vectorFieldName, SearchIndex datasourceIndex)
{
var useOcr = true;

Expand Down Expand Up @@ -394,11 +396,9 @@ private static SearchIndexerSkillset PrepGetSkillset(string aiServicesApiKey, st
InsertPostTag = " "
};


var splitSkill = new SplitSkill(
new List<InputFieldMappingEntry> {
new InputFieldMappingEntry("text") { Source = useOcr ? "/document/mergedText" : "/document/content" },
new InputFieldMappingEntry("languageCode") { Source = "/document/language" }
new InputFieldMappingEntry("text") { Source = useOcr ? "/document/mergedText" : "/document/content" }
},
new List<OutputFieldMappingEntry> {
new OutputFieldMappingEntry("textItems") { TargetName = "pages"}
Expand All @@ -415,7 +415,7 @@ private static SearchIndexerSkillset PrepGetSkillset(string aiServicesApiKey, st
new InputFieldMappingEntry("text") { Source = "/document/pages/*" }
},
new List<OutputFieldMappingEntry> {
new OutputFieldMappingEntry("embedding") { TargetName = vectorFieldName}
new OutputFieldMappingEntry("embedding") { TargetName = "vector" }
}) {
Context = "/document/pages/*",
ResourceUri = new Uri(embeddingsEndpoint),
Expand All @@ -442,20 +442,20 @@ private static SearchIndexerSkillset PrepGetSkillset(string aiServicesApiKey, st
};

var skillset = !string.IsNullOrEmpty(aiServicesApiKey)
? new SearchIndexerSkillset("datasourceskillset", skills) {
? new SearchIndexerSkillset(skillsetName, skills) {
CognitiveServicesAccount = new CognitiveServicesAccountKey(aiServicesApiKey),
IndexProjections = indexProjections
}
: new SearchIndexerSkillset("datasourceskillset", skills) {
: new SearchIndexerSkillset(skillsetName, skills) {
IndexProjections = indexProjections
};

return skillset;
}

private static SearchIndexer PrepGetIndexer(SearchIndex datasourceIndex, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillset)
private static SearchIndexer PrepGetIndexer(string indexerName, SearchIndex datasourceIndex, SearchIndexerDataSourceConnection dataSource, SearchIndexerSkillset skillset)
{
return new SearchIndexer("datasourceindexer", dataSource.Name, datasourceIndex.Name)
return new SearchIndexer(indexerName, dataSource.Name, datasourceIndex.Name)
{
Description = "Data indexer",
Schedule = new IndexingSchedule(TimeSpan.FromDays(1))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

namespace Azure.AI.Details.Common.CLI
{
    /// <summary>
    /// Describes the <c>--indexer-name</c> command line option, which is paired with the
    /// <c>search.indexer.name</c> full name.
    /// </summary>
    public class SearchIndexerNameToken
    {
        private const string _fullName = "search.indexer.name";
        private const string _optionName = "--indexer-name";
        private const string _optionExample = "NAME";
        private const string _requiredDisplayName = "indexer name";

        /// <summary>
        /// Builds a new <see cref="NamedValueTokenData"/> from this token's option name,
        /// full name, example text and display name.
        /// </summary>
        /// <returns>A freshly constructed <see cref="NamedValueTokenData"/>.</returns>
        public static NamedValueTokenData Data()
        {
            return new NamedValueTokenData(_optionName, _fullName, _optionExample, _requiredDisplayName);
        }

        /// <summary>
        /// Builds a new parser for this token.
        /// </summary>
        /// <returns>A freshly constructed <see cref="NamedValueTokenParser"/>.</returns>
        public static INamedValueTokenParser Parser()
        {
            // NOTE(review): the semantics of the "011;01" and "1" arguments are defined by
            // NamedValueTokenParser, which is not visible here - confirm against that type.
            return new NamedValueTokenParser(_optionName, _fullName, "011;01", "1");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

namespace Azure.AI.Details.Common.CLI
{
    /// <summary>
    /// Describes the <c>--skillset-name</c> command line option, which is paired with the
    /// <c>search.indexer.skillset.name</c> full name.
    /// </summary>
    public class SearchIndexerSkillsetNameToken
    {
        private const string _fullName = "search.indexer.skillset.name";
        private const string _optionName = "--skillset-name";
        private const string _optionExample = "NAME";
        private const string _requiredDisplayName = "skillset name";

        /// <summary>
        /// Builds a new <see cref="NamedValueTokenData"/> from this token's option name,
        /// full name, example text and display name.
        /// </summary>
        /// <returns>A freshly constructed <see cref="NamedValueTokenData"/>.</returns>
        public static NamedValueTokenData Data()
        {
            return new NamedValueTokenData(_optionName, _fullName, _optionExample, _requiredDisplayName);
        }

        /// <summary>
        /// Builds a new parser for this token.
        /// </summary>
        /// <returns>A freshly constructed <see cref="NamedValueTokenParser"/>.</returns>
        public static INamedValueTokenParser Parser()
        {
            // NOTE(review): the semantics of the "0011;0010" and "1" arguments are defined by
            // NamedValueTokenParser, which is not visible here - confirm against that type.
            return new NamedValueTokenParser(_optionName, _fullName, "0011;0010", "1");
        }
    }
}

0 comments on commit 59ea450

Please sign in to comment.