diff --git a/cmd/internal/config_test.go b/cmd/internal/config_test.go index 91d0ffc8bcd..e1266cf34cf 100644 --- a/cmd/internal/config_test.go +++ b/cmd/internal/config_test.go @@ -1862,7 +1862,7 @@ func TestPrebuiltTools(t *testing.T) { }, "data-products": tools.ToolsetConfig{ Name: "data-products", - ToolNames: []string{"search_entries", "lookup_entry", "search_aspect_types", "lookup_context", "list_data_products", "get_data_product"}, + ToolNames: []string{"search_entries", "lookup_entry", "search_aspect_types", "lookup_context", "list_data_products", "get_data_product", "list_data_assets"}, }, "enrich": tools.ToolsetConfig{ Name: "enrich", diff --git a/cmd/internal/imports.go b/cmd/internal/imports.go index 83207d90746..7240d282947 100644 --- a/cmd/internal/imports.go +++ b/cmd/internal/imports.go @@ -170,6 +170,7 @@ import ( _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdiscoveryresults" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetoperation" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetrunstatus" + _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlistdataassets" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlistdataproducts" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlookupcontext" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlookupentry" diff --git a/docs/KNOWLEDGE_CATALOG_README.md b/docs/KNOWLEDGE_CATALOG_README.md index 96ead324866..c70db96822b 100644 --- a/docs/KNOWLEDGE_CATALOG_README.md +++ b/docs/KNOWLEDGE_CATALOG_README.md @@ -7,7 +7,7 @@ The Knowledge Catalog (formerly known as Dataplex) Model Context Protocol (MCP) An editor configured to use the Knowledge Catalog MCP server can use its AI capabilities to help you: - **Search Catalog** - Search for entries in Knowledge Catalog -- **Explore Metadata** - Lookup specific entries, search aspect types, and list/retrieve Data Products +- **Explore Metadata** - Lookup specific entries, search aspect types, and list/retrieve Data Products and Data Assets - **Data Quality** - Search for data quality scans ## Prerequisites @@ -44,6 +44,7 @@ Once configured, the MCP server will automatically provide Knowledge Catalog cap * "Look up details for the entry 'projects/my-project/locations/us-central1/entryGroups/my-group/entries/my-entry'." * "List all Data Products." * "Get details of the Data Product 'projects/my-project/locations/us-central1/dataProducts/my-product'." +* "List Data Assets for the Data Product 'projects/my-project/locations/us-central1/dataProducts/my-product'." ## Server Capabilities @@ -58,6 +59,7 @@ The Knowledge Catalog MCP server provides the following tools: | `search_dq_scans` | Search for Data Quality scans. | | `list_data_products` | List Data Products for the current project. | | `get_data_product` | Retrieve a specific Data Product. | +| `list_data_assets` | List Data Assets under a Data Product. | ## Custom MCP Server Configuration diff --git a/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md b/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md index 5b5a3395fa1..bb25a54fd90 100644 --- a/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md +++ b/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md @@ -23,6 +23,7 @@ aliases: * `search_dq_scans`: Searches for data quality scans in Dataplex. * `list_data_products`: Lists Data Products across all locations. * `get_data_product`: Retrieves a specific Data Product. + * `list_data_assets`: Lists Data Assets under a Data Product. * `generate_data_insights`: Creates a new Dataplex Data Documentation scan template and triggers the run. * `get_data_insights`: Retrieves the final generated data insights for a completed scan. * `generate_data_profile`: Creates a new Dataplex Data Profile scan template and triggers the run. @@ -35,5 +36,5 @@ aliases: * `get_run_status`: Retrieves the execution status of the latest background job run. * **Toolsets:** * `discovery`: Metadata discovery and search toolset (`search_entries`, `lookup_entry`, `search_aspect_types`, `lookup_context`, `search_dq_scans`). - * `data-products`: Data Products and Data Assets curation and management toolset (`search_entries`, `lookup_entry`, `search_aspect_types`, `lookup_context`, `list_data_products`, `get_data_product`). + * `data-products`: Data Products and Data Assets curation and management toolset (`search_entries`, `lookup_entry`, `search_aspect_types`, `lookup_context`, `list_data_products`, `get_data_product`, `list_data_assets`). * `enrich`: Metadata enrichment pipeline orchestration and execution toolset (`search_entries`, `lookup_entry`, `lookup_context`, `generate_data_insights`, `get_data_insights`, `generate_data_profile`, `get_data_profile`, `discover_metadata`, `get_discovery_results`, `check_data_quality`, `get_data_quality_results`, `get_operation`, `get_run_status`). diff --git a/docs/en/integrations/knowledge-catalog/source.md b/docs/en/integrations/knowledge-catalog/source.md index 11aac2dee32..343e17ecbac 100644 --- a/docs/en/integrations/knowledge-catalog/source.md +++ b/docs/en/integrations/knowledge-catalog/source.md @@ -388,4 +388,12 @@ This abbreviated syntax works for the qualified predicates except for `label` in 2. You must provide `locationId` and `dataProductId`. ### Response 1. Present the retrieved metadata for the Data Product, including its display name, description, owner emails, asset count, labels, and access groups. + +## Tool: list_data_assets +### Request +1. Use this tool to retrieve all Data Assets under a specific Data Product. +2. You must provide `locationId` and `dataProductId`. +3. You can optionally filter the listed assets using `filter` or limit the response using `pageSize`. +### Response +1. Present the retrieved list of Data Assets, including their names, resources, and labels. ``` diff --git a/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-assets.md b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-assets.md new file mode 100644 index 00000000000..33bd4cb0bbc --- /dev/null +++ b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-assets.md @@ -0,0 +1,71 @@ +--- +title: "dataplex-list-data-assets" +type: docs +weight: 1 +description: > + A "dataplex-list-data-assets" tool allows to list Data Assets under a Data Product. +--- + +## About + +A `dataplex-list-data-assets` tool retrieves a list of Data Assets associated with a specific Data Product in Knowledge Catalog (formerly known as Dataplex). + +View the [Data Products guide][guide] for more information. + +[guide]: https://docs.cloud.google.com/dataplex/docs/data-products-overview + +## Compatible Sources + +{{< compatible-sources >}} + +## Requirements + +### IAM Permissions + +Knowledge Catalog uses [Identity and Access Management (IAM)][iam-overview] to control +user and group access to Knowledge Catalog resources. Toolbox will use your +[Application Default Credentials (ADC)][adc] to authorize and authenticate when +interacting with [Knowledge Catalog][dataplex-docs]. + +In addition to [setting the ADC for your server][set-adc], you need to ensure +the IAM identity has been given the correct IAM permissions for the tasks you +intend to perform. See [Knowledge Catalog IAM permissions][iam-permissions] +and [Knowledge Catalog IAM roles][iam-roles] for more information on +applying IAM permissions and roles to an identity. + +[iam-overview]: https://cloud.google.com/dataplex/docs/iam-and-access-control +[adc]: https://cloud.google.com/docs/authentication#adc +[set-adc]: https://cloud.google.com/docs/authentication/provide-credentials-adc +[iam-permissions]: https://cloud.google.com/dataplex/docs/iam-permissions +[iam-roles]: https://cloud.google.com/dataplex/docs/iam-roles +[dataplex-docs]: https://cloud.google.com/dataplex + +## Parameters + +The `dataplex-list-data-assets` tool has the following parameters: + +| **field** | **type** | **required** | **description** | +| ------------- | -------- | ------------ | --------------------------------------------------------------- | +| locationId | string | true | The location ID (e.g. `us`, `us-central1`) of the Data Product. | +| dataProductId | string | true | The unique ID of the parent Data Product. | +| filter | string | false | Filter string to list data assets. | +| pageSize | integer | false | Number of returned data assets in the page. | +| orderBy | string | false | Specifies the ordering of results. | + +## Example + +```yaml +kind: tool +name: list_data_assets +type: dataplex-list-data-assets +source: my-dataplex-source +description: Use this tool to list Data Assets under a Data Product. +``` + +## Reference + +| **field** | **type** | **required** | **description** | +| ----------- | -------- | ------------ | -------------------------------------------------- | +| type | string | true | Must be "dataplex-list-data-assets". | +| source | string | true | Name of the source the tool should execute on. | +| description | string | true | Description of the tool that is passed to the LLM. | diff --git a/internal/prebuiltconfigs/tools/dataplex.yaml b/internal/prebuiltconfigs/tools/dataplex.yaml index 37161bc6dca..54083216069 100644 --- a/internal/prebuiltconfigs/tools/dataplex.yaml +++ b/internal/prebuiltconfigs/tools/dataplex.yaml @@ -60,6 +60,12 @@ source: dataplex-source description: Retrieves specific metadata regarding a Data Product. --- kind: tool +name: list_data_assets +type: dataplex-list-data-assets +source: dataplex-source +description: Lists Data Assets under a Data Product. +--- +kind: tool name: generate_data_insights type: dataplex-generate-data-insights source: dataplex-source @@ -252,6 +258,7 @@ tools: - lookup_context - list_data_products - get_data_product +- list_data_assets --- kind: toolset name: enrich diff --git a/internal/sources/dataplex/dataplex.go b/internal/sources/dataplex/dataplex.go index 3bfddb90a1a..ba27942dc0e 100644 --- a/internal/sources/dataplex/dataplex.go +++ b/internal/sources/dataplex/dataplex.go @@ -479,6 +479,68 @@ func (s *Source) GetDataProduct(ctx context.Context, locationID string, dataProd }, nil } +type DataAssetSummary struct { + LocationID string `json:"locationId"` + DataProductID string `json:"dataProductId"` + DataAsset string `json:"dataAsset"` + ResourceUri string `json:"resourceUri"` + Labels map[string]string `json:"labels"` +} + +func (s *Source) ListDataAssets( + ctx context.Context, + locationId string, + dataProductId string, + filter string, + pageSize int, + orderBy string, +) ([]*DataAssetSummary, error) { + if s.GetDataProductClient() == nil { + return nil, fmt.Errorf("dataplex data product client is not initialized") + } + if pageSize <= 0 { + return nil, fmt.Errorf("pageSize must be positive: %d", pageSize) + } + parent := fmt.Sprintf("projects/%s/locations/%s/dataProducts/%s", s.ProjectID(), locationId, dataProductId) + req := &dataplexpb.ListDataAssetsRequest{ + Parent: parent, + Filter: filter, + PageSize: int32(pageSize), + OrderBy: orderBy, + } + + it := s.GetDataProductClient().ListDataAssets(ctx, req) + var results []*DataAssetSummary + + for len(results) < pageSize { + asset, err := it.Next() + if err == iterator.Done { + break + } + if err != nil { + if st, ok := grpcstatus.FromError(err); ok { + return nil, fmt.Errorf("failed to list data assets: code=%s message=%s", st.Code(), st.Message()) + } + return nil, fmt.Errorf("failed to list data assets: %w", err) + } + parts := strings.Split(asset.GetName(), "/") + var locId, prodId, assetId string + if len(parts) >= 8 && parts[0] == "projects" && parts[2] == "locations" && parts[4] == "dataProducts" && parts[6] == "dataAssets" { + locId = parts[3] + prodId = parts[5] + assetId = parts[7] + } + results = append(results, &DataAssetSummary{ + LocationID: locId, + DataProductID: prodId, + DataAsset: assetId, + ResourceUri: asset.GetResource(), + Labels: asset.GetLabels(), + }) + } + return results, nil +} + func (s *Source) GenerateDataInsights(ctx context.Context, location, resourcePath string, publish bool) (string, error) { parent := fmt.Sprintf("projects/%s/locations/%s", s.ProjectID(), location) dataScanID := fmt.Sprintf("nq-doc-%s", uuid.New().String()) diff --git a/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets.go b/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets.go new file mode 100644 index 00000000000..a468d165acf --- /dev/null +++ b/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets.go @@ -0,0 +1,142 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexlistdataassets + +import ( + "context" + "fmt" + "net/http" + + yaml "github.com/goccy/go-yaml" + "github.com/googleapis/mcp-toolbox/internal/sources/dataplex" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/util" + "github.com/googleapis/mcp-toolbox/internal/util/parameters" +) + +const resourceType string = "dataplex-list-data-assets" + +func init() { + if !tools.Register(resourceType, newConfig) { + panic(fmt.Sprintf("tool type %q already registered", resourceType)) + } +} + +func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.ToolConfig, error) { + actual := Config{ConfigBase: tools.ConfigBase{Name: name}} + if err := decoder.DecodeContext(ctx, &actual); err != nil { + return nil, err + } + return actual, nil +} + +type compatibleSource interface { + ListDataAssets(ctx context.Context, locationId string, dataProductId string, filter string, pageSize int, orderBy string) ([]*dataplex.DataAssetSummary, error) +} + +type Config struct { + tools.ConfigBase `yaml:",inline"` + Type string `yaml:"type" validate:"required"` + Source string `yaml:"source" validate:"required"` + Annotations *tools.ToolAnnotations `yaml:"annotations,omitempty"` +} + +// validate interface +var _ tools.ToolConfig = Config{} + +func (cfg Config) ToolConfigType() string { + return resourceType +} + +func (cfg Config) Initialize(ctx context.Context) (tools.Tool, error) { + locationId := parameters.NewStringParameter("locationId", "Required. The location ID (e.g., 'us', 'us-central1') where the Data Product is located.") + dataProductId := parameters.NewStringParameter("dataProductId", "Required. The unique ID of the parent Data Product.") + filter := parameters.NewStringParameter( + "filter", + "Optional. Filter string to list data assets. Based on the AIP-160 proposal. Use '=' for exact, and ':' for contains matching. String literals must be enclosed within \"\". Matching across all fields at once is not yet supported.", + parameters.WithStringDefault(""), + ) + pageSize := parameters.NewIntParameter( + "pageSize", + "Optional. Number of returned data assets in the page.", + parameters.WithIntDefault(10), + ) + orderBy := parameters.NewStringParameter( + "orderBy", + "Optional. Specifies the ordering of results.", + parameters.WithStringDefault(""), + ) + params := parameters.Parameters{locationId, dataProductId, filter, pageSize, orderBy} + + t := Tool{ + BaseTool: tools.NewBaseTool( + cfg, + tools.GetAnnotationsOrDefault(cfg.Annotations, tools.NewReadOnlyAnnotations), + tools.Manifest{ + Description: cfg.Description, + Parameters: params.Manifest(), + AuthRequired: cfg.AuthRequired, + }, + params, + ), + } + return t, nil +} + +// validate interface +var _ tools.Tool = Tool{} + +type Tool struct { + tools.BaseTool[Config] +} + +func (t Tool) ToConfig() tools.ToolConfig { + return t.Cfg +} + +func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, params parameters.ParamValues, accessToken tools.AccessToken) (any, util.ToolboxError) { + source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Cfg.Source, t.Cfg.Name, t.Cfg.Type) + if err != nil { + return nil, util.NewClientServerError("source used is not compatible with the tool", http.StatusInternalServerError, err) + } + + paramsMap := params.AsMap() + locationId, ok := paramsMap["locationId"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'locationId' parameter: %v", paramsMap["locationId"]), nil) + } + dataProductId, ok := paramsMap["dataProductId"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'dataProductId' parameter: %v", paramsMap["dataProductId"]), nil) + } + filter, ok := paramsMap["filter"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'filter' parameter: %v", paramsMap["filter"]), nil) + } + pageSize, ok := paramsMap["pageSize"].(int) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'pageSize' parameter: %v", paramsMap["pageSize"]), nil) + } + orderBy, ok := paramsMap["orderBy"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'orderBy' parameter: %v", paramsMap["orderBy"]), nil) + } + + resp, err := source.ListDataAssets(ctx, locationId, dataProductId, filter, pageSize, orderBy) + if err != nil { + return nil, util.ProcessGcpError(err) + } + return resp, nil +} diff --git a/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets_test.go b/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets_test.go new file mode 100644 index 00000000000..259d3fa553d --- /dev/null +++ b/internal/tools/dataplex/dataplexlistdataassets/dataplexlistdataassets_test.go @@ -0,0 +1,71 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexlistdataassets_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/googleapis/mcp-toolbox/internal/server" + "github.com/googleapis/mcp-toolbox/internal/testutils" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlistdataassets" +) + +func TestParseFromYamlDataplexListDataAssets(t *testing.T) { + ctx, err := testutils.ContextWithNewLogger() + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + tcs := []struct { + desc string + in string + want server.ToolConfigs + }{ + { + desc: "basic example", + in: ` + kind: tool + name: example_tool + type: dataplex-list-data-assets + source: my-instance + description: some description + `, + want: server.ToolConfigs{ + "example_tool": dataplexlistdataassets.Config{ + ConfigBase: tools.ConfigBase{ + Name: "example_tool", + Description: "some description", + AuthRequired: []string{}, + }, + Type: "dataplex-list-data-assets", + Source: "my-instance", + }, + }, + }, + } + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + // Parse contents + _, _, _, got, _, _, err := server.UnmarshalResourceConfig(ctx, testutils.FormatYaml(tc.in)) + if err != nil { + t.Fatalf("unable to unmarshal: %s", err) + } + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Fatalf("incorrect parse: diff %v", diff) + } + }) + } +} diff --git a/tests/dataplex/dataplex_integration_test.go b/tests/dataplex/dataplex_integration_test.go index 9bdbf89ffea..e6c22a013c8 100644 --- a/tests/dataplex/dataplex_integration_test.go +++ b/tests/dataplex/dataplex_integration_test.go @@ -51,6 +51,7 @@ var ( DataplexSearchDataQualityScansToolType = "dataplex-search-dq-scans" DataplexListDataProductsToolType = "dataplex-list-data-products" DataplexGetDataProductToolType = "dataplex-get-data-product" + DataplexListDataAssetsToolType = "dataplex-list-data-assets" DataplexGenerateDataProfileToolType = "dataplex-generate-data-profile" DataplexGetDataProfileToolType = "dataplex-get-data-profile" DataplexGetOperationToolType = "dataplex-get-operation" @@ -274,15 +275,29 @@ func TestDataplexToolEndpoints(t *testing.T) { dataScanId := fmt.Sprintf("param-data-scan-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) dataProductId1 := fmt.Sprintf("param-data-product-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) dataProductId2 := fmt.Sprintf("param-data-product-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) + dataAssetId := fmt.Sprintf("param-data-asset-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) bucketName := fmt.Sprintf("temp-toolbox-test-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) - var teardowns []func(*testing.T) - teardowns = append(teardowns, setupBigQueryTable(t, ctx, bigqueryClient, datasetName, tableName)) - teardowns = append(teardowns, setupDataplexThirdPartyAspectType(t, ctx, dataplexClient, aspectTypeId)) - teardowns = append(teardowns, setupDataplexSearchDataQualityScan(t, ctx, dataplexDataScanClient, dataScanId, datasetName, tableName)) - teardowns = append(teardowns, setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId1)) - teardowns = append(teardowns, setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId2)) - teardowns = append(teardowns, setupGcsBucket(t, ctx, DataplexProject, bucketName)) + teardownTable := setupBigQueryTable(t, ctx, bigqueryClient, datasetName, tableName) + teardownAspectType := setupDataplexThirdPartyAspectType(t, ctx, dataplexClient, aspectTypeId) + teardownDataScan := setupDataplexSearchDataQualityScan(t, ctx, dataplexDataScanClient, dataScanId, datasetName, tableName) + teardownDataProduct1 := setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId1) + teardownDataProduct2 := setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId2) + teardownDataAsset := setupDataplexDataAsset(t, ctx, dataplexDataProductClient, fmt.Sprintf("projects/%s/locations/us/dataProducts/%s", DataplexProject, dataProductId1), dataAssetId, datasetName, tableName) + teardownBucket := setupGcsBucket(t, ctx, DataplexProject, bucketName) + + teardowns := []func(*testing.T){ + teardownTable, + teardownAspectType, + teardownDataScan, + teardownDataProduct2, + // Sequence asset deletion before its parent data product to avoid API precondition failure + func(t *testing.T) { + teardownDataAsset(t) + teardownDataProduct1(t) + }, + teardownBucket, + } time.Sleep(1 * time.Minute) // wait for table and aspect type to be ingested // Execute teardowns concurrently using a WaitGroup to minimize overall test cleanup duration @@ -322,6 +337,7 @@ func TestDataplexToolEndpoints(t *testing.T) { runDataplexSearchDataQualityScansToolInvokeTest(t, dataScanId, tableName, datasetName) runDataplexListDataProductsToolInvokeTest(t, dataProductId1, dataProductId2) runDataplexGetDataProductToolInvokeTest(t, dataProductId1) + runDataplexListDataAssetsToolInvokeTest(t, dataProductId1, dataAssetId) runDataplexEnrichmentToolInvokeTest(t, tableName, datasetName, bucketName, dataplexDataScanClient) } @@ -388,7 +404,7 @@ func setupBigQueryTable(t *testing.T, ctx context.Context, client *bigqueryapi.C } func setupDataplexDataProduct(t *testing.T, ctx context.Context, client *dataplex.DataProductClient, dataProductId string) func(*testing.T) { - parent := fmt.Sprintf("projects/%s/locations/us-central1", DataplexProject) + parent := fmt.Sprintf("projects/%s/locations/us", DataplexProject) ownerEmail := tests.ServiceAccountEmail if ownerEmail == "" { t.Fatalf("Service account email is required, but tests.ServiceAccountEmail is empty") @@ -446,6 +462,50 @@ func setupDataplexDataProduct(t *testing.T, ctx context.Context, client *dataple return teardown } +func setupDataplexDataAsset(t *testing.T, ctx context.Context, client *dataplex.DataProductClient, parentProductPath string, dataAssetId string, datasetName string, tableName string) func(*testing.T) { + resource := fmt.Sprintf("//bigquery.googleapis.com/projects/%s/datasets/%s/tables/%s", DataplexProject, datasetName, tableName) + createReq := &dataplexpb.CreateDataAssetRequest{ + Parent: parentProductPath, + DataAssetId: dataAssetId, + DataAsset: &dataplexpb.DataAsset{ + Resource: resource, + Labels: map[string]string{ + "env": "test", + }, + }, + } + + teardown := func(t *testing.T) { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cleanupCancel() + deleteReq := &dataplexpb.DeleteDataAssetRequest{ + Name: fmt.Sprintf("%s/dataAssets/%s", parentProductPath, dataAssetId), + } + op, err := client.DeleteDataAsset(cleanupCtx, deleteReq) + if err != nil { + t.Errorf("Failed to initiate DeleteDataAsset for %s: %v", dataAssetId, err) + return + } + err = op.Wait(cleanupCtx) + if err != nil { + t.Logf("Warning: Failed to wait for DeleteDataAsset for %s: %v", dataAssetId, err) + } + } + + op, err := client.CreateDataAsset(ctx, createReq) + if err != nil { + t.Fatalf("Failed to initiate CreateDataAsset for %s: %v", dataAssetId, err) + } + + _, err = op.Wait(ctx) + if err != nil { + teardown(t) + t.Fatalf("Failed to wait for CreateDataAsset for %s: %v", dataAssetId, err) + } + + return teardown +} + func setupGcsBucket(t *testing.T, ctx context.Context, project string, bucketName string) func(*testing.T) { cred, err := google.FindDefaultCredentials(ctx) if err != nil { @@ -590,6 +650,17 @@ func getDataplexToolsConfig(sourceConfig map[string]any) map[string]any { "description": "Simple dataplex get data product tool to test end to end functionality.", "authRequired": []string{"my-google-auth"}, }, + "my-dataplex-list-data-assets-tool": map[string]any{ + "type": DataplexListDataAssetsToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex list data assets tool to test end to end functionality.", + }, + "my-auth-dataplex-list-data-assets-tool": map[string]any{ + "type": DataplexListDataAssetsToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex list data assets tool to test end to end functionality.", + "authRequired": []string{"my-google-auth"}, + }, "my-dataplex-generate-data-profile-tool": map[string]any{ "type": DataplexGenerateDataProfileToolType, "source": "my-dataplex-instance", @@ -682,6 +753,11 @@ func runDataplexToolGetTest(t *testing.T) { toolName: "my-dataplex-get-data-product-tool", expectedParams: []string{"locationId", "dataProductId"}, }, + { + name: "get my-dataplex-list-data-assets-tool", + toolName: "my-dataplex-list-data-assets-tool", + expectedParams: []string{"locationId", "dataProductId", "filter", "pageSize", "orderBy"}, + }, { name: "get my-dataplex-generate-data-profile-tool", toolName: "my-dataplex-generate-data-profile-tool", @@ -1501,7 +1577,7 @@ func runDataplexListDataProductsToolInvokeTest(t *testing.T, dataProductId1 stri requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"filter\":\"display_name:\\\"%s\\\"\"}", dataProductId1))), wantStatusCode: 200, expectResult: true, - wantLocationID: "us-central1", + wantLocationID: "us", wantDataProductID: dataProductId1, }, { @@ -1511,7 +1587,7 @@ func runDataplexListDataProductsToolInvokeTest(t *testing.T, dataProductId1 stri requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"pageSize\":1, \"filter\":\"display_name:\\\"%s\\\" OR display_name:\\\"%s\\\"\"}", dataProductId1, dataProductId2))), wantStatusCode: 200, expectResult: true, - wantLocationID: "us-central1", + wantLocationID: "us", wantDataProductID: "", }, { @@ -1621,27 +1697,27 @@ func runDataplexGetDataProductToolInvokeTest(t *testing.T, dataProductId string) name: "Success - Get Product (Authorized)", api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", requestHeader: map[string]string{"my-google-auth_token": idToken}, - requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), wantStatusCode: 200, expectResult: true, - wantLocationID: "us-central1", + wantLocationID: "us", wantDataProductID: dataProductId, }, { name: "Success - Get Product (Un-authorized)", api: "http://127.0.0.1:5000/api/tool/my-dataplex-get-data-product-tool/invoke", requestHeader: map[string]string{}, - requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), wantStatusCode: 200, expectResult: true, - wantLocationID: "us-central1", + wantLocationID: "us", wantDataProductID: dataProductId, }, { name: "Failure - Invalid Authorization Token", api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", requestHeader: map[string]string{"my-google-auth_token": "invalid_token"}, - requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), wantStatusCode: 401, expectResult: false, }, @@ -1649,7 +1725,7 @@ func runDataplexGetDataProductToolInvokeTest(t *testing.T, dataProductId string) name: "Failure - Without Authorization Token", api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", requestHeader: map[string]string{}, - requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), wantStatusCode: 401, expectResult: false, }, @@ -1735,6 +1811,140 @@ func runDataplexGetDataProductToolInvokeTest(t *testing.T, dataProductId string) } } +func runDataplexListDataAssetsToolInvokeTest(t *testing.T, dataProductId string, dataAssetId string) { + idToken, err := tests.GetGoogleIdToken(t) + if err != nil { + t.Fatalf("error getting Google ID token: %s", err) + } + + testCases := []struct { + name string + api string + requestHeader map[string]string + requestBody io.Reader + wantStatusCode int + expectResult bool + wantLocationID string + wantDataProductID string + wantDataAsset string + }{ + { + name: "Success - List Data Assets (Authorized)", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-assets-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": idToken}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us", + wantDataProductID: dataProductId, + wantDataAsset: dataAssetId, + }, + { + name: "Success - List Data Assets (Un-authorized)", + api: "http://127.0.0.1:5000/api/tool/my-dataplex-list-data-assets-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us", + wantDataProductID: dataProductId, + wantDataAsset: dataAssetId, + }, + { + name: "Failure - Invalid Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-assets-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": "invalid_token"}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 401, + expectResult: false, + }, + { + name: "Failure - Without Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-assets-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 401, + expectResult: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + req, err := http.NewRequest(http.MethodPost, tc.api, tc.requestBody) + if err != nil { + t.Fatalf("unable to create request: %s", err) + } + req.Header.Add("Content-type", "application/json") + for k, v := range tc.requestHeader { + req.Header.Add(k, v) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("error when sending a request: %s", err) + } + defer resp.Body.Close() + if resp.StatusCode != tc.wantStatusCode { + t.Fatalf("response status code is not %d. It is %d", tc.wantStatusCode, resp.StatusCode) + } + if !tc.expectResult { + return + } + var result map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + t.Fatalf("error parsing response body: %s", err) + } + resultStr, ok := result["result"].(string) + if !ok { + t.Fatalf("expected 'result' field to be a string, got %T", result["result"]) + } + var entries []interface{} + if err := json.Unmarshal([]byte(resultStr), &entries); err != nil { + t.Fatalf("error unmarshalling result string: %v", err) + } + + if len(entries) != 1 { + t.Fatalf("expected exactly one entry, but got %d", len(entries)) + } + entry, ok := entries[0].(map[string]interface{}) + if !ok { + t.Fatalf("expected entry to be a map, got %T", entries[0]) + } + locID, ok := entry["locationId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'locationId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantLocationID != "" && locID != tc.wantLocationID { + t.Fatalf("expected locationId to be %q, got %q", tc.wantLocationID, locID) + } + prodID, ok := entry["dataProductId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'dataProductId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantDataProductID != "" && prodID != tc.wantDataProductID { + t.Fatalf("expected dataProductId to be %q, got %q", tc.wantDataProductID, prodID) + } + assetID, ok := entry["dataAsset"].(string) + if !ok { + t.Fatalf("expected entry to have key 'dataAsset' as string, but it was not found or not a string in %v", entry) + } + if tc.wantDataAsset != "" && assetID != tc.wantDataAsset { + t.Fatalf("expected dataAsset to be %q, got %q", tc.wantDataAsset, assetID) + } + + // Assert output is cleaned + if _, ok := entry["uid"]; ok { + t.Errorf("expected entry to NOT have 'uid' field, but it was found") + } + if _, ok := entry["etag"]; ok { + t.Errorf("expected entry to NOT have 'etag' field, but it was found") + } + if _, ok := entry["createTime"]; ok { + t.Errorf("expected entry to NOT have 'createTime' field, but it was found") + } + }) + } +} + func runDataplexEnrichmentToolInvokeTest(t *testing.T, tableName string, datasetName string, bucketName string, client *dataplex.DataScanClient) { ctx := context.Background() tableResource := fmt.Sprintf("//bigquery.googleapis.com/projects/%s/datasets/%s/tables/%s", DataplexProject, datasetName, tableName)