diff --git a/cmd/internal/config_test.go b/cmd/internal/config_test.go index 30f1b2a11df5..b5186c7ad40e 100644 --- a/cmd/internal/config_test.go +++ b/cmd/internal/config_test.go @@ -1860,6 +1860,10 @@ func TestPrebuiltTools(t *testing.T) { Name: "discovery", ToolNames: []string{"search_entries", "lookup_entry", "search_aspect_types", "lookup_context", "search_dq_scans"}, }, + "data-products": tools.ToolsetConfig{ + Name: "data-products", + ToolNames: []string{"search_entries", "lookup_entry", "search_aspect_types", "lookup_context", "list_data_products", "get_data_product"}, + }, "enrich": tools.ToolsetConfig{ Name: "enrich", ToolNames: []string{"search_entries", "lookup_entry", "lookup_context", "generate_data_insights", "get_data_insights", "generate_data_profile", "get_data_profile", "discover_metadata", "get_discovery_results", "check_data_quality", "get_data_quality_results", "get_operation", "get_run_status"}, diff --git a/cmd/internal/imports.go b/cmd/internal/imports.go index 818f34e7501b..83207d907464 100644 --- a/cmd/internal/imports.go +++ b/cmd/internal/imports.go @@ -164,11 +164,13 @@ import ( _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgeneratedatainsights" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgeneratedataprofile" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdatainsights" + _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdataproduct" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdataprofile" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdataqualityresults" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdiscoveryresults" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetoperation" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetrunstatus" + _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlistdataproducts" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlookupcontext" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlookupentry" _ "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexsearchaspecttypes" diff --git a/docs/KNOWLEDGE_CATALOG_README.md b/docs/KNOWLEDGE_CATALOG_README.md index b74e1bf9205b..96ead3248663 100644 --- a/docs/KNOWLEDGE_CATALOG_README.md +++ b/docs/KNOWLEDGE_CATALOG_README.md @@ -7,7 +7,8 @@ The Knowledge Catalog (formerly known as Dataplex) Model Context Protocol (MCP) An editor configured to use the Knowledge Catalog MCP server can use its AI capabilities to help you: - **Search Catalog** - Search for entries in Knowledge Catalog -- **Explore Metadata** - Lookup specific entries and search aspect types +- **Explore Metadata** - Lookup specific entries, search aspect types, and list/retrieve Data Products +- **Data Quality** - Search for data quality scans ## Prerequisites @@ -41,6 +42,8 @@ Once configured, the MCP server will automatically provide Knowledge Catalog cap * "Search for entries related to 'sales' in Knowledge Catalog." * "Look up details for the entry 'projects/my-project/locations/us-central1/entryGroups/my-group/entries/my-entry'." +* "List all Data Products." +* "Get details of the Data Product 'projects/my-project/locations/us-central1/dataProducts/my-product'." ## Server Capabilities @@ -48,10 +51,13 @@ The Knowledge Catalog MCP server provides the following tools: | Tool Name | Description | |:----------------------|:-----------------------------------------------------------------------------------------------------------------------------| -| `search_entries` | Search for entries in Knowledge Catalog. | +| `search_entries` | Search for entries in Knowledge Catalog. | | `lookup_entry` | Retrieve specific subset of metadata (for example, schema, usage, business overview, and contacts) of a specific data asset. | | `search_aspect_types` | Find aspect types relevant to the query. | | `lookup_context` | Retrieve rich metadata regarding one or more data assets along with their relationships. | +| `search_dq_scans` | Search for Data Quality scans. | +| `list_data_products` | List Data Products for the current project. | +| `get_data_product` | Retrieve a specific Data Product. | ## Custom MCP Server Configuration diff --git a/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md b/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md index 57985a93414b..7dc64c413212 100644 --- a/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md +++ b/docs/en/integrations/knowledge-catalog/prebuilt-configs/knowledge-catalog.md @@ -21,6 +21,8 @@ aliases: * `search_aspect_types`: Finds aspect types relevant to the query. * `lookup_context`: Retrieves rich metadata regarding one or more data assets along with their relationships. * `search_dq_scans`: Search for data quality scans in Dataplex. + * `list_data_products`: Lists Data Products across all locations. + * `get_data_product`: Retrieves a specific Data Product. * `generate_data_insights`: Creates a new Dataplex Data Documentation scan template and triggers the run. * `get_data_insights`: Retrieves the final generated data insights for a completed scan. * `generate_data_profile`: Creates a new Dataplex Data Profile scan template and triggers the run. @@ -33,5 +35,6 @@ aliases: * `get_run_status`: Retrieves the execution status of the latest background job run. * **Toolsets:** * `discovery`: Metadata discovery and search toolset (`search_entries`, `lookup_entry`, `search_aspect_types`, `lookup_context`, `search_dq_scans`). + * `data-products`: Data Products and Data Assets curation and management toolset (`search_entries`, `lookup_entry`, `search_aspect_types`, `lookup_context`, `list_data_products`, `get_data_product`). * `enrich`: Metadata enrichment pipeline orchestration and execution toolset (`search_entries`, `lookup_entry`, `lookup_context`, `generate_data_insights`, `get_data_insights`, `generate_data_profile`, `get_data_profile`, `discover_metadata`, `get_discovery_results`, `check_data_quality`, `get_data_quality_results`, `get_operation`, `get_run_status`). diff --git a/docs/en/integrations/knowledge-catalog/source.md b/docs/en/integrations/knowledge-catalog/source.md index 626c190e094f..11aac2dee320 100644 --- a/docs/en/integrations/knowledge-catalog/source.md +++ b/docs/en/integrations/knowledge-catalog/source.md @@ -373,5 +373,19 @@ This abbreviated syntax works for the qualified predicates except for `label` in 1. Use this tool to retrieve rich metadata regarding one or more data assets along with their relationships. 2. You must provide the `resources` list with full resource names. ### Response -1. Present the requested metadata and relationship information. -``` \ No newline at end of file +1. Present the requested metadata and relationship information. + +## Tool: list_data_products +### Request +1. Use this tool to retrieve all Data Products globally across all locations. +2. You can optionally filter by `display_name` (e.g., "`display_name:\"my-product\"`") or other fields using the Dataplex filter syntax. +### Response +1. Unless asked for a specific data product, respond with all entries returned. + +## Tool: get_data_product +### Request +1. Use this tool to retrieve detailed metadata for a specific Data Product. +2. You must provide `locationId` and `dataProductId`. +### Response +1. Present the retrieved metadata for the Data Product, including its display name, description, owner emails, asset count, labels, and access groups. +``` diff --git a/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-get-data-product.md b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-get-data-product.md new file mode 100644 index 000000000000..1af111a2c56a --- /dev/null +++ b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-get-data-product.md @@ -0,0 +1,68 @@ +--- +title: "dataplex-get-data-product" +type: docs +weight: 1 +description: > + A "dataplex-get-data-product" tool allows to retrieve a specific Data Product. +--- + +## About + +A `dataplex-get-data-product` tool retrieves detailed metadata for a specific Data Product in Knowledge Catalog (formerly known as Dataplex). + +View the [Data Products guide][guide] for more information. + +[guide]: https://docs.cloud.google.com/dataplex/docs/data-products-overview + +## Compatible Sources + +{{< compatible-sources >}} + +## Requirements + +### IAM Permissions + +Knowledge Catalog uses [Identity and Access Management (IAM)][iam-overview] to control +user and group access to Knowledge Catalog resources. Toolbox will use your +[Application Default Credentials (ADC)][adc] to authorize and authenticate when +interacting with [Knowledge Catalog][dataplex-docs]. + +In addition to [setting the ADC for your server][set-adc], you need to ensure +the IAM identity has been given the correct IAM permissions for the tasks you +intend to perform. See [Knowledge Catalog IAM permissions][iam-permissions] +and [Knowledge Catalog IAM roles][iam-roles] for more information on +applying IAM permissions and roles to an identity. + +[iam-overview]: https://cloud.google.com/dataplex/docs/iam-and-access-control +[adc]: https://cloud.google.com/docs/authentication#adc +[set-adc]: https://cloud.google.com/docs/authentication/provide-credentials-adc +[iam-permissions]: https://cloud.google.com/dataplex/docs/iam-permissions +[iam-roles]: https://cloud.google.com/dataplex/docs/iam-roles +[dataplex-docs]: https://cloud.google.com/dataplex + +## Parameters + +The `dataplex-get-data-product` tool has the following parameters: + +| **field** | **type** | **required** | **description** | +| ------------- | -------- | ------------ | --------------------------------------------------------------- | +| locationId | string | true | The location ID (e.g. `us`, `us-central1`) of the Data Product. | +| dataProductId | string | true | The unique ID of the Data Product. | + +## Example + +```yaml +kind: tool +name: get_data_product +type: dataplex-get-data-product +source: my-dataplex-source +description: Use this tool to retrieve a Data Product. +``` + +## Reference + +| **field** | **type** | **required** | **description** | +| ----------- | -------- | ------------ | -------------------------------------------------- | +| type | string | true | Must be "dataplex-get-data-product". | +| source | string | true | Name of the source the tool should execute on. | +| description | string | true | Description of the tool that is passed to the LLM. | diff --git a/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-products.md b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-products.md new file mode 100644 index 000000000000..626d41e7acbc --- /dev/null +++ b/docs/en/integrations/knowledge-catalog/tools/knowledge-catalog-list-data-products.md @@ -0,0 +1,69 @@ +--- +title: "dataplex-list-data-products" +type: docs +weight: 1 +description: > + A "dataplex-list-data-products" tool allows to list data products. +--- + +## About + +A `dataplex-list-data-products` tool lists all Data Products in Knowledge Catalog (formerly known as Dataplex) across all locations (globally). + +View the [Data Products guide][guide] for more information. + +[guide]: https://docs.cloud.google.com/dataplex/docs/data-products-overview + +## Compatible Sources + +{{< compatible-sources >}} + +## Requirements + +### IAM Permissions + +Knowledge Catalog uses [Identity and Access Management (IAM)][iam-overview] to control +user and group access to Knowledge Catalog resources. Toolbox will use your +[Application Default Credentials (ADC)][adc] to authorize and authenticate when +interacting with [Knowledge Catalog][dataplex-docs]. + +In addition to [setting the ADC for your server][set-adc], you need to ensure +the IAM identity has been given the correct IAM permissions for the tasks you +intend to perform. See [Knowledge Catalog IAM permissions][iam-permissions] +and [Knowledge Catalog IAM roles][iam-roles] for more information on +applying IAM permissions and roles to an identity. + +[iam-overview]: https://cloud.google.com/dataplex/docs/iam-and-access-control +[adc]: https://cloud.google.com/docs/authentication#adc +[set-adc]: https://cloud.google.com/docs/authentication/provide-credentials-adc +[iam-permissions]: https://cloud.google.com/dataplex/docs/iam-permissions +[iam-roles]: https://cloud.google.com/dataplex/docs/iam-roles +[dataplex-docs]: https://cloud.google.com/dataplex + +## Parameters + +The `dataplex-list-data-products` tool has the following optional parameters: + +| **field** | **type** | **required** | **description** | +| --------- | -------- | ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| filter | string | false | Filter string to list data products. Use `=` for exact matching and `:` for contains matching. String literals must be enclosed within double quotes. E.g. `display_name:"my-product"`. | +| pageSize | integer | false | Number of returned data products in the page. Defaults to `10`. | +| orderBy | string | false | Specifies the ordering of results. | + +## Example + +```yaml +kind: tool +name: list_data_products +type: dataplex-list-data-products +source: my-dataplex-source +description: Use this tool to list Data Products. +``` + +## Reference + +| **field** | **type** | **required** | **description** | +| ----------- | -------- | ------------ | -------------------------------------------------- | +| type | string | true | Must be "dataplex-list-data-products". | +| source | string | true | Name of the source the tool should execute on. | +| description | string | true | Description of the tool that is passed to the LLM. | diff --git a/internal/prebuiltconfigs/tools/dataplex.yaml b/internal/prebuiltconfigs/tools/dataplex.yaml index 4a71b8d9c8e9..37161bc6dca9 100644 --- a/internal/prebuiltconfigs/tools/dataplex.yaml +++ b/internal/prebuiltconfigs/tools/dataplex.yaml @@ -48,6 +48,18 @@ source: dataplex-source description: Use this tool to search for data quality scans in Dataplex. --- kind: tool +name: list_data_products +type: dataplex-list-data-products +source: dataplex-source +description: Lists Data Products across all locations. +--- +kind: tool +name: get_data_product +type: dataplex-get-data-product +source: dataplex-source +description: Retrieves specific metadata regarding a Data Product. +--- +kind: tool name: generate_data_insights type: dataplex-generate-data-insights source: dataplex-source @@ -232,6 +244,16 @@ tools: - search_dq_scans --- kind: toolset +name: data-products +tools: +- search_entries +- lookup_entry +- search_aspect_types +- lookup_context +- list_data_products +- get_data_product +--- +kind: toolset name: enrich tools: - search_entries diff --git a/internal/sources/dataplex/dataplex.go b/internal/sources/dataplex/dataplex.go index 60aab700d809..9b38872ff1ff 100644 --- a/internal/sources/dataplex/dataplex.go +++ b/internal/sources/dataplex/dataplex.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "regexp" + "strings" dataplexapi "cloud.google.com/go/dataplex/apiv1" "cloud.google.com/go/dataplex/apiv1/dataplexpb" @@ -77,14 +78,15 @@ func (r Config) SourceConfigType() string { func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.Source, error) { // Initializes a Dataplex source - client, dataScanClient, err := initDataplexConnection(ctx, tracer, r.Name, r.Project, r.ImpersonateServiceAccount, r.Scopes) + client, dataScanClient, dataProductClient, err := initDataplexConnection(ctx, tracer, r.Name, r.Project, r.ImpersonateServiceAccount, r.Scopes) if err != nil { return nil, err } s := &Source{ - Config: r, - Client: client, - DataScanClient: dataScanClient, + Config: r, + Client: client, + DataScanClient: dataScanClient, + dataProductClient: dataProductClient, } return s, nil @@ -94,8 +96,9 @@ var _ sources.Source = &Source{} type Source struct { Config - Client *dataplexapi.CatalogClient - DataScanClient *dataplexapi.DataScanClient + Client *dataplexapi.CatalogClient + DataScanClient *dataplexapi.DataScanClient + dataProductClient *dataplexapi.DataProductClient } func (s *Source) SourceType() string { @@ -119,6 +122,10 @@ func (s *Source) GetDataScanClient() *dataplexapi.DataScanClient { return s.DataScanClient } +func (s *Source) GetDataProductClient() *dataplexapi.DataProductClient { + return s.dataProductClient +} + func initDataplexConnection( ctx context.Context, tracer trace.Tracer, @@ -126,13 +133,13 @@ func initDataplexConnection( project string, impersonateServiceAccount string, scopes []string, -) (*dataplexapi.CatalogClient, *dataplexapi.DataScanClient, error) { +) (*dataplexapi.CatalogClient, *dataplexapi.DataScanClient, *dataplexapi.DataProductClient, error) { ctx, span := sources.InitConnectionSpan(ctx, tracer, SourceType, name) defer span.End() userAgent, err := util.UserAgentFromContext(ctx) if err != nil { - return nil, nil, err + return nil, nil, nil, err } var opts []option.ClientOption @@ -149,7 +156,7 @@ func initDataplexConnection( Scopes: credScopes, }) if err != nil { - return nil, nil, fmt.Errorf("failed to create impersonated credentials for %q for project %q: %w", impersonateServiceAccount, project, err) + return nil, nil, nil, fmt.Errorf("failed to create impersonated credentials for %q for project %q: %w", impersonateServiceAccount, project, err) } opts = []option.ClientOption{ option.WithUserAgent(userAgent), @@ -159,7 +166,7 @@ func initDataplexConnection( // Use default credentials cred, err := google.FindDefaultCredentials(ctx, credScopes...) if err != nil { - return nil, nil, fmt.Errorf("failed to find default Google Cloud credentials for project %q: %w", project, err) + return nil, nil, nil, fmt.Errorf("failed to find default Google Cloud credentials for project %q: %w", project, err) } opts = []option.ClientOption{ option.WithUserAgent(userAgent), @@ -169,14 +176,19 @@ func initDataplexConnection( client, err := dataplexapi.NewCatalogClient(ctx, opts...) if err != nil { - return nil, nil, fmt.Errorf("failed to create Dataplex client for project %q: %w", project, err) + return nil, nil, nil, fmt.Errorf("failed to create Dataplex client for project %q: %w", project, err) } dataScanClient, err := dataplexapi.NewDataScanClient(ctx, opts...) if err != nil { - return nil, nil, fmt.Errorf("failed to create Dataplex DataScan client for project %q: %w", project, err) + return nil, nil, nil, fmt.Errorf("failed to create Dataplex DataScan client for project %q: %w", project, err) } - return client, dataScanClient, nil + + dataProductClient, err := dataplexapi.NewDataProductClient(ctx, opts...) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to create Dataplex DataProduct client for project %q: %w", project, err) + } + return client, dataScanClient, dataProductClient, nil } func (s *Source) LookupEntry(ctx context.Context, name string, view int, aspectTypes []string, entry string) (*dataplexpb.Entry, error) { @@ -346,6 +358,127 @@ func (s *Source) SearchDataQualityScans(ctx context.Context, filter string, page return results, nil } +type DataProductSummary struct { + LocationID string `json:"locationId"` + DataProductID string `json:"dataProductId"` + DisplayName string `json:"displayName"` + OwnerEmails []string `json:"ownerEmails"` + AssetCount int32 `json:"assetCount"` +} + +func (s *Source) ListDataProducts( + ctx context.Context, + filter string, + pageSize int, + orderBy string, +) ([]*DataProductSummary, error) { + if s.GetDataProductClient() == nil { + return nil, fmt.Errorf("dataplex data product client is not initialized") + } + if pageSize <= 0 { + return nil, fmt.Errorf("pageSize must be positive: %d", pageSize) + } + parent := fmt.Sprintf("projects/%s/locations/-", s.ProjectID()) + req := &dataplexpb.ListDataProductsRequest{ + Parent: parent, + Filter: filter, + PageSize: int32(pageSize), + OrderBy: orderBy, + } + + it := s.GetDataProductClient().ListDataProducts(ctx, req) + var results []*DataProductSummary + + for len(results) < pageSize { + dp, err := it.Next() + if err == iterator.Done { + break + } + if err != nil { + if st, ok := grpcstatus.FromError(err); ok { + return nil, fmt.Errorf("failed to list data products: code=%s message=%s", st.Code(), st.Message()) + } + return nil, fmt.Errorf("failed to list data products: %w", err) + } + parts := strings.Split(dp.GetName(), "/") + var locID, prodID string + if len(parts) >= 6 && parts[0] == "projects" && parts[2] == "locations" && parts[4] == "dataProducts" { + locID = parts[3] + prodID = parts[5] + } + results = append(results, &DataProductSummary{ + LocationID: locID, + DataProductID: prodID, + DisplayName: dp.GetDisplayName(), + OwnerEmails: dp.GetOwnerEmails(), + AssetCount: dp.GetAssetCount(), + }) + } + return results, nil +} + +type AccessGroup struct { + ID string `json:"id"` + DisplayName string `json:"displayName"` + Description string `json:"description"` + GoogleGroup string `json:"googleGroup,omitempty"` + ServiceAccount string `json:"serviceAccount,omitempty"` +} + +type DataProduct struct { + LocationID string `json:"locationId"` + DataProductID string `json:"dataProductId"` + DisplayName string `json:"displayName"` + Description string `json:"description"` + OwnerEmails []string `json:"ownerEmails"` + AssetCount int32 `json:"assetCount"` + Labels map[string]string `json:"labels"` + AccessGroups []AccessGroup `json:"accessGroups"` +} + +func (s *Source) GetDataProduct(ctx context.Context, locationID string, dataProductID string) (*DataProduct, error) { + if s.GetDataProductClient() == nil { + return nil, fmt.Errorf("dataplex data product client is not initialized") + } + name := fmt.Sprintf("projects/%s/locations/%s/dataProducts/%s", s.ProjectID(), locationID, dataProductID) + req := &dataplexpb.GetDataProductRequest{ + Name: name, + } + resp, err := s.GetDataProductClient().GetDataProduct(ctx, req) + if err != nil { + return nil, err + } + + accessGroups := []AccessGroup{} + for _, ag := range resp.GetAccessGroups() { + accessGroups = append(accessGroups, AccessGroup{ + ID: ag.GetId(), + DisplayName: ag.GetDisplayName(), + Description: ag.GetDescription(), + GoogleGroup: ag.GetPrincipal().GetGoogleGroup(), + ServiceAccount: ag.GetPrincipal().GetServiceAccount(), + }) + } + + parts := strings.Split(resp.GetName(), "/") + var locID, prodID string + if len(parts) >= 6 && parts[0] == "projects" && parts[2] == "locations" && parts[4] == "dataProducts" { + locID = parts[3] + prodID = parts[5] + } + + return &DataProduct{ + LocationID: locID, + DataProductID: prodID, + DisplayName: resp.GetDisplayName(), + Description: resp.GetDescription(), + OwnerEmails: resp.GetOwnerEmails(), + AssetCount: resp.GetAssetCount(), + Labels: resp.GetLabels(), + AccessGroups: accessGroups, + }, nil +} + func (s *Source) GenerateDataInsights(ctx context.Context, location, resourcePath string, publish bool) (string, error) { parent := fmt.Sprintf("projects/%s/locations/%s", s.ProjectID(), location) dataScanID := fmt.Sprintf("nq-doc-%s", uuid.New().String()) diff --git a/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct.go b/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct.go new file mode 100644 index 000000000000..f1d71ad80cb6 --- /dev/null +++ b/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct.go @@ -0,0 +1,114 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexgetdataproduct + +import ( + "context" + "fmt" + "net/http" + + yaml "github.com/goccy/go-yaml" + "github.com/googleapis/mcp-toolbox/internal/sources/dataplex" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/util" + "github.com/googleapis/mcp-toolbox/internal/util/parameters" +) + +const resourceType string = "dataplex-get-data-product" + +func init() { + if !tools.Register(resourceType, newConfig) { + panic(fmt.Sprintf("tool type %q already registered", resourceType)) + } +} + +func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.ToolConfig, error) { + actual := Config{ConfigBase: tools.ConfigBase{Name: name}} + if err := decoder.DecodeContext(ctx, &actual); err != nil { + return nil, err + } + return actual, nil +} + +type compatibleSource interface { + GetDataProduct(ctx context.Context, locationID string, dataProductID string) (*dataplex.DataProduct, error) +} + +type Config struct { + tools.ConfigBase `yaml:",inline"` + Type string `yaml:"type" validate:"required"` + Source string `yaml:"source" validate:"required"` + Annotations *tools.ToolAnnotations `yaml:"annotations,omitempty"` +} + +// validate interface +var _ tools.ToolConfig = Config{} + +func (cfg Config) ToolConfigType() string { + return resourceType +} + +func (cfg Config) Initialize() (tools.Tool, error) { + locationId := parameters.NewStringParameter("locationId", "Required. The location ID (e.g., 'us', 'us-central1') where the Data Product is located.") + dataProductId := parameters.NewStringParameter("dataProductId", "Required. The unique ID of the Data Product.") + params := parameters.Parameters{locationId, dataProductId} + + return Tool{ + BaseTool: tools.NewBaseTool( + cfg, + tools.GetAnnotationsOrDefault(cfg.Annotations, tools.NewReadOnlyAnnotations), + tools.Manifest{ + Description: cfg.Description, + Parameters: params.Manifest(), + AuthRequired: cfg.AuthRequired, + }, + params, + ), + }, nil +} + +// validate interface +var _ tools.Tool = Tool{} + +type Tool struct { + tools.BaseTool[Config] +} + +func (t Tool) ToConfig() tools.ToolConfig { + return t.Cfg +} + +func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, params parameters.ParamValues, accessToken tools.AccessToken) (any, util.ToolboxError) { + source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Cfg.Source, t.Cfg.Name, t.Cfg.Type) + if err != nil { + return nil, util.NewClientServerError("source used is not compatible with the tool", http.StatusInternalServerError, err) + } + + paramsMap := params.AsMap() + locationID, ok := paramsMap["locationId"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'locationId' parameter: %v", paramsMap["locationId"]), nil) + } + dataProductID, ok := paramsMap["dataProductId"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'dataProductId' parameter: %v", paramsMap["dataProductId"]), nil) + } + + resp, err := source.GetDataProduct(ctx, locationID, dataProductID) + if err != nil { + return nil, util.ProcessGcpError(err) + } + return resp, nil +} diff --git a/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct_test.go b/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct_test.go new file mode 100644 index 000000000000..d2430d2e4ef9 --- /dev/null +++ b/internal/tools/dataplex/dataplexgetdataproduct/dataplexgetdataproduct_test.go @@ -0,0 +1,71 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexgetdataproduct_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/googleapis/mcp-toolbox/internal/server" + "github.com/googleapis/mcp-toolbox/internal/testutils" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexgetdataproduct" +) + +func TestParseFromYamlDataplexGetDataProduct(t *testing.T) { + ctx, err := testutils.ContextWithNewLogger() + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + tcs := []struct { + desc string + in string + want server.ToolConfigs + }{ + { + desc: "basic example", + in: ` + kind: tool + name: example_tool + type: dataplex-get-data-product + source: my-instance + description: some description + `, + want: server.ToolConfigs{ + "example_tool": dataplexgetdataproduct.Config{ + ConfigBase: tools.ConfigBase{ + Name: "example_tool", + Description: "some description", + AuthRequired: []string{}, + }, + Type: "dataplex-get-data-product", + Source: "my-instance", + }, + }, + }, + } + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + // Parse contents + _, _, _, got, _, _, err := server.UnmarshalResourceConfig(ctx, testutils.FormatYaml(tc.in)) + if err != nil { + t.Fatalf("unable to unmarshal: %s", err) + } + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Fatalf("incorrect parse: diff %v", diff) + } + }) + } +} diff --git a/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts.go b/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts.go new file mode 100644 index 000000000000..6a4456761e16 --- /dev/null +++ b/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts.go @@ -0,0 +1,119 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexlistdataproducts + +import ( + "context" + "fmt" + "net/http" + + "github.com/goccy/go-yaml" + "github.com/googleapis/mcp-toolbox/internal/sources/dataplex" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/util" + "github.com/googleapis/mcp-toolbox/internal/util/parameters" +) + +const resourceType string = "dataplex-list-data-products" + +func init() { + if !tools.Register(resourceType, newConfig) { + panic(fmt.Sprintf("tool type %q already registered", resourceType)) + } +} + +func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.ToolConfig, error) { + actual := Config{ConfigBase: tools.ConfigBase{Name: name}} + if err := decoder.DecodeContext(ctx, &actual); err != nil { + return nil, err + } + return actual, nil +} + +type compatibleSource interface { + ListDataProducts(ctx context.Context, filter string, pageSize int, orderBy string) ([]*dataplex.DataProductSummary, error) +} + +type Config struct { + tools.ConfigBase `yaml:",inline"` + Type string `yaml:"type" validate:"required"` + Source string `yaml:"source" validate:"required"` + Annotations *tools.ToolAnnotations `yaml:"annotations,omitempty"` +} + +// validate interface +var _ tools.ToolConfig = Config{} + +func (cfg Config) ToolConfigType() string { + return resourceType +} + +func (cfg Config) Initialize() (tools.Tool, error) { + filter := parameters.NewStringParameterWithDefault("filter", "", "Optional. Filter string to list data products. Based on the AIP-160 proposal. Use '=' for exact, and ':' for contains matching. String literals must be enclosed within \"\". Matching across all fields at once is not yet supported. E.g. \"display_name:\\\"my-product\\\"\"") + pageSize := parameters.NewIntParameterWithDefault("pageSize", 10, "Number of returned data products in the page.") + orderBy := parameters.NewStringParameterWithDefault("orderBy", "", "Specifies the ordering of results.") + params := parameters.Parameters{filter, pageSize, orderBy} + + return Tool{ + BaseTool: tools.NewBaseTool( + cfg, + tools.GetAnnotationsOrDefault(cfg.Annotations, tools.NewReadOnlyAnnotations), + tools.Manifest{ + Description: cfg.Description, + Parameters: params.Manifest(), + AuthRequired: cfg.AuthRequired, + }, + params, + ), + }, nil +} + +// validate interface +var _ tools.Tool = Tool{} + +type Tool struct { + tools.BaseTool[Config] +} + +func (t Tool) ToConfig() tools.ToolConfig { + return t.Cfg +} + +func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, params parameters.ParamValues, accessToken tools.AccessToken) (any, util.ToolboxError) { + source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Cfg.Source, t.Cfg.Name, t.Cfg.Type) + if err != nil { + return nil, util.NewClientServerError("source used is not compatible with the tool", http.StatusInternalServerError, err) + } + + paramsMap := params.AsMap() + filter, ok := paramsMap["filter"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'filter' parameter: %v", paramsMap["filter"]), nil) + } + pageSize, ok := paramsMap["pageSize"].(int) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'pageSize' parameter: %v", paramsMap["pageSize"]), nil) + } + orderBy, ok := paramsMap["orderBy"].(string) + if !ok { + return nil, util.NewAgentError(fmt.Sprintf("error casting 'orderBy' parameter: %v", paramsMap["orderBy"]), nil) + } + + resp, err := source.ListDataProducts(ctx, filter, pageSize, orderBy) + if err != nil { + return nil, util.ProcessGcpError(err) + } + return resp, nil +} diff --git a/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts_test.go b/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts_test.go new file mode 100644 index 000000000000..a39d5b23a03d --- /dev/null +++ b/internal/tools/dataplex/dataplexlistdataproducts/dataplexlistdataproducts_test.go @@ -0,0 +1,71 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dataplexlistdataproducts_test + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/googleapis/mcp-toolbox/internal/server" + "github.com/googleapis/mcp-toolbox/internal/testutils" + "github.com/googleapis/mcp-toolbox/internal/tools" + "github.com/googleapis/mcp-toolbox/internal/tools/dataplex/dataplexlistdataproducts" +) + +func TestParseFromYamlDataplexListDataProducts(t *testing.T) { + ctx, err := testutils.ContextWithNewLogger() + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + tcs := []struct { + desc string + in string + want server.ToolConfigs + }{ + { + desc: "basic example", + in: ` + kind: tool + name: example_tool + type: dataplex-list-data-products + source: my-instance + description: some description + `, + want: server.ToolConfigs{ + "example_tool": dataplexlistdataproducts.Config{ + ConfigBase: tools.ConfigBase{ + Name: "example_tool", + Description: "some description", + AuthRequired: []string{}, + }, + Type: "dataplex-list-data-products", + Source: "my-instance", + }, + }, + }, + } + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + // Parse contents + _, _, _, got, _, _, err := server.UnmarshalResourceConfig(ctx, testutils.FormatYaml(tc.in)) + if err != nil { + t.Fatalf("unable to unmarshal: %s", err) + } + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Fatalf("incorrect parse: diff %v", diff) + } + }) + } +} diff --git a/tests/dataplex/dataplex_integration_test.go b/tests/dataplex/dataplex_integration_test.go index 1319c8c7874f..ea27adea85c6 100644 --- a/tests/dataplex/dataplex_integration_test.go +++ b/tests/dataplex/dataplex_integration_test.go @@ -24,6 +24,7 @@ import ( "os" "regexp" "strings" + "sync" "testing" "time" @@ -48,6 +49,8 @@ var ( DataplexLookupEntryToolType = "dataplex-lookup-entry" DataplexSearchAspectTypesToolType = "dataplex-search-aspect-types" DataplexSearchDataQualityScansToolType = "dataplex-search-dq-scans" + DataplexListDataProductsToolType = "dataplex-list-data-products" + DataplexGetDataProductToolType = "dataplex-get-data-product" DataplexGenerateDataProfileToolType = "dataplex-generate-data-profile" DataplexGetDataProfileToolType = "dataplex-get-data-profile" DataplexGetOperationToolType = "dataplex-get-operation" @@ -193,15 +196,17 @@ func setupDataplexSearchDataQualityScan(t *testing.T, ctx context.Context, clien } return func(t *testing.T) { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cleanupCancel() deleteDataScanReq := &dataplexpb.DeleteDataScanRequest{ Name: fmt.Sprintf("%s/dataScans/%s", parent, dataScanId), } - op, err := client.DeleteDataScan(ctx, deleteDataScanReq) + op, err := client.DeleteDataScan(cleanupCtx, deleteDataScanReq) if err != nil { t.Errorf("Failed to delete data scan %s: %v", dataScanId, err) return } - if err := op.Wait(ctx); err != nil { + if err := op.Wait(cleanupCtx); err != nil { t.Logf("Warning: Failed to wait for delete data scan %s: %v", dataScanId, err) } } @@ -220,6 +225,19 @@ func initDataplexDataScanConnection(ctx context.Context) (*dataplex.DataScanClie return client, nil } +func initDataplexDataProductConnection(ctx context.Context) (*dataplex.DataProductClient, error) { + cred, err := google.FindDefaultCredentials(ctx, sources.CloudPlatformScope) + if err != nil { + return nil, fmt.Errorf("failed to find default Google Cloud credentials: %w", err) + } + + client, err := dataplex.NewDataProductClient(ctx, option.WithCredentials(cred)) + if err != nil { + return nil, fmt.Errorf("failed to create Dataplex DataProduct client %w", err) + } + return client, nil +} + func TestDataplexToolEndpoints(t *testing.T) { sourceConfig := getDataplexVars(t) ctx, cancel := context.WithTimeout(context.Background(), 25*time.Minute) @@ -241,6 +259,11 @@ func TestDataplexToolEndpoints(t *testing.T) { t.Fatalf("unable to create Dataplex DataScan connection: %s", err) } + dataplexDataProductClient, err := initDataplexDataProductConnection(ctx) + if err != nil { + t.Fatalf("unable to create Dataplex DataProduct connection: %s", err) + } + // Cleanup older aspecttypes cleanupOldAspectTypes(t, ctx, dataplexClient, 1*time.Hour) @@ -249,18 +272,31 @@ func TestDataplexToolEndpoints(t *testing.T) { tableName := fmt.Sprintf("param_table_%s", strings.ReplaceAll(uuid.New().String(), "-", "")) aspectTypeId := fmt.Sprintf("param-aspect-type-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) dataScanId := fmt.Sprintf("param-data-scan-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) + dataProductId1 := fmt.Sprintf("param-data-product-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) + dataProductId2 := fmt.Sprintf("param-data-product-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) bucketName := fmt.Sprintf("temp-toolbox-test-%s", strings.ReplaceAll(uuid.New().String(), "-", "")) - teardownTable1 := setupBigQueryTable(t, ctx, bigqueryClient, datasetName, tableName) - teardownAspectType1 := setupDataplexThirdPartyAspectType(t, ctx, dataplexClient, aspectTypeId) - teardownDataScan1 := setupDataplexSearchDataQualityScan(t, ctx, dataplexDataScanClient, dataScanId, datasetName, tableName) - teardownBucket1 := setupGcsBucket(t, ctx, DataplexProject, bucketName) - - time.Sleep(2 * time.Minute) // wait for table and aspect type to be ingested - defer teardownTable1(t) - defer teardownAspectType1(t) - defer teardownDataScan1(t) - defer teardownBucket1(t) + var teardowns []func(*testing.T) + teardowns = append(teardowns, setupBigQueryTable(t, ctx, bigqueryClient, datasetName, tableName)) + teardowns = append(teardowns, setupDataplexThirdPartyAspectType(t, ctx, dataplexClient, aspectTypeId)) + teardowns = append(teardowns, setupDataplexSearchDataQualityScan(t, ctx, dataplexDataScanClient, dataScanId, datasetName, tableName)) + teardowns = append(teardowns, setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId1)) + teardowns = append(teardowns, setupDataplexDataProduct(t, ctx, dataplexDataProductClient, dataProductId2)) + teardowns = append(teardowns, setupGcsBucket(t, ctx, DataplexProject, bucketName)) + + time.Sleep(1 * time.Minute) // wait for table and aspect type to be ingested + // Execute teardowns concurrently using a WaitGroup to minimize overall test cleanup duration + defer func() { + var wg sync.WaitGroup + for _, fn := range teardowns { + wg.Add(1) + go func(cleanup func(*testing.T)) { + defer wg.Done() + cleanup(t) + }(fn) + } + wg.Wait() + }() toolsFile := getDataplexToolsConfig(sourceConfig) @@ -284,6 +320,8 @@ func TestDataplexToolEndpoints(t *testing.T) { runDataplexSearchAspectTypesToolInvokeTest(t, aspectTypeId) runDataplexLookupContextToolInvokeTest(t, tableName, datasetName) runDataplexSearchDataQualityScansToolInvokeTest(t, dataScanId, tableName, datasetName) + runDataplexListDataProductsToolInvokeTest(t, dataProductId1, dataProductId2) + runDataplexGetDataProductToolInvokeTest(t, dataProductId1) runDataplexEnrichmentToolInvokeTest(t, tableName, datasetName, bucketName, dataplexDataScanClient) } @@ -315,14 +353,17 @@ func setupBigQueryTable(t *testing.T, ctx context.Context, client *bigqueryapi.C } return func(t *testing.T) { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cleanupCancel() + // tear down table dropSQL := fmt.Sprintf("drop table %s.%s", datasetName, tableName) - dropJob, err := client.Query(dropSQL).Run(ctx) + dropJob, err := client.Query(dropSQL).Run(cleanupCtx) if err != nil { t.Errorf("Failed to start drop table job for %s: %v", tableName, err) return } - dropStatus, err := dropJob.Wait(ctx) + dropStatus, err := dropJob.Wait(cleanupCtx) if err != nil { t.Errorf("Failed to wait for drop table job for %s: %v", tableName, err) return @@ -333,11 +374,11 @@ func setupBigQueryTable(t *testing.T, ctx context.Context, client *bigqueryapi.C // tear down dataset datasetToTeardown := client.Dataset(datasetName) - tablesIterator := datasetToTeardown.Tables(ctx) + tablesIterator := datasetToTeardown.Tables(cleanupCtx) _, err = tablesIterator.Next() if err == iterator.Done { - if err := datasetToTeardown.Delete(ctx); err != nil { + if err := datasetToTeardown.Delete(cleanupCtx); err != nil { t.Errorf("Failed to delete dataset %s: %v", datasetName, err) } } else if err != nil { @@ -346,6 +387,65 @@ func setupBigQueryTable(t *testing.T, ctx context.Context, client *bigqueryapi.C } } +func setupDataplexDataProduct(t *testing.T, ctx context.Context, client *dataplex.DataProductClient, dataProductId string) func(*testing.T) { + parent := fmt.Sprintf("projects/%s/locations/us-central1", DataplexProject) + ownerEmail := tests.ServiceAccountEmail + if ownerEmail == "" { + t.Fatalf("Service account email is required, but tests.ServiceAccountEmail was empty") + } + createReq := &dataplexpb.CreateDataProductRequest{ + Parent: parent, + DataProductId: dataProductId, + DataProduct: &dataplexpb.DataProduct{ + DisplayName: dataProductId, + Description: "Temporary Data Product for MCP Toolbox integration tests", + OwnerEmails: []string{ownerEmail}, + AccessGroups: map[string]*dataplexpb.DataProduct_AccessGroup{ + "test-group": { + Id: "test-group", + DisplayName: "Test Group", + Description: "Test Group Description", + Principal: &dataplexpb.DataProduct_Principal{ + Type: &dataplexpb.DataProduct_Principal_GoogleGroup{ + GoogleGroup: ownerEmail, + }, + }, + }, + }, + }, + } + + teardown := func(t *testing.T) { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cleanupCancel() + deleteReq := &dataplexpb.DeleteDataProductRequest{ + Name: fmt.Sprintf("%s/dataProducts/%s", parent, dataProductId), + } + op, err := client.DeleteDataProduct(cleanupCtx, deleteReq) + if err != nil { + t.Errorf("Failed to initiate DeleteDataProduct for %s: %v", dataProductId, err) + return + } + err = op.Wait(cleanupCtx) + if err != nil { + t.Logf("Warning: Failed to wait for DeleteDataProduct for %s: %v", dataProductId, err) + } + } + + op, err := client.CreateDataProduct(ctx, createReq) + if err != nil { + t.Fatalf("Failed to initiate CreateDataProduct for %s: %v", dataProductId, err) + } + + _, err = op.Wait(ctx) + if err != nil { + teardown(t) + t.Fatalf("Failed to wait for CreateDataProduct for %s: %v", dataProductId, err) + } + + return teardown +} + func setupGcsBucket(t *testing.T, ctx context.Context, project string, bucketName string) func(*testing.T) { cred, err := google.FindDefaultCredentials(ctx) if err != nil { @@ -387,11 +487,14 @@ func setupDataplexThirdPartyAspectType(t *testing.T, ctx context.Context, client } return func(t *testing.T) { + cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cleanupCancel() + // tear down aspect type deleteAspectTypeReq := &dataplexpb.DeleteAspectTypeRequest{ Name: fmt.Sprintf("%s/aspectTypes/%s", parent, aspectTypeId), } - if _, err := client.DeleteAspectType(ctx, deleteAspectTypeReq); err != nil { + if _, err := client.DeleteAspectType(cleanupCtx, deleteAspectTypeReq); err != nil { t.Errorf("Failed to delete aspect type %s: %v", aspectTypeId, err) } } @@ -465,6 +568,28 @@ func getDataplexToolsConfig(sourceConfig map[string]any) map[string]any { "description": "Simple dataplex search dq scans tool to test end to end functionality.", "authRequired": []string{"my-google-auth"}, }, + "my-dataplex-list-data-products-tool": map[string]any{ + "type": DataplexListDataProductsToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex list data products tool to test end to end functionality.", + }, + "my-auth-dataplex-list-data-products-tool": map[string]any{ + "type": DataplexListDataProductsToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex list data products tool to test end to end functionality.", + "authRequired": []string{"my-google-auth"}, + }, + "my-dataplex-get-data-product-tool": map[string]any{ + "type": DataplexGetDataProductToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex get data product tool to test end to end functionality.", + }, + "my-auth-dataplex-get-data-product-tool": map[string]any{ + "type": DataplexGetDataProductToolType, + "source": "my-dataplex-instance", + "description": "Simple dataplex get data product tool to test end to end functionality.", + "authRequired": []string{"my-google-auth"}, + }, "my-dataplex-generate-data-profile-tool": map[string]any{ "type": DataplexGenerateDataProfileToolType, "source": "my-dataplex-instance", @@ -547,6 +672,16 @@ func runDataplexToolGetTest(t *testing.T) { toolName: "my-dataplex-search-dq-scans-tool", expectedParams: []string{"filter", "dataScanId", "resourcePath", "pageSize", "orderBy"}, }, + { + name: "get my-dataplex-list-data-products-tool", + toolName: "my-dataplex-list-data-products-tool", + expectedParams: []string{"filter", "pageSize", "orderBy"}, + }, + { + name: "get my-dataplex-get-data-product-tool", + toolName: "my-dataplex-get-data-product-tool", + expectedParams: []string{"locationId", "dataProductId"}, + }, { name: "get my-dataplex-generate-data-profile-tool", toolName: "my-dataplex-generate-data-profile-tool", @@ -1343,6 +1478,263 @@ func runDataplexSearchDataQualityScansToolInvokeTest(t *testing.T, dataScanId st } } +func runDataplexListDataProductsToolInvokeTest(t *testing.T, dataProductId1 string, dataProductId2 string) { + idToken, err := tests.GetGoogleIdToken(t) + if err != nil { + t.Fatalf("error getting Google ID token: %s", err) + } + + testCases := []struct { + name string + api string + requestHeader map[string]string + requestBody io.Reader + wantStatusCode int + expectResult bool + wantLocationID string + wantDataProductID string + }{ + { + name: "Success - Filter Extracts One Product (Authorized)", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-products-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": idToken}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"filter\":\"display_name:\\\"%s\\\"\"}", dataProductId1))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us-central1", + wantDataProductID: dataProductId1, + }, + { + name: "Success - PageSize Limits to One (Un-authorized)", + api: "http://127.0.0.1:5000/api/tool/my-dataplex-list-data-products-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"pageSize\":1, \"filter\":\"display_name:\\\"%s\\\" OR display_name:\\\"%s\\\"\"}", dataProductId1, dataProductId2))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us-central1", + wantDataProductID: "", + }, + { + name: "Failure - Invalid Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-products-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": "invalid_token"}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"filter\":\"display_name:\\\"%s\\\"\"}", dataProductId1))), + wantStatusCode: 401, + expectResult: false, + }, + { + name: "Failure - Without Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-list-data-products-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"filter\":\"display_name:\\\"%s\\\"\"}", dataProductId1))), + wantStatusCode: 401, + expectResult: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + req, err := http.NewRequest(http.MethodPost, tc.api, tc.requestBody) + if err != nil { + t.Fatalf("unable to create request: %s", err) + } + req.Header.Add("Content-type", "application/json") + for k, v := range tc.requestHeader { + req.Header.Add(k, v) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("unable to send request: %s", err) + } + defer resp.Body.Close() + if resp.StatusCode != tc.wantStatusCode { + t.Fatalf("response status code is not %d. It is %d", tc.wantStatusCode, resp.StatusCode) + } + if !tc.expectResult { + return + } + var result map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + t.Fatalf("error parsing response body: %s", err) + } + resultStr, ok := result["result"].(string) + if !ok { + t.Fatalf("expected 'result' field to be a string, got %T", result["result"]) + } + var entries []interface{} + if err := json.Unmarshal([]byte(resultStr), &entries); err != nil { + t.Fatalf("error unmarshalling result string: %v", err) + } + + if len(entries) != 1 { + t.Fatalf("expected exactly one entry, but got %d", len(entries)) + } + entry, ok := entries[0].(map[string]interface{}) + if !ok { + t.Fatalf("expected entry to be a map, got %T", entries[0]) + } + locID, ok := entry["locationId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'locationId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantLocationID != "" && locID != tc.wantLocationID { + t.Fatalf("expected locationId to be %q, got %q", tc.wantLocationID, locID) + } + prodID, ok := entry["dataProductId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'dataProductId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantDataProductID != "" && prodID != tc.wantDataProductID { + t.Fatalf("expected dataProductId to be %q, got %q", tc.wantDataProductID, prodID) + } + // Assert raw SDK fields are cleaned/removed + if _, ok := entry["uid"]; ok { + t.Errorf("expected entry to NOT have 'uid' field, but it was found") + } + if _, ok := entry["etag"]; ok { + t.Errorf("expected entry to NOT have 'etag' field, but it was found") + } + if _, ok := entry["createTime"]; ok { + t.Errorf("expected entry to NOT have 'createTime' field, but it was found") + } + }) + } +} + +func runDataplexGetDataProductToolInvokeTest(t *testing.T, dataProductId string) { + idToken, err := tests.GetGoogleIdToken(t) + if err != nil { + t.Fatalf("error getting Google ID token: %s", err) + } + + testCases := []struct { + name string + api string + requestHeader map[string]string + requestBody io.Reader + wantStatusCode int + expectResult bool + wantLocationID string + wantDataProductID string + }{ + { + name: "Success - Get Product (Authorized)", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": idToken}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us-central1", + wantDataProductID: dataProductId, + }, + { + name: "Success - Get Product (Un-authorized)", + api: "http://127.0.0.1:5000/api/tool/my-dataplex-get-data-product-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 200, + expectResult: true, + wantLocationID: "us-central1", + wantDataProductID: dataProductId, + }, + { + name: "Failure - Invalid Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", + requestHeader: map[string]string{"my-google-auth_token": "invalid_token"}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 401, + expectResult: false, + }, + { + name: "Failure - Without Authorization Token", + api: "http://127.0.0.1:5000/api/tool/my-auth-dataplex-get-data-product-tool/invoke", + requestHeader: map[string]string{}, + requestBody: bytes.NewBuffer([]byte(fmt.Sprintf("{\"locationId\":\"us-central1\",\"dataProductId\":\"%s\"}", dataProductId))), + wantStatusCode: 401, + expectResult: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + req, err := http.NewRequest(http.MethodPost, tc.api, tc.requestBody) + if err != nil { + t.Fatalf("unable to create request: %s", err) + } + req.Header.Add("Content-type", "application/json") + for k, v := range tc.requestHeader { + req.Header.Add(k, v) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("unable to send request: %s", err) + } + defer resp.Body.Close() + if resp.StatusCode != tc.wantStatusCode { + t.Fatalf("response status code is not %d. It is %d", tc.wantStatusCode, resp.StatusCode) + } + if !tc.expectResult { + return + } + var result map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + t.Fatalf("error parsing response body: %s", err) + } + resultStr, ok := result["result"].(string) + if !ok { + t.Fatalf("expected 'result' field to be a string, got %T", result["result"]) + } + var entry map[string]interface{} + if err := json.Unmarshal([]byte(resultStr), &entry); err != nil { + t.Fatalf("error unmarshalling result string: %v", err) + } + + locID, ok := entry["locationId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'locationId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantLocationID != "" && locID != tc.wantLocationID { + t.Fatalf("expected locationId to be %q, got %q", tc.wantLocationID, locID) + } + prodID, ok := entry["dataProductId"].(string) + if !ok { + t.Fatalf("expected entry to have key 'dataProductId' as string, but it was not found or not a string in %v", entry) + } + if tc.wantDataProductID != "" && prodID != tc.wantDataProductID { + t.Fatalf("expected dataProductId to be %q, got %q", tc.wantDataProductID, prodID) + } + // Additionally assert key fields are populated + if entry["displayName"] == "" { + t.Errorf("displayName should not be empty") + } + if entry["ownerEmails"] == nil { + t.Errorf("ownerEmails should not be nil") + } + // Assert access groups are mapped correctly + accessGroups, ok := entry["accessGroups"].([]interface{}) + if !ok { + t.Fatalf("expected 'accessGroups' to be a slice, got %T", entry["accessGroups"]) + } + if len(accessGroups) != 1 { + t.Fatalf("expected 1 access group, got %d", len(accessGroups)) + } + ag, ok := accessGroups[0].(map[string]interface{}) + if !ok { + t.Fatalf("expected access group to be a map, got %T", accessGroups[0]) + } + if ag["id"] != "test-group" { + t.Errorf("expected access group id 'test-group', got %q", ag["id"]) + } + if ag["googleGroup"] != tests.ServiceAccountEmail { + t.Errorf("expected googleGroup %q, got %q", tests.ServiceAccountEmail, ag["googleGroup"]) + } + if ag["serviceAccount"] != nil { + t.Errorf("expected serviceAccount to be nil, got %q", ag["serviceAccount"]) + } + }) + } +} + func runDataplexEnrichmentToolInvokeTest(t *testing.T, tableName string, datasetName string, bucketName string, client *dataplex.DataScanClient) { ctx := context.Background() tableResource := fmt.Sprintf("//bigquery.googleapis.com/projects/%s/datasets/%s/tables/%s", DataplexProject, datasetName, tableName)