Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ workflow_combined:
operator: airflow.providers.amazon.aws.operators.glue.GlueJobOperator
retries: 0
job_name: setup-covid-db-job
script_location: '{proj.connection.default.s3_shared.s3Uri}dashboard-glue-quick/bundle/glue_setup_covid_db.py'
script_location: '{proj.connection.default.s3_shared.s3Uri}etl-scripts/bundle/glue_setup_covid_db.py'
Comment thread
driftnine marked this conversation as resolved.
s3_bucket: '{proj.connection.default.s3_shared.bucket}'
iam_role_name: '{proj.iam_role_name}'
region_name: '{domain.region}'
Expand All @@ -22,7 +22,7 @@ workflow_combined:
operator: airflow.providers.amazon.aws.operators.glue.GlueJobOperator
retries: 0
job_name: summary-glue-job
script_location: '{proj.connection.default.s3_shared.s3Uri}dashboard-glue-quick/bundle/glue_covid_summary_job.py'
script_location: '{proj.connection.default.s3_shared.s3Uri}etl-scripts/bundle/glue_covid_summary_job.py'
s3_bucket: '{proj.connection.default.s3_shared.bucket}'
iam_role_name: '{proj.iam_role_name}'
region_name: '{domain.region}'
Expand All @@ -31,7 +31,7 @@ workflow_combined:
'--DATABASE_NAME': 'covid19_db'
'--TABLE_NAME': 'us_simplified'
'--SUMMARY_DATABASE_NAME': 'covid19_summary_db'
'--S3_DATABASE_PATH': '{proj.connection.default.s3_shared.s3Uri}dashboard-glue-quick/output/databases/covid19_summary_db/'
'--S3_DATABASE_PATH': '{proj.connection.default.s3_shared.s3Uri}etl-scripts/output/databases/covid19_summary_db/'
'--BUCKET_NAME': '{proj.connection.default.s3_shared.bucket}'
dependencies: [setup_covid_db_task]
create_job_kwargs:
Expand All @@ -43,7 +43,7 @@ workflow_combined:
operator: airflow.providers.amazon.aws.operators.glue.GlueJobOperator
retries: 0
job_name: set-permission-check-job
script_location: '{proj.connection.default.s3_shared.s3Uri}dashboard-glue-quick/bundle/glue_set_permission_check.py'
script_location: '{proj.connection.default.s3_shared.s3Uri}etl-scripts/bundle/glue_set_permission_check.py'
s3_bucket: '{proj.connection.default.s3_shared.bucket}'
iam_role_name: '{proj.iam_role_name}'
region_name: '{domain.region}'
Expand Down
53 changes: 25 additions & 28 deletions examples/analytic-workflow/dashboard-glue-quick/manifest.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
applicationName: IntegrationTestETLWorkflow
content:
storage:
- name: dashboard-glue-quick
- name: etl-scripts
include:
- "*.py"
exclude:
Expand All @@ -27,18 +27,17 @@ stages:
dev:
stage: DEV
domain:
Comment thread
driftnine marked this conversation as resolved.
name: Default_02042026_Domain
tags:
purpose: smus-cicd-testing
region: ${DEV_DOMAIN_REGION:us-east-2}
region: us-east-1
project:
name: dev-marketing
name: admin-project-383445062261
owners:
- Eng1
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/GitHubActionsRole-SMUS-CLI-Tests
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/Admin
environment_variables:
S3_PREFIX: dev
AWS_REGION: ${DEV_DOMAIN_REGION:us-east-2}
AWS_REGION: us-east-1
GRANT_TO: Admin,service-role/aws-quicksight-service-role-v0
bootstrap:
actions:
Expand All @@ -53,12 +52,12 @@ stages:
wait: false
deployment_configuration:
storage:
- name: dashboard-glue-quick
- name: etl-scripts
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle
targetDirectory: etl-scripts/bundle
- name: workflows
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle/workflows
targetDirectory: etl-scripts/bundle/workflows
git:
- name: covid-19-dataset
connectionName: default.s3_shared
Expand All @@ -76,38 +75,37 @@ stages:
test:
stage: TEST
domain:
name: Default_03052026_Domain
tags:
purpose: smus-cicd-testing
region: ${TEST_DOMAIN_REGION:us-east-1}
region: us-west-2
project:
name: test-marketing
name: admin-project-383445062261
owners:
- Eng1
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/GitHubActionsRole-SMUS-CLI-Tests
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/Admin
environment_variables:
S3_PREFIX: test
AWS_REGION: ${TEST_DOMAIN_REGION:us-east-1}
AWS_REGION: us-west-2
GRANT_TO: Admin,service-role/aws-quicksight-service-role-v0
bootstrap:
actions:
- type: workflow.create
workflowName: covid_dashboard_glue_quick_pipeline
- type: workflow.run
workflowName: covid_dashboard_glue_quick_pipeline
trailLogs: true
# - type: workflow.run
# workflowName: covid_dashboard_glue_quick_pipeline
# trailLogs: true
- type: quicksight.refresh_dataset
refreshScope: IMPORTED
ingestionType: FULL_REFRESH
wait: false
deployment_configuration:
storage:
- name: dashboard-glue-quick
- name: etl-scripts
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle
targetDirectory: etl-scripts/bundle
- name: workflows
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle/workflows
targetDirectory: etl-scripts/bundle/workflows
git:
- name: covid-19-dataset
connectionName: default.s3_shared
Expand All @@ -125,18 +123,17 @@ stages:
prod:
stage: PROD
domain:
name: Default_03052026_Domain
tags:
purpose: smus-cicd-production
region: ${PROD_DOMAIN_REGION:us-east-1}
region: us-east-2
project:
name: prod-marketing
name: admin-project-383445062261
owners:
- Eng1
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/GitHubActionsRole-SMUS-CLI-Tests
- arn:aws:iam::${AWS_ACCOUNT_ID}:role/Admin
environment_variables:
S3_PREFIX: prod
AWS_REGION: ${PROD_DOMAIN_REGION:us-east-1}
AWS_REGION: us-east-2
GRANT_TO: Admin,service-role/aws-quicksight-service-role-v0
bootstrap:
actions:
Expand All @@ -151,12 +148,12 @@ stages:
wait: false
deployment_configuration:
storage:
- name: dashboard-glue-quick
- name: etl-scripts
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle
targetDirectory: etl-scripts/bundle
- name: workflows
connectionName: default.s3_shared
targetDirectory: dashboard-glue-quick/bundle/workflows
targetDirectory: etl-scripts/bundle/workflows
git:
- name: covid-19-dataset
connectionName: default.s3_shared
Expand Down
69 changes: 59 additions & 10 deletions src/smus_cicd/helpers/datazone.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ def get_project_user_role_arn(project_name: str, domain_name: str, region: str)
f"Project '{project_name}' not found in domain {domain_id}"
)

role_arn = None

# List environments to find tooling environment
datazone_client = _get_datazone_client(region)
environments_response = datazone_client.list_environments(
Expand All @@ -157,6 +159,28 @@ def get_project_user_role_arn(project_name: str, domain_name: str, region: str)
typer.echo(f"✅ DEBUG: Found userRoleArn={role_arn}")
return role_arn

# Fallback: Get role from project owners (for SMUS compatibility)
Comment thread
driftnine marked this conversation as resolved.
if not role_arn:
project_details = get_project_details(project_name, region, domain_name)
owners = project_details.get("owners", "")
if owners and isinstance(owners, str) and owners != "N/A":
# Parse the first owner ARN (owners is comma-separated string)
owner_list = [o.strip() for o in owners.split(",")]
if owner_list:
role_arn = owner_list[0] # Assume first owner is the execution role
typer.echo(f"✅ DEBUG: Using project owner role as fallback: {role_arn}")
return role_arn
else:
# Fallback for SMUS: parse account from project name and use Admin role
# Project name format: *-project-{account_id}
import re
match = re.search(r'-project-(\d+)$', project_name)
if match:
account_id = match.group(1)
role_arn = f"arn:aws:iam::{account_id}:role/Admin"
typer.echo(f"✅ DEBUG: Using default Admin role as fallback: {role_arn}")
return role_arn

raise ValueError(
f"No tooling environment with userRoleArn found for project '{project_name}'"
)
Expand Down Expand Up @@ -772,18 +796,43 @@ def get_project_details(project_name, region, domain_name):

project = response.get("project", {})

# Get project members using list_project_memberships (get_project may not include members in SMUS)
memberships_response = datazone_client.list_project_memberships(
domainIdentifier=domain_id, projectIdentifier=project_id
)

members = memberships_response.get("members", [])

# Collect owner ARNs
owner_arns = []
for member in members:
if member.get("designation") == "PROJECT_OWNER":
details = member.get("memberDetails", {})
if "user" in details:
user_id = details["user"].get("userIdentifier")
if user_id:
owner_arns.append(user_id)
elif "group" in details:
group_id = details["group"].get("groupId")
if group_id:
# For IAM role groups, get the role ARN from group profile
try:
group_response = datazone_client.search_group_profiles(
domainIdentifier=domain_id,
groupType="IAM_ROLE_SESSION_GROUP"
)
for group in group_response.get("items", []):
if group.get("id") == group_id:
role_arn = group.get("rolePrincipalArn") or group.get("groupName")
if role_arn:
owner_arns.append(role_arn)
break
except Exception:
pass # Skip if can't get group profile

return {
"status": project.get("projectStatus", "UNKNOWN"),
"owners": ", ".join(
[
member.get("memberDetails", {})
.get("user", {})
.get("userIdentifier", "Unknown")
for member in project.get("projectMembers", [])
if member.get("designation") == "PROJECT_OWNER"
]
)
or "N/A",
"owners": ", ".join(owner_arns) or "N/A",
"projectId": project_id,
"domainId": domain_id,
}
Expand Down
Loading