Test warehouse platform #144
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Test warehouse platform | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| warehouse-type: | |
| type: choice | |
| required: true | |
| description: Type of warehouse platform | |
| options: | |
| - postgres | |
| - snowflake | |
| - bigquery | |
| - redshift | |
| - databricks_catalog | |
| - spark | |
| - athena | |
| - clickhouse | |
| - duckdb | |
| - trino | |
| - dremio | |
| - fabric | |
| - sqlserver | |
| - vertica | |
| elementary-ref: | |
| type: string | |
| required: false | |
| description: Branch or tag to checkout for 'elementary' repository | |
| dbt-data-reliability-ref: | |
| type: string | |
| required: false | |
| description: Branch or tag to checkout for 'dbt-data-reliability' repository | |
| dbt-version: | |
| type: string | |
| required: false | |
| description: dbt's version to test with | |
| generate-data: | |
| type: boolean | |
| required: false | |
| default: false | |
| description: Whether to generate new data | |
| workflow_call: | |
| inputs: | |
| warehouse-type: | |
| type: string | |
| required: true | |
| elementary-ref: | |
| type: string | |
| required: false | |
| dbt-data-reliability-ref: | |
| type: string | |
| required: false | |
| dbt-version: | |
| type: string | |
| required: false | |
| generate-data: | |
| type: boolean | |
| required: false | |
| default: false | |
| secrets: | |
| CI_WAREHOUSE_SECRETS: | |
| required: true | |
| CI_SLACK_WEBHOOK: | |
| required: true | |
| CI_SLACK_TOKEN: | |
| required: true | |
| AWS_OIDC_ROLE_ARN: | |
| required: true | |
| permissions: {} | |
| env: | |
| BRANCH_NAME: ${{ github.head_ref || github.ref_name }} | |
| DBT_VERSION: ${{ inputs.dbt-version || '' }} | |
| WAREHOUSE_TYPE: ${{ inputs.warehouse-type }} | |
| ELEMENTARY_DBT_PACKAGE_PATH: ${{ github.workspace }}/dbt-data-reliability | |
| CLI_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project | |
| E2E_DBT_PROJECT_DIR: ${{ github.workspace }}/elementary/tests/e2e_dbt_project | |
| jobs: | |
| test: | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| id-token: write | |
| defaults: | |
| run: | |
| working-directory: elementary | |
| concurrency: | |
| # Serialises runs for the same warehouse × dbt-version × branch. | |
| # The schema name is derived from a hash of this group (see "Write dbt profiles"). | |
| group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }} | |
| cancel-in-progress: true | |
| steps: | |
| - name: Checkout Elementary | |
| uses: actions/checkout@v6 | |
| with: | |
| path: elementary | |
| ref: ${{ inputs.elementary-ref }} | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@v4 | |
| with: | |
| role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }} | |
| aws-region: eu-west-1 | |
| - name: Checkout dbt package | |
| uses: actions/checkout@v6 | |
| with: | |
| repository: elementary-data/dbt-data-reliability | |
| path: dbt-data-reliability | |
| ref: ${{ inputs.dbt-data-reliability-ref }} | |
| - name: Validate workflow inputs | |
| run: | | |
| case "$WAREHOUSE_TYPE" in | |
| postgres|snowflake|bigquery|redshift|databricks_catalog|spark|athena|clickhouse|duckdb|trino|dremio|fabric|sqlserver|vertica) ;; | |
| *) | |
| echo "Unsupported warehouse type: $WAREHOUSE_TYPE" >&2 | |
| exit 1 | |
| ;; | |
| esac | |
| if [ -n "$DBT_VERSION" ] && ! [[ "$DBT_VERSION" =~ ^[0-9]+(\.[0-9]+){1,2}([a-zA-Z0-9._+-]+)?$ ]]; then | |
| echo "Unsupported dbt version: $DBT_VERSION" >&2 | |
| exit 1 | |
| fi | |
| # ── Seed cache: compute key & restore volumes BEFORE starting services ── | |
| # This ensures Docker volumes are populated before containers initialize. | |
| - name: Compute seed cache key | |
| id: seed-cache-key | |
| if: inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse' || inputs.warehouse-type == 'duckdb' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| # Cache key is a hash of seed-related files so that cache busts when | |
| # the data generation script, dbt project config, or seed schemas change. | |
| SEED_HASH=$( | |
| { | |
| cat generate_data.py \ | |
| dbt_project.yml \ | |
| docker-compose.yml \ | |
| ${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2 | |
| echo "dbt_version=$DBT_VERSION" | |
| } | sha256sum | head -c 16 | |
| ) | |
| echo "seed-hash=$SEED_HASH" >> "$GITHUB_OUTPUT" | |
| - name: Restore seed cache | |
| id: seed-cache | |
| if: steps.seed-cache-key.outputs.seed-hash | |
| uses: actions/cache@v5 | |
| with: | |
| path: /tmp/seed-cache-${{ inputs.warehouse-type }} | |
| key: seed-${{ inputs.warehouse-type }}-${{ steps.seed-cache-key.outputs.seed-hash }} | |
| - name: Restore cached seed data into Docker volumes | |
| if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type != 'duckdb' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: bash ci/restore_seed_cache.sh "$WAREHOUSE_TYPE" | |
| - name: Restore cached DuckDB seed | |
| if: steps.seed-cache.outputs.cache-hit == 'true' && inputs.warehouse-type == 'duckdb' | |
| run: | | |
| cp /tmp/seed-cache-duckdb/elementary_test.duckdb /tmp/elementary_test.duckdb | |
| echo "DuckDB seed cache restored." | |
| # ── Start warehouse services ────────────────────────────────────────── | |
| - name: Start Postgres | |
| if: inputs.warehouse-type == 'postgres' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: docker compose up -d postgres | |
| - name: Start Clickhouse | |
| if: inputs.warehouse-type == 'clickhouse' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: docker compose up -d clickhouse | |
| - name: Start Trino | |
| if: inputs.warehouse-type == 'trino' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| docker compose up -d --wait trino | |
| - name: Start Dremio | |
| if: inputs.warehouse-type == 'dremio' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| # Start Dremio services in detached mode with healthchecks, then | |
| # run the setup container separately. Using --exit-code-from would | |
| # imply --abort-on-container-exit, killing all services when the | |
| # setup container finishes. | |
| docker compose up -d --wait dremio dremio-minio nessie | |
| docker compose run --rm dremio-setup | |
| - name: Start Spark | |
| if: inputs.warehouse-type == 'spark' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| docker compose up -d --build --wait spark-thrift | |
| - name: Start SQL Server | |
| if: inputs.warehouse-type == 'sqlserver' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| docker compose up -d --wait sqlserver | |
| - name: Start Vertica | |
| if: inputs.warehouse-type == 'vertica' | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| docker compose up -d --wait vertica | |
| - name: Setup Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.10" | |
| - name: Install Spark requirements | |
| if: inputs.warehouse-type == 'spark' | |
| run: sudo apt-get install -y python3-dev libsasl2-dev gcc | |
| - name: Install ODBC driver for SQL Server | |
| if: inputs.warehouse-type == 'fabric' || inputs.warehouse-type == 'sqlserver' | |
| run: | | |
| curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc > /dev/null | |
| curl -fsSL https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list > /dev/null | |
| sudo apt-get update | |
| sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 unixodbc-dev | |
| - name: Install dbt | |
| if: inputs.warehouse-type != 'vertica' | |
| run: | | |
| DBT_CORE_SPEC="dbt-core" | |
| DBT_ADAPTER="$WAREHOUSE_TYPE" | |
| DBT_ADAPTER_EXTRA="" | |
| DBT_ADAPTER_VERSION_SPEC="" | |
| case "$WAREHOUSE_TYPE" in | |
| databricks_catalog) DBT_ADAPTER="databricks" ;; | |
| athena) DBT_ADAPTER="athena-community" ;; | |
| spark) DBT_ADAPTER_EXTRA="[PyHive]" ;; | |
| esac | |
| if [ -n "$DBT_VERSION" ]; then | |
| DBT_CORE_SPEC="dbt-core==$DBT_VERSION" | |
| DBT_ADAPTER_VERSION_SPEC="~=$DBT_VERSION" | |
| fi | |
| pip install "$DBT_CORE_SPEC" "dbt-${DBT_ADAPTER}${DBT_ADAPTER_EXTRA}${DBT_ADAPTER_VERSION_SPEC}" | |
| # dbt-vertica pins dbt-core~=1.8 which lacks the 'arguments' attribute | |
| # used by newer dbt-core. Install dbt-vertica without deps first, then | |
| # install the latest compatible dbt-core separately. We also install | |
| # vertica-python (dbt-vertica's runtime dep) explicitly. | |
| - name: Install dbt (Vertica) | |
| if: inputs.warehouse-type == 'vertica' | |
| run: | | |
| pip install --no-deps dbt-vertica | |
| pip install vertica-python | |
| if [ -n "$DBT_VERSION" ]; then | |
| pip install "dbt-core==$DBT_VERSION" | |
| else | |
| pip install dbt-core | |
| fi | |
| - name: Install Elementary | |
| run: | | |
| pip install -r dev-requirements.txt | |
| # For Vertica, dbt-vertica is already installed with --no-deps above; | |
| # using ".[vertica]" would re-resolve dbt-vertica's deps and downgrade | |
| # dbt-core to ~=1.8. Install elementary without the adapter extra. | |
| if [ "$WAREHOUSE_TYPE" = "vertica" ]; then | |
| pip install "." | |
| else | |
| EXTRA="$WAREHOUSE_TYPE" | |
| if [ "$WAREHOUSE_TYPE" = "databricks_catalog" ]; then | |
| EXTRA="databricks" | |
| fi | |
| pip install ".[$EXTRA]" | |
| fi | |
| - name: Write dbt profiles | |
| env: | |
| CI_WAREHOUSE_SECRETS: ${{ secrets.CI_WAREHOUSE_SECRETS || '' }} | |
| run: | | |
| # Docker-based adapters use ephemeral containers, so a fixed schema | |
| # name is safe (the concurrency group prevents parallel collisions). | |
| # This enables caching the seeded database state between runs. | |
| IS_DOCKER=false | |
| case "$WAREHOUSE_TYPE" in | |
| postgres|clickhouse|trino|dremio|duckdb|spark|sqlserver|vertica) IS_DOCKER=true ;; | |
| esac | |
| if [ "$IS_DOCKER" = "true" ]; then | |
| SCHEMA_NAME="elementary_tests" | |
| echo "Schema name: $SCHEMA_NAME (fixed for Docker adapter '$WAREHOUSE_TYPE')" | |
| else | |
| # Cloud adapters: unique schema per run to avoid collisions. | |
| # Schema name = py_<YYMMDD_HHMMSS>_<branch≤19>_<8-char hash> | |
| CONCURRENCY_GROUP="tests_${WAREHOUSE_TYPE}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | |
| SHORT_HASH=$(echo -n "$CONCURRENCY_GROUP" | sha256sum | head -c 8) | |
| SAFE_BRANCH=$(echo "${BRANCH_NAME}" | awk '{print tolower($0)}' | sed "s/[^a-z0-9]/_/g; s/__*/_/g" | head -c 19) | |
| DATE_STAMP=$(date -u +%y%m%d_%H%M%S) | |
| SCHEMA_NAME="py_${DATE_STAMP}_${SAFE_BRANCH}_${SHORT_HASH}" | |
| echo "Schema name: $SCHEMA_NAME (branch='${BRANCH_NAME}', timestamp=${DATE_STAMP}, hash of concurrency group)" | |
| fi | |
| echo "SCHEMA_NAME=$SCHEMA_NAME" >> "$GITHUB_ENV" | |
| python "${{ github.workspace }}/elementary/tests/profiles/generate_profiles.py" \ | |
| --template "${{ github.workspace }}/elementary/tests/profiles/profiles.yml.j2" \ | |
| --output ~/.dbt/profiles.yml \ | |
| --schema-name "$SCHEMA_NAME" | |
| - name: Run Python package unit tests | |
| run: pytest -vv tests/unit --warehouse-type "$WAREHOUSE_TYPE" | |
| - name: Run Python package integration tests | |
| run: pytest -vv tests/integration --warehouse-type "$WAREHOUSE_TYPE" | |
| - name: Install dbt package | |
| run: | | |
| ELEMENTARY_PKG_LOCATION=$(pip show elementary-data | grep -i location | awk '{print $2}') | |
| DBT_PROJECT_PATH="$ELEMENTARY_PKG_LOCATION/elementary/monitor/dbt_project" | |
| DBT_PKGS_PATH="$DBT_PROJECT_PATH/dbt_packages" | |
| dbt deps --project-dir "$DBT_PROJECT_PATH" | |
| rm -rf "$DBT_PKGS_PATH/elementary" | |
| ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary" | |
| - name: Run deps for E2E dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| env: | |
| ELEMENTARY_DBT_PACKAGE_PATH: ${{ env.ELEMENTARY_DBT_PACKAGE_PATH }} | |
| run: | | |
| dbt deps | |
| - name: Generate seed data | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| if: steps.seed-cache.outputs.cache-hit != 'true' | |
| run: python generate_data.py | |
| - name: Seed e2e dbt project (external) | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| if: steps.seed-cache.outputs.cache-hit != 'true' && (inputs.warehouse-type == 'dremio' || inputs.warehouse-type == 'spark') | |
| run: python load_seeds_external.py "$WAREHOUSE_TYPE" "$SCHEMA_NAME" data | |
| - name: Seed e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type != 'dremio' && inputs.warehouse-type != 'spark' | |
| run: dbt seed -f --target "$WAREHOUSE_TYPE" | |
| - name: Save seed cache from Docker volumes | |
| if: steps.seed-cache.outputs.cache-hit != 'true' && (inputs.warehouse-type == 'postgres' || inputs.warehouse-type == 'clickhouse') | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: bash ci/save_seed_cache.sh "$WAREHOUSE_TYPE" | |
| - name: Save DuckDB seed cache | |
| if: steps.seed-cache.outputs.cache-hit != 'true' && inputs.warehouse-type == 'duckdb' | |
| run: | | |
| mkdir -p /tmp/seed-cache-duckdb | |
| cp /tmp/elementary_test.duckdb /tmp/seed-cache-duckdb/elementary_test.duckdb | |
| echo "DuckDB seed cache saved." | |
| - name: Run e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| run: | | |
| # Dremio needs single-threaded execution to avoid Nessie catalog race conditions | |
| EXTRA_ARGS=() | |
| if [ "$WAREHOUSE_TYPE" = "dremio" ]; then | |
| EXTRA_ARGS+=(--threads 1) | |
| fi | |
| dbt run --target "$WAREHOUSE_TYPE" "${EXTRA_ARGS[@]}" || true | |
| # Validate run_results.json: only error_model should be non-success | |
| jq -e ' | |
| [.results[] | select(.status != "success") | .unique_id] | |
| | length == 1 and .[0] == "model.elementary_integration_tests.error_model" | |
| ' target/run_results.json > /dev/null | |
| jq_exit=$? | |
| if [ $jq_exit -eq 0 ]; then | |
| echo "✅ Validation passed: only error_model failed." | |
| else | |
| echo "❌ Validation failed. Unexpected failures:" | |
| jq '[.results[] | select(.status != "success") | .unique_id] | join(", ")' target/run_results.json | |
| fi | |
| exit $jq_exit | |
| - name: Test e2e dbt project | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| continue-on-error: true | |
| run: | | |
| # Dremio needs single-threaded execution to avoid Nessie catalog race conditions | |
| EXTRA_ARGS=() | |
| if [ "$WAREHOUSE_TYPE" = "dremio" ]; then | |
| EXTRA_ARGS+=(--threads 1 --exclude tag:ephemeral_model) | |
| fi | |
| dbt test --target "$WAREHOUSE_TYPE" "${EXTRA_ARGS[@]}" | |
| - name: Run help | |
| run: edr --help | |
| - name: Run monitor | |
| env: | |
| SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }} | |
| run: | | |
| MONITOR_ARGS=( | |
| -t "$WAREHOUSE_TYPE" | |
| --slack-webhook "$SLACK_WEBHOOK" | |
| --group-by table | |
| --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" | |
| --project-profile-target "$WAREHOUSE_TYPE" | |
| ) | |
| edr monitor "${MONITOR_ARGS[@]}" | |
| - name: Validate alerts statuses were updated | |
| working-directory: ${{ env.CLI_INTERNAL_DBT_PKG_DIR }} | |
| run: | | |
| dbt deps | |
| dbt run-operation validate_alert_statuses_are_updated -t "$WAREHOUSE_TYPE" | |
| - name: Run report | |
| run: > | |
| edr monitor report | |
| -t "$WAREHOUSE_TYPE" | |
| --project-dir "${{ env.E2E_DBT_PROJECT_DIR }}" | |
| --project-profile-target "$WAREHOUSE_TYPE" | |
| - name: Set report artifact name | |
| id: set_report_artifact_name | |
| run: | | |
| ARTIFACT_NAME=$(echo "report_${WAREHOUSE_TYPE}_${BRANCH_NAME}_dbt_${DBT_VERSION}.html" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') | |
| echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" | |
| - name: Upload report artifact | |
| uses: actions/upload-artifact@v6 | |
| with: | |
| name: ${{ steps.set_report_artifact_name.outputs.artifact_name }} | |
| path: elementary/edr_target/elementary_report.html | |
| - name: Run send report | |
| env: | |
| SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }} | |
| run: > | |
| edr monitor send-report | |
| -t "$WAREHOUSE_TYPE" | |
| --project-dir "$E2E_DBT_PROJECT_DIR" | |
| --project-profile-target "$WAREHOUSE_TYPE" | |
| --slack-file-name "report_${WAREHOUSE_TYPE}_${BRANCH_NAME}.html" | |
| --slack-token "$SLACK_TOKEN" | |
| --slack-channel-name oss-ci-tests | |
| --bucket-file-path "ci_reports/report_${WAREHOUSE_TYPE}_${BRANCH_NAME}.html" | |
| --s3-bucket-name elementary-ci-artifacts | |
| --update-bucket-website true | |
| - name: Set artifact name | |
| id: set_artifact_name | |
| run: | | |
| ARTIFACT_NAME=$(echo "edr_${WAREHOUSE_TYPE}_${BRANCH_NAME}_dbt_${DBT_VERSION}.log" | awk '{print tolower($0)}' | sed 's#[":/\\<>|*?-]#_#g') | |
| echo "artifact_name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" | |
| - name: Upload edr log | |
| if: ${{ always() }} | |
| uses: actions/upload-artifact@v6 | |
| with: | |
| name: ${{ steps.set_artifact_name.outputs.artifact_name }} | |
| path: elementary/edr_target/edr.log | |
| - name: Run Python package e2e tests | |
| run: pytest -vv tests/e2e --warehouse-type "$WAREHOUSE_TYPE" | |
| - name: Drop test schemas | |
| if: always() | |
| working-directory: ${{ env.E2E_DBT_PROJECT_DIR }} | |
| continue-on-error: true | |
| run: | | |
| dbt run-operation elementary_integration_tests.drop_test_schemas --target "$WAREHOUSE_TYPE" |