Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 263 additions & 0 deletions .github/workflows/backend-selective.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
name: backend (selective)

# Triggered by pull_request events only.
on:
pull_request:

# Cancel in-progress runs of this workflow when a newer run starts in the same group.
# The group key is the workflow name plus the PR head ref; github.run_id is the
# fallback when head_ref is empty.
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

# Workflow-level environment variables.
# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359
# NOTE(review): the linked workaround presumably concerns SEGMENT_DOWNLOAD_TIMEOUT_MINS only — confirm.
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 3
SNUBA_NO_WORKERS: 1

jobs:
files-changed:
# Evaluates the path filters in .github/file-filters.yml and re-exports the
# results as job outputs. The other jobs in this workflow gate on
# `needs.files-changed.outputs.backend == 'true'`.
name: detect what files changed
runs-on: ubuntu-24.04
timeout-minutes: 3
outputs:
# Each output mirrors an output of the `changes` step below.
api_docs: ${{ steps.changes.outputs.api_docs }}
# Note the name mapping: the job output `backend` is fed by `backend_all`.
backend: ${{ steps.changes.outputs.backend_all }}
backend_dependencies: ${{ steps.changes.outputs.backend_dependencies }}
backend_api_urls: ${{ steps.changes.outputs.backend_api_urls }}
backend_any_type: ${{ steps.changes.outputs.backend_any_type }}
migration_lockfile: ${{ steps.changes.outputs.migration_lockfile }}
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

# The filter definitions live in the repository file passed as `filters`.
- name: Check for backend file changes
uses: dorny/paths-filter@0bc4621a3135347011ad047f9ecf449bf72ce2bd # v3.0.0
id: changes
with:
token: ${{ github.token }}
filters: .github/file-filters.yml

prepare-selective-tests:
  # Finds the most recent coverage data in GCS at or before the PR base commit,
  # downloads it, lists the files changed by the PR, and runs
  # compute-selected-tests.py on both. The coverage database and the selected
  # test list are handed to later jobs as artifacts.
  if: needs.files-changed.outputs.backend == 'true'
  needs: files-changed
  name: prepare selective tests
  runs-on: ubuntu-24.04
  timeout-minutes: 10
  permissions:
    contents: read
    id-token: write
  outputs:
    # 'true' only when a coverage file was actually downloaded. The
    # coverage-db artifact is uploaded under that same condition, so consumers
    # that download the artifact when has-coverage is 'true' never request an
    # artifact that does not exist (a GCS prefix can exist without containing
    # a .coverage.* file).
    has-coverage: ${{ steps.download-coverage.outputs.coverage-file != '' }}
    coverage-sha: ${{ steps.find-coverage.outputs.coverage-sha }}
    changed-files: ${{ steps.changed-files.outputs.files }}
    test-count: ${{ steps.compute-tests.outputs.test-count }}
    has-selected-tests: ${{ steps.compute-tests.outputs.has-selected-tests }}
  steps:
    - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332  # v4.1.7
      with:
        fetch-depth: 0  # Need full history for git diff

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13.1'

    - name: Authenticate to Google Cloud
      id: gcloud-auth
      uses: google-github-actions/auth@v2
      with:
        project_id: sentry-dev-tooling
        workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
        service_account: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}

    - name: Find coverage data for selective testing
      id: find-coverage
      env:
        GCS_BUCKET: sentry-coverage-data
        # Expression values are passed through the environment instead of
        # being interpolated into the script text.
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
      run: |
        set -euo pipefail

        # BASE_SHA is the base commit (what the PR branches from)
        echo "Looking for coverage data starting from base commit: $BASE_SHA"

        COVERAGE_SHA=""
        for sha in $(git rev-list "$BASE_SHA" --max-count=30); do
          # Check if coverage exists in GCS for this commit
          if gcloud storage ls "gs://${GCS_BUCKET}/${sha}/" &>/dev/null; then
            COVERAGE_SHA="$sha"
            echo "Found coverage data at commit: $sha"
            break
          fi
          echo "No coverage at $sha, checking parent..."
        done

        if [[ -z "$COVERAGE_SHA" ]]; then
          echo "No coverage found in last 30 commits, will run full test suite"
          echo "found=false" >> "$GITHUB_OUTPUT"
        else
          echo "found=true" >> "$GITHUB_OUTPUT"
          echo "coverage-sha=$COVERAGE_SHA" >> "$GITHUB_OUTPUT"
        fi

    - name: Download coverage database
      id: download-coverage
      if: steps.find-coverage.outputs.found == 'true'
      env:
        COVERAGE_SHA: ${{ steps.find-coverage.outputs.coverage-sha }}
      run: |
        set -euxo pipefail
        mkdir -p .coverage
        # Best effort: a failed copy is detected by the file check below.
        gcloud storage cp "gs://sentry-coverage-data/${COVERAGE_SHA}/*" .coverage/ || true

        # Find the coverage file (could be .coverage.* format)
        COVERAGE_FILE=$(ls .coverage/.coverage.* 2>/dev/null | head -1 || true)
        if [[ -z "$COVERAGE_FILE" ]]; then
          echo "Warning: No coverage file found in downloaded data"
          ls -la .coverage/ || true
          echo "coverage-file=" >> "$GITHUB_OUTPUT"
        else
          echo "Downloaded coverage file: $COVERAGE_FILE"
          echo "coverage-file=$COVERAGE_FILE" >> "$GITHUB_OUTPUT"
        fi

    - name: Get changed files
      id: changed-files
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
      run: |
        # pipefail: a failing `git diff` must fail the step rather than
        # silently produce an empty changed-file list.
        set -euo pipefail

        # Get files changed between base and head of PR
        CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" | tr '\n' ' ')
        echo "Changed files: $CHANGED_FILES"
        echo "files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"

    - name: Compute selected tests
      id: compute-tests
      if: steps.download-coverage.outputs.coverage-file != ''
      env:
        # File names come from the pull request and are untrusted input.
        # Passing them via the environment keeps them out of the script text,
        # so shell metacharacters in a path cannot be executed.
        COVERAGE_DB_FILE: ${{ steps.download-coverage.outputs.coverage-file }}
        PR_CHANGED_FILES: ${{ steps.changed-files.outputs.files }}
      run: |
        python3 .github/workflows/scripts/compute-selected-tests.py \
          --coverage-db "$COVERAGE_DB_FILE" \
          --changed-files "$PR_CHANGED_FILES" \
          --output .artifacts/selected-tests.txt \
          --github-output

    - name: Upload coverage database artifact
      if: steps.download-coverage.outputs.coverage-file != ''
      uses: actions/upload-artifact@v4
      with:
        name: coverage-db-${{ github.run_id }}
        path: .coverage/
        retention-days: 1
        # The coverage files are dotfiles (.coverage.*).
        include-hidden-files: true

    - name: Upload selected tests artifact
      if: steps.compute-tests.outputs.has-selected-tests == 'true'
      uses: actions/upload-artifact@v4
      with:
        name: selected-tests-${{ github.run_id }}
        path: .artifacts/selected-tests.txt
        retention-days: 1

calculate-shards:
# Runs calculate-backend-test-shards.py and exposes its shard count and shard
# index list as job outputs; the test job builds its matrix from shard-indices.
if: needs.files-changed.outputs.backend == 'true'
needs: [files-changed, prepare-selective-tests]
name: calculate test shards
runs-on: ubuntu-24.04
timeout-minutes: 5
outputs:
shard-count: ${{ steps.calculate-shards.outputs.shard-count }}
shard-indices: ${{ steps.calculate-shards.outputs.shard-indices }}
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Setup sentry env
uses: ./.github/actions/setup-sentry
id: setup
with:
mode: backend-ci
skip-devservices: true

# Only fetched when the prepare job reported a selected-tests list.
- name: Download selected tests artifact
if: needs.prepare-selective-tests.outputs.has-selected-tests == 'true'
uses: actions/download-artifact@v4
with:
name: selected-tests-${{ github.run_id }}
path: .artifacts/

- name: Calculate test shards
id: calculate-shards
env:
# Path of the downloaded list, or the empty string when no tests were selected.
SELECTED_TESTS_FILE: ${{ needs.prepare-selective-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || '' }}
# NOTE(review): how the script consumes SELECTED_TEST_COUNT is not visible here — confirm it is read.
SELECTED_TEST_COUNT: ${{ needs.prepare-selective-tests.outputs.test-count }}
run: |
python3 .github/workflows/scripts/calculate-backend-test-shards.py

backend-test-selective:
# Runs `make test-python-ci` once per shard index produced by calculate-shards,
# passing the changed-file list and the coverage database path via env.
if: needs.files-changed.outputs.backend == 'true'
needs: [files-changed, prepare-selective-tests, calculate-shards]
name: backend test
runs-on: ubuntu-24.04
timeout-minutes: 60
permissions:
contents: read
id-token: write
actions: read
strategy:
# Let the remaining shards finish when one shard fails.
fail-fast: false
matrix:
# shard-indices is a JSON array string; fromJSON turns it into the matrix values.
instance: ${{ fromJSON(needs.calculate-shards.outputs.shard-indices) }}

env:
MATRIX_INSTANCE_TOTAL: ${{ needs.calculate-shards.outputs.shard-count }}
TEST_GROUP_STRATEGY: roundrobin

steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
# NOTE(review): full history is fetched here; presumably needed by the test tooling — confirm.
fetch-depth: 0

- name: Setup sentry env
uses: ./.github/actions/setup-sentry
id: setup
with:
mode: backend-ci

# NOTE(review): this step requires the coverage-db artifact to exist whenever
# has-coverage is 'true' — verify that prepare-selective-tests only reports
# 'true' when it actually uploaded the artifact.
- name: Download coverage database artifact
if: needs.prepare-selective-tests.outputs.has-coverage == 'true'
uses: actions/download-artifact@v4
with:
name: coverage-db-${{ github.run_id }}
path: .coverage/

# Picks the first .coverage/.coverage.* file; the output is empty when none matches.
- name: Find coverage file
id: find-coverage-file
if: needs.prepare-selective-tests.outputs.has-coverage == 'true'
run: |
COVERAGE_FILE=$(ls .coverage/.coverage.* 2>/dev/null | head -1 || true)
echo "coverage-file=$COVERAGE_FILE" >> "$GITHUB_OUTPUT"

- name: Run backend tests (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
id: run_backend_tests
run: make test-python-ci
env:
# COVERAGE_DB_PATH is empty when the find-coverage-file step was skipped.
CHANGED_FILES: ${{ needs.prepare-selective-tests.outputs.changed-files }}
COVERAGE_DB_PATH: ${{ steps.find-coverage-file.outputs.coverage-file }}

- name: Inspect failure
if: failure()
run: |
if command -v devservices; then
devservices logs
fi

- name: Collect test data
# Runs after failures as well; skipped only when the run was cancelled.
uses: ./.github/actions/collect-test-data
if: ${{ !cancelled() }}
with:
artifact_path: .artifacts/pytest.json
gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }}
gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }}
workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}
matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }}
32 changes: 27 additions & 5 deletions .github/workflows/scripts/calculate-backend-test-shards.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@
import re
import subprocess
import sys
from pathlib import Path

TESTS_PER_SHARD = 1200
MIN_SHARDS = 1
MAX_SHARDS = 22
DEFAULT_SHARDS = 22

PYTEST_ARGS = [
PYTEST_BASE_ARGS = [
"pytest",
"--collect-only",
"--quiet",
"tests",
]

PYTEST_IGNORE_ARGS = [
"--ignore=tests/acceptance",
"--ignore=tests/apidocs",
"--ignore=tests/js",
Expand All @@ -24,9 +27,30 @@


def collect_test_count():
"""Collect test count, optionally filtering to selected test files."""
selected_tests_file = os.environ.get("SELECTED_TESTS_FILE")

if selected_tests_file:
path = Path(selected_tests_file)
if not path.exists():
print(f"Selected tests file not found: {selected_tests_file}", file=sys.stderr)
return None

with path.open() as f:
selected_files = [line.strip() for line in f if line.strip()]

if not selected_files:
print("No selected test files, running 0 tests", file=sys.stderr)
return 0

print(f"Counting tests in {len(selected_files)} selected files", file=sys.stderr)
pytest_args = PYTEST_BASE_ARGS + selected_files
else:
pytest_args = PYTEST_BASE_ARGS + ["tests"] + PYTEST_IGNORE_ARGS

try:
result = subprocess.run(
PYTEST_ARGS,
pytest_args,
capture_output=True,
text=True,
check=False,
Expand All @@ -40,7 +64,6 @@ def collect_test_count():
print(f"Collected {count} tests", file=sys.stderr)
return count

# If no match, check if pytest failed
if result.returncode != 0:
print(
f"Pytest collection failed (exit {result.returncode})",
Expand Down Expand Up @@ -85,7 +108,6 @@ def calculate_shards(test_count):
def main():
test_count = collect_test_count()
shard_count = calculate_shards(test_count)
# Generate a JSON array of shard indices [0, 1, 2, ..., shard_count-1]
shard_indices = json.dumps(list(range(shard_count)))

github_output = os.getenv("GITHUB_OUTPUT")
Expand Down
Loading
Loading