diff --git a/.github/workflows/backend-selective.yml b/.github/workflows/backend-selective.yml new file mode 100644 index 00000000000000..fc01d10520c6a3 --- /dev/null +++ b/.github/workflows/backend-selective.yml @@ -0,0 +1,271 @@ +name: backend (selective) + +on: + pull_request: + +# Cancel in progress workflows on pull_requests. +# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359 +env: + SEGMENT_DOWNLOAD_TIMEOUT_MINS: 3 + SNUBA_NO_WORKERS: 1 + +jobs: + files-changed: + name: detect what files changed + runs-on: ubuntu-24.04 + timeout-minutes: 3 + outputs: + api_docs: ${{ steps.changes.outputs.api_docs }} + backend: ${{ steps.changes.outputs.backend_all }} + backend_dependencies: ${{ steps.changes.outputs.backend_dependencies }} + backend_api_urls: ${{ steps.changes.outputs.backend_api_urls }} + backend_any_type: ${{ steps.changes.outputs.backend_any_type }} + migration_lockfile: ${{ steps.changes.outputs.migration_lockfile }} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Check for backend file changes + uses: dorny/paths-filter@0bc4621a3135347011ad047f9ecf449bf72ce2bd # v3.0.0 + id: changes + with: + token: ${{ github.token }} + filters: .github/file-filters.yml + + prepare-selective-tests: + if: needs.files-changed.outputs.backend == 'true' + needs: files-changed + name: prepare selective tests + runs-on: ubuntu-24.04 + timeout-minutes: 10 + permissions: + contents: read + id-token: write + outputs: + has-coverage: ${{ steps.find-coverage.outputs.found }} + coverage-sha: ${{ steps.find-coverage.outputs.coverage-sha }} + changed-files: ${{ steps.changed-files.outputs.files }} + test-count: ${{ steps.compute-tests.outputs.test-count }} + has-selected-tests: ${{ 
steps.compute-tests.outputs.has-selected-tests }} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + fetch-depth: 0 # Need full history for git diff + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.13.1' + + - name: Authenticate to Google Cloud + id: gcloud-auth + uses: google-github-actions/auth@v2 + with: + project_id: sentry-dev-tooling + workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }} + service_account: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }} + + - name: Find coverage data for selective testing + id: find-coverage + env: + GCS_BUCKET: sentry-coverage-data + run: | + set -euo pipefail + + # Get the base commit (what the PR branches from) + BASE_SHA="${{ github.event.pull_request.base.sha }}" + + echo "Looking for coverage data starting from base commit: $BASE_SHA" + + COVERAGE_SHA="" + for sha in $(git rev-list "$BASE_SHA" --max-count=30); do + # Check if coverage exists in GCS for this commit + if gcloud storage ls "gs://${GCS_BUCKET}/${sha}/" &>/dev/null; then + COVERAGE_SHA="$sha" + echo "Found coverage data at commit: $sha" + break + fi + echo "No coverage at $sha, checking parent..." + done + + if [[ -z "$COVERAGE_SHA" ]]; then + echo "No coverage found in last 30 commits, will run full test suite" + echo "found=false" >> "$GITHUB_OUTPUT" + else + echo "found=true" >> "$GITHUB_OUTPUT" + echo "coverage-sha=$COVERAGE_SHA" >> "$GITHUB_OUTPUT" + fi + + - name: Download coverage database + id: download-coverage + if: steps.find-coverage.outputs.found == 'true' + env: + COVERAGE_SHA: ${{ steps.find-coverage.outputs.coverage-sha }} + run: | + set -euxo pipefail + mkdir -p .coverage + + if ! gcloud storage cp "gs://sentry-coverage-data/${COVERAGE_SHA}/.coverage.combined" .coverage/; then + echo "Warning: Failed to download coverage file" + echo "coverage-file=" >> "$GITHUB_OUTPUT" + exit 0 + fi + + if [[ ! 
-f .coverage/.coverage.combined ]]; then + echo "Warning: Coverage file not found after download" + ls -la .coverage/ || true + echo "coverage-file=" >> "$GITHUB_OUTPUT" + else + echo "Downloaded coverage file: .coverage/.coverage.combined" + echo "coverage-file=.coverage/.coverage.combined" >> "$GITHUB_OUTPUT" + fi + + - name: Get changed files + id: changed-files + run: | + # Get files changed between base and head of PR + BASE_SHA="${{ github.event.pull_request.base.sha }}" + HEAD_SHA="${{ github.event.pull_request.head.sha }}" + + CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" | tr '\n' ' ') + echo "Changed files: $CHANGED_FILES" + echo "files=$CHANGED_FILES" >> "$GITHUB_OUTPUT" + + - name: Compute selected tests + id: compute-tests + if: steps.download-coverage.outputs.coverage-file != '' + env: + COVERAGE_DB: ${{ steps.download-coverage.outputs.coverage-file }} + CHANGED_FILES: ${{ steps.changed-files.outputs.files }} + IS_CI: true + run: make compute-selected-tests + + - name: Upload coverage database artifact + if: steps.download-coverage.outputs.coverage-file != '' + uses: actions/upload-artifact@v4 + with: + name: coverage-db-${{ github.run_id }} + path: .coverage/ + retention-days: 1 + include-hidden-files: true + + - name: Upload selected tests artifact + if: steps.compute-tests.outputs.has-selected-tests == 'true' + uses: actions/upload-artifact@v4 + with: + name: selected-tests-${{ github.run_id }} + path: .artifacts/selected-tests.txt + retention-days: 1 + + calculate-shards: + if: needs.files-changed.outputs.backend == 'true' + needs: [files-changed, prepare-selective-tests] + name: calculate test shards + runs-on: ubuntu-24.04 + timeout-minutes: 5 + outputs: + shard-count: ${{ steps.calculate-shards.outputs.shard-count }} + shard-indices: ${{ steps.calculate-shards.outputs.shard-indices }} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Setup sentry env + uses: 
./.github/actions/setup-sentry + id: setup + with: + mode: backend-ci + skip-devservices: true + + - name: Download selected tests artifact + if: needs.prepare-selective-tests.outputs.has-selected-tests == 'true' + uses: actions/download-artifact@v4 + with: + name: selected-tests-${{ github.run_id }} + path: .artifacts/ + + - name: Calculate test shards + id: calculate-shards + env: + SELECTED_TESTS_FILE: ${{ needs.prepare-selective-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || '' }} + SELECTED_TEST_COUNT: ${{ needs.prepare-selective-tests.outputs.test-count }} + run: | + python3 .github/workflows/scripts/calculate-backend-test-shards.py + + backend-test-selective: + if: needs.files-changed.outputs.backend == 'true' + needs: [files-changed, prepare-selective-tests, calculate-shards] + name: backend test + runs-on: ubuntu-24.04 + timeout-minutes: 60 + permissions: + contents: read + id-token: write + actions: read + strategy: + fail-fast: false + matrix: + instance: ${{ fromJSON(needs.calculate-shards.outputs.shard-indices) }} + + env: + MATRIX_INSTANCE_TOTAL: ${{ needs.calculate-shards.outputs.shard-count }} + TEST_GROUP_STRATEGY: roundrobin + + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + fetch-depth: 0 + + - name: Setup sentry env + uses: ./.github/actions/setup-sentry + id: setup + with: + mode: backend-ci + + - name: Download coverage database artifact + id: download-coverage + if: needs.prepare-selective-tests.outputs.has-coverage == 'true' + continue-on-error: true + uses: actions/download-artifact@v4 + with: + name: coverage-db-${{ github.run_id }} + path: .coverage/ + + - name: Find coverage file + id: find-coverage-file + run: | + if [[ -f .coverage/.coverage.combined ]]; then + echo "coverage-file=.coverage/.coverage.combined" >> "$GITHUB_OUTPUT" + echo "Coverage file found, selective testing enabled" + else + echo "coverage-file=" >> "$GITHUB_OUTPUT" + echo "No coverage 
file found, running full test suite" + fi + + - name: Run backend tests (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }}) + id: run_backend_tests + run: make test-python-ci + env: + CHANGED_FILES: ${{ needs.prepare-selective-tests.outputs.changed-files }} + COVERAGE_DB_PATH: ${{ steps.find-coverage-file.outputs.coverage-file }} + + - name: Inspect failure + if: failure() + run: | + if command -v devservices; then + devservices logs + fi + + - name: Collect test data + uses: ./.github/actions/collect-test-data + if: ${{ !cancelled() }} + with: + artifact_path: .artifacts/pytest.json + gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }} + gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }} + workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }} + service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }} + matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }} diff --git a/.github/workflows/development-environment.yml b/.github/workflows/development-environment.yml deleted file mode 100644 index 42804f51630dda..00000000000000 --- a/.github/workflows/development-environment.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: dev env -on: - pull_request: - paths: - - '.pre-commit-config.yaml' - - 'Makefile' - - '.github/workflows/development-environment.yml' - - 'requirements-*.txt' - - 'pyproject.toml' - - 'uv.lock' - - '.python-version' - - '.envrc' - - 'Brewfile' - - 'scripts/**' - - 'tools/**' - - 'src/sentry/runner/commands/devserver.py' - - 'src/sentry/runner/commands/devservices.py' - - 'bin/load-mocks' - -# Cancel in progress workflows on pull_requests. 
-# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359 -env: - SEGMENT_DOWNLOAD_TIMEOUT_MINS: 3 - -jobs: - test: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - - uses: astral-sh/setup-uv@884ad927a57e558e7a70b92f2bccf9198a4be546 # v6 - with: - version: '0.8.2' - # we just cache the venv-dir directly in action-setup-venv - enable-cache: false - - - uses: getsentry/action-setup-venv@5a80476d175edf56cb205b08bc58986fa99d1725 # v3.2.0 - with: - cache-dependency-path: uv.lock - install-cmd: uv sync --only-dev --frozen --active - - - name: test-tools - run: make test-tools - - devenv: - runs-on: ubuntu-24.04 - timeout-minutes: 10 - steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - - uses: astral-sh/setup-uv@884ad927a57e558e7a70b92f2bccf9198a4be546 # v6 - with: - version: '0.8.2' - # we just cache the venv-dir directly in action-setup-venv - enable-cache: false - - - uses: getsentry/action-setup-venv@5a80476d175edf56cb205b08bc58986fa99d1725 # v3.2.0 - with: - cache-dependency-path: uv.lock - # technically we can just use --only-dev but more cache is nice - install-cmd: uv sync --frozen --active - - - name: devenv sync - run: | - devenv --nocoderoot sync diff --git a/.github/workflows/scripts/calculate-backend-test-shards.py b/.github/workflows/scripts/calculate-backend-test-shards.py index fca159736127d4..13049589716796 100755 --- a/.github/workflows/scripts/calculate-backend-test-shards.py +++ b/.github/workflows/scripts/calculate-backend-test-shards.py @@ -5,17 +5,20 @@ import re import subprocess import sys +from pathlib import Path TESTS_PER_SHARD = 1200 MIN_SHARDS = 1 MAX_SHARDS = 22 DEFAULT_SHARDS = 22 
-PYTEST_ARGS = [ +PYTEST_BASE_ARGS = [ "pytest", "--collect-only", "--quiet", - "tests", +] + +PYTEST_IGNORE_ARGS = [ "--ignore=tests/acceptance", "--ignore=tests/apidocs", "--ignore=tests/js", @@ -24,9 +27,30 @@ def collect_test_count(): + """Collect test count, optionally filtering to selected test files.""" + selected_tests_file = os.environ.get("SELECTED_TESTS_FILE") + + if selected_tests_file: + path = Path(selected_tests_file) + if not path.exists(): + print(f"Selected tests file not found: {selected_tests_file}", file=sys.stderr) + return None + + with path.open() as f: + selected_files = [line.strip() for line in f if line.strip()] + + if not selected_files: + print("No selected test files, running 0 tests", file=sys.stderr) + return 0 + + print(f"Counting tests in {len(selected_files)} selected files", file=sys.stderr) + pytest_args = PYTEST_BASE_ARGS + selected_files + else: + pytest_args = PYTEST_BASE_ARGS + ["tests"] + PYTEST_IGNORE_ARGS + try: result = subprocess.run( - PYTEST_ARGS, + pytest_args, capture_output=True, text=True, check=False, @@ -40,7 +64,6 @@ def collect_test_count(): print(f"Collected {count} tests", file=sys.stderr) return count - # If no match, check if pytest failed if result.returncode != 0: print( f"Pytest collection failed (exit {result.returncode})", @@ -85,7 +108,6 @@ def calculate_shards(test_count): def main(): test_count = collect_test_count() shard_count = calculate_shards(test_count) - # Generate a JSON array of shard indices [0, 1, 2, ..., shard_count-1] shard_indices = json.dumps(list(range(shard_count))) github_output = os.getenv("GITHUB_OUTPUT") diff --git a/.github/workflows/scripts/compute-selected-tests.py b/.github/workflows/scripts/compute-selected-tests.py new file mode 100644 index 00000000000000..1a8b13bbe03cef --- /dev/null +++ b/.github/workflows/scripts/compute-selected-tests.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +""" +Compute selected tests based on coverage data and changed files. 
# Changed files that are skipped when looking up coverage (matched by suffix).
PYTEST_IGNORED_FILES = [
    "sentry/testutils/pytest/sentry.py",
]


def _like_suffix_pattern(path: str) -> str:
    """Return a LIKE pattern matching any stored path that ends with *path*.

    ``%`` and ``_`` are LIKE wildcards, and ``_`` is very common in Python
    file names, so both (and the escape character itself) are escaped with a
    backslash. The query must therefore use ``ESCAPE '\\'``.
    """
    escaped = path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    return "%" + escaped


def get_affected_test_files(coverage_db_path: str, changed_files: list[str]) -> set[str]:
    """Query the coverage DB for test files that cover the changed source files.

    Args:
        coverage_db_path: Path to the coverage SQLite database.
        changed_files: Paths of the changed files; each is matched as a
            suffix of the file paths recorded in the database.

    Returns:
        The set of test file paths, i.e. the part of each matching coverage
        context that precedes the first ``::``.

    Raises:
        ValueError: If the database lacks the ``line_bits`` or ``context``
            table (coverage was collected without per-test contexts).
        sqlite3.Error: If a query fails.
    """
    test_contexts: set[str] = set()

    conn = sqlite3.connect(coverage_db_path)
    try:
        cur = conn.cursor()

        # Verify required tables exist (need context tracking enabled)
        tables = {
            row[0] for row in cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        }
        if "line_bits" not in tables or "context" not in tables:
            raise ValueError(
                "Coverage database missing line_bits/context tables. "
                "Coverage must be collected with --cov-context=test"
            )

        ignored_suffixes = tuple(PYTEST_IGNORED_FILES)
        for file_path in changed_files:
            if file_path.endswith(ignored_suffixes):
                continue

            cleaned_file_path = file_path.removeprefix("/src")

            cur.execute(
                """
                SELECT c.context, lb.numbits
                FROM line_bits lb
                JOIN file f ON lb.file_id = f.id
                JOIN context c ON lb.context_id = c.id
                WHERE f.path LIKE ? ESCAPE '\\'
                  AND c.context != ''
                """,
                (_like_suffix_pattern(cleaned_file_path),),
            )

            # Only contexts whose numbits blob has at least one non-zero byte count.
            test_contexts.update(
                context for context, bitblob in cur.fetchall() if any(bytes(bitblob))
            )
    finally:
        # Close the connection on every path, including the ValueError above.
        conn.close()

    # Extract test file paths from contexts
    # Context format: 'tests/foo/bar.py::TestClass::test_function|run'
    return {context.split("::", 1)[0] for context in test_contexts}


def main(argv: list[str] | None = None) -> int:
    """Compute the selected test files and write them out.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv[1:]`` when None.

    Returns:
        0 on success, 1 if the coverage database is missing or unusable.
    """
    parser = argparse.ArgumentParser(description="Compute selected tests from coverage data")
    parser.add_argument("--coverage-db", required=True, help="Path to coverage SQLite database")
    parser.add_argument(
        "--changed-files", required=True, help="Space-separated list of changed files"
    )
    parser.add_argument("--output", help="Output file path for selected test files (one per line)")
    parser.add_argument("--github-output", action="store_true", help="Write to GITHUB_OUTPUT")
    args = parser.parse_args(argv)

    coverage_db = Path(args.coverage_db)
    if not coverage_db.exists():
        print(f"Error: Coverage database not found: {coverage_db}", file=sys.stderr)
        return 1

    # str.split() with no separator already strips and drops empty entries.
    changed_files = args.changed_files.split()
    affected_test_files: set[str] = set()
    if not changed_files:
        print("No changed files provided, selecting all tests")
    else:
        print(f"Computing selected tests for {len(changed_files)} changed files...")
        try:
            affected_test_files = get_affected_test_files(str(coverage_db), changed_files)
        except (sqlite3.Error, ValueError) as e:
            # ValueError is raised when the DB has no per-test context tables.
            print(f"Error querying coverage database: {e}", file=sys.stderr)
            return 1

    print(f"Found {len(affected_test_files)} affected test files")
    ordered_test_files = sorted(affected_test_files)

    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open("w", encoding="utf-8") as f:
            f.writelines(f"{test_file}\n" for test_file in ordered_test_files)
        print(f"Wrote selected tests to {output_path}")

    if args.github_output:
        github_output = os.environ.get("GITHUB_OUTPUT")
        if github_output:
            with open(github_output, "a", encoding="utf-8") as f:
                f.write(f"test-count={len(affected_test_files)}\n")
                f.write(f"has-selected-tests={'true' if affected_test_files else 'false'}\n")
            print(f"Wrote to GITHUB_OUTPUT: test-count={len(affected_test_files)}")

    if ordered_test_files:
        print("\nAffected test files:")
        for test_file in ordered_test_files:
            print(f"  {test_file}")

    return 0


if __name__ == "__main__":
    sys.exit(main())
PYTEST_IGNORED_FILES = [
    # the pytest code itself is not part of the test suite but will be referenced by most tests
    "sentry/testutils/pytest/sentry.py",
]


def _like_suffix_pattern(path: str) -> str:
    """Return a LIKE pattern matching any stored path that ends with *path*.

    ``%`` and ``_`` are LIKE wildcards, and ``_`` is very common in Python
    file names, so both (and the escape character itself) are escaped with a
    backslash. The query must therefore use ``ESCAPE '\\'``.
    """
    escaped = path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    return "%" + escaped


def filter_items_by_coverage(
    config: pytest.Config,
    items: list[pytest.Item],
    changed_files: list[str],
    coverage_db_path: str,
) -> tuple[list[pytest.Item], list[pytest.Item], set[str]]:
    """Split collected items by whether their test file covers a changed file.

    Args:
        config: Pytest config; only its terminal writer is used, for reporting.
        items: Collected test items; each is matched by the part of its
            ``nodeid`` before the first ``::``.
        changed_files: Paths of the changed files; each is matched as a
            suffix of the file paths recorded in the coverage database.
        coverage_db_path: Path to the coverage SQLite database.

    Returns:
        ``(selected_items, discarded_items, affected_test_files)``.

    Raises:
        ValueError: If the coverage database cannot be queried for any
            reason. Every failure is deliberately converted to ValueError.
    """
    test_contexts: set[str] = set()
    try:
        conn = sqlite3.connect(coverage_db_path)
        try:
            cur = conn.cursor()
            ignored_suffixes = tuple(PYTEST_IGNORED_FILES)

            for file_path in changed_files:
                if file_path.endswith(ignored_suffixes):
                    continue

                cleaned_file_path = file_path.removeprefix("/src")

                cur.execute(
                    """
                    SELECT c.context, lb.numbits
                    FROM line_bits lb
                    JOIN file f ON lb.file_id = f.id
                    JOIN context c ON lb.context_id = c.id
                    WHERE f.path LIKE ? ESCAPE '\\'
                      AND c.context != ''
                    """,
                    (_like_suffix_pattern(cleaned_file_path),),
                )

                # Only contexts whose numbits blob has at least one non-zero byte count.
                test_contexts.update(
                    context for context, bitblob in cur.fetchall() if any(bytes(bitblob))
                )
        finally:
            # Release the connection even when a query fails.
            conn.close()
    except Exception as e:
        raise ValueError(f"Could not query coverage database: {e}") from e

    # Extract test file paths from contexts
    # Context format: 'tests/foo/bar.py::TestClass::test_function|run'
    affected_test_files = {context.split("::", 1)[0] for context in test_contexts}

    writer = config.get_terminal_writer()
    writer.line(f"Found {len(affected_test_files)} affected test files")
    writer.line(f"Affected test files: {affected_test_files}")

    selected_items: list[pytest.Item] = []
    discarded_items: list[pytest.Item] = []
    for item in items:
        test_file = item.nodeid.split("::", 1)[0]
        bucket = selected_items if test_file in affected_test_files else discarded_items
        bucket.append(item)

    return selected_items, discarded_items, affected_test_files
= filter_items_by_coverage( + config=config, + items=items, + changed_files=changed_files, + coverage_db_path=coverage_db_path, + ) + + items[:] = selected_items + + if deselected_items: + config.hook.pytest_deselected(items=deselected_items) + + config.get_terminal_writer().line( + f"Selective testing: {len(items)}/{original_count} tests selected based on coverage" + ) + except ValueError as e: + config.get_terminal_writer().line( + f"Warning: Selective testing failed ({e}), running all tests" + ) + total_groups = int(os.environ.get("TOTAL_TEST_GROUPS", 1)) current_group = int(os.environ.get("TEST_GROUP", 0)) grouping_strategy = os.environ.get("TEST_GROUP_STRATEGY", "scope")