From 396a1f52e4770595bf300a644eff409dba7cdf00 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:57:51 +0000 Subject: [PATCH] Replace trufflehog with detect-secrets (#4404) * Added secret-scan Signed-off-by: Elena Khaustova * Removed trufflehog Signed-off-by: Elena Khaustova * Removed secret scan pre-commit hook Signed-off-by: Elena Khaustova * Added action to detect secrets Signed-off-by: Elena Khaustova * Renamed scan command for GH action Signed-off-by: Elena Khaustova * Detect secrets test Signed-off-by: Elena Khaustova * Removed test check Signed-off-by: Elena Khaustova * Updated release notes Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- .github/workflows/all-checks.yml | 12 +- .github/workflows/detect-secrets.yml | 38 +++++ .pre-commit-config.yaml | 10 +- .secrets.baseline | 219 +++++++++++++++++++++++++++ Makefile | 3 - RELEASE.md | 3 +- pyproject.toml | 2 +- trufflehog-ignore.txt | 13 -- 8 files changed, 275 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/detect-secrets.yml create mode 100644 .secrets.baseline delete mode 100644 trufflehog-ignore.txt diff --git a/.github/workflows/all-checks.yml b/.github/workflows/all-checks.yml index c108d1ad42..3b8dbab421 100644 --- a/.github/workflows/all-checks.yml +++ b/.github/workflows/all-checks.yml @@ -54,7 +54,6 @@ jobs: python-version: ${{ matrix.python-version }} branch: ${{ inputs.branch }} - pip-compile: strategy: matrix: @@ -65,3 +64,14 @@ jobs: os: ${{ matrix.os }} python-version: ${{ matrix.python-version }} branch: ${{ inputs.branch }} + + detect-secrets: + strategy: + matrix: + os: [ ubuntu-latest ] + python-version: [ "3.11" ] + uses: ./.github/workflows/detect-secrets.yml + with: + os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} + branch: ${{ inputs.branch }} diff --git a/.github/workflows/detect-secrets.yml b/.github/workflows/detect-secrets.yml new file mode 100644 index 0000000000..15c3d77524 --- /dev/null +++ b/.github/workflows/detect-secrets.yml @@ -0,0 +1,38 @@ +name: Detect secrets on Kedro + +on: + workflow_call: + inputs: + os: + type: string + python-version: + type: string + branch: + type: string + default: '' + +jobs: + lint: + runs-on: ${{ inputs.os }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ inputs.branch }} + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Cache python packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{inputs.os}}-python-${{inputs.python-version}} + - name: Install dependencies + run: | + make install-test-requirements + make install-pre-commit + - name: pip freeze + run: uv pip freeze --system + - name: Scan all tracked files + run: git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aea1855741..fd0e331184 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,11 +40,9 @@ repos: pass_filenames: false entry: lint-imports - - repo: local + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 hooks: - - id: secret_scan - name: "Secret scan" - language: system + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] exclude: ^features/steps/test_starter - pass_filenames: false - entry: make secret-scan diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000000..965f48cbac --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,219 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": { + "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [ + { + "type": "Secret Keyword", + "filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "a62f2225bf70bfaccbc7f1ef2a397836717377de", + "is_verified": false, + "line_number": 8 + }, + { + "type": "Secret Keyword", + "filename": "features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "is_verified": false, + "line_number": 16 + } + ], + "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml": [ + { + "type": "Secret Keyword", + "filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", + "is_verified": false, + "line_number": 9 + }, + { + "type": "Secret Keyword", + "filename": "kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml", + "hashed_secret": "d033e22ae348aeb5660fc2140aec35850c4da997", + "is_verified": false, + "line_number": 18 + } + ], + "tests/config/test_omegaconf_config.py": [ + { + "type": "Basic Auth Credentials", + "filename": "tests/config/test_omegaconf_config.py", + "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", + "is_verified": false, + "line_number": 39 + } + ], + "tests/framework/context/test_context.py": [ + { + "type": "Basic Auth Credentials", + "filename": "tests/framework/context/test_context.py", + "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", + "is_verified": false, + "line_number": 63 + } + ], + "tests/io/conftest.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", + "is_verified": false, + "line_number": 71 + }, + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "3c3b274d119ff5a5ec6c1e215c1cb794d9973ac1", + "is_verified": false, + "line_number": 117 + }, + { + "type": "Secret Keyword", + "filename": "tests/io/conftest.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 131 + } + ], + "tests/io/test_data_catalog.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/test_data_catalog.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 529 + } + ], + "tests/io/test_kedro_data_catalog.py": [ + { + "type": "Secret Keyword", + "filename": "tests/io/test_kedro_data_catalog.py", + "hashed_secret": "15dd2c9ccec914f1470b4dccb45789844e49cf70", + "is_verified": false, + "line_number": 482 + } + ] + }, + "generated_at": "2025-01-08T12:21:43Z" +} diff --git a/Makefile b/Makefile index ccc8b5a63e..bfc0f3b47d 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,6 @@ e2e-tests-fast: pip-compile: pip-compile -q -o - -secret-scan: - trufflehog --max_depth 1 --exclude_paths trufflehog-ignore.txt . - build-docs: uv pip install -e ".[docs]" ./docs/build-docs.sh "docs" diff --git a/RELEASE.md b/RELEASE.md index 552fa27f41..f522b029b8 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,7 +2,8 @@ ## Major features and improvements * Implemented `KedroDataCatalog.to_config()` method that converts the catalog instance into a configuration format suitable for serialization. -* Improve OmegaConfigLoader performance +* Improve OmegaConfigLoader performance. +* Replaced `trufflehog` with `detect-secrets` for detecting secrets within a code base. ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. diff --git a/pyproject.toml b/pyproject.toml index 0881b6627b..f98d2ebef5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ dynamic = ["readme", "version"] test = [ "behave==1.2.6", "coverage[toml]", + "detect-secrets~=1.5.0", "import-linter==2.1", "ipylab>=1.0.0", "ipython~=8.10", @@ -72,7 +73,6 @@ test = [ "pytest>=7.2,<9.0", "s3fs>=2021.4, <2025.1", # Upper bound set arbitrarily, to be reassessed in late 2024 "requests_mock", - "trufflehog~=2.1", # mypy related dependencies "pandas-stubs", "types-PyYAML", diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt deleted file mode 100644 index cb5551a327..0000000000 --- a/trufflehog-ignore.txt +++ /dev/null @@ -1,13 +0,0 @@ -docs/package.json -docs/package-lock.json -docs/source/meta/images/KedroArchitecture.drawio -docs/source/nodes_and_pipelines/nodes.md -static/img/kedro_gitflow.svg -.idea/ -.git/ -.mypy_cache/ -.coverage.* -.*\.log -.*\.iml -tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py -docs/source/meta/images/kedro_gitflow.svg