-
-
Notifications
You must be signed in to change notification settings - Fork 30
feat(logs): secret-redaction core for the Logs app #1558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from tinyagentos.log_redaction import PLACEHOLDER, redact, redact_lines | ||
|
|
||
|
|
||
| class TestKeyValue: | ||
| def test_equals_form(self): | ||
| assert redact("api_key=sk-abc123def456ghi789") == f"api_key={PLACEHOLDER}" | ||
|
|
||
| def test_colon_form(self): | ||
| assert redact("password: hunter2secret") == f"password: {PLACEHOLDER}" | ||
|
|
||
| def test_json_form(self): | ||
| out = redact('{"secret": "topsecretvalue123"}') | ||
| assert "topsecretvalue123" not in out | ||
| assert PLACEHOLDER in out | ||
|
|
||
| def test_flag_form(self): | ||
| assert redact("--token deadbeefcafebabe01") == f"--token {PLACEHOLDER}" | ||
|
|
||
| def test_case_insensitive(self): | ||
| assert redact("PASSWORD=SuperSecret99") == f"PASSWORD={PLACEHOLDER}" | ||
|
|
||
| def test_does_not_match_substring_key(self): | ||
| # "monkey" must not trip the "key" rule. | ||
| assert redact("monkey=banana") == "monkey=banana" | ||
|
|
||
| def test_preserves_surrounding_text(self): | ||
| out = redact("connecting with token=abcdef123456 to host db1") | ||
| assert out == f"connecting with token={PLACEHOLDER} to host db1" | ||
|
|
||
|
|
||
| class TestBearer: | ||
| def test_header(self): | ||
| assert redact("Authorization: Bearer abc123def456ghi") == \ | ||
| f"Authorization: Bearer {PLACEHOLDER}" | ||
|
|
||
|
|
||
| class TestTokenShapes: | ||
| def test_openai_style(self): | ||
| assert redact("using sk-taos-abcdefghij0123456789 now").count(PLACEHOLDER) == 1 | ||
| assert "abcdefghij0123456789" not in redact("sk-taos-abcdefghij0123456789") | ||
|
|
||
| def test_github_pat(self): | ||
| assert PLACEHOLDER in redact("ghp_EXAMPLEEXAMPLEEXAMPLEEXAMPLE00") | ||
|
|
||
| def test_slack(self): | ||
| assert PLACEHOLDER in redact("xoxb-EXAMPLEEXAMPLE-EXAMPLEEXAMPLETOKEN") | ||
|
|
||
| def test_aws_akia(self): | ||
| assert PLACEHOLDER in redact("AKIAEXAMPLEEXAMPLE00") | ||
|
|
||
|
|
||
| class TestPem: | ||
| def test_private_key_block(self): | ||
| block = ( | ||
| "-----BEGIN OPENSSH PRIVATE KEY-----\n" | ||
| "b3BlbnNzaC1rZXktdjEAAAAABG5vbmU\nAAAAAAAA\n" | ||
| "-----END OPENSSH PRIVATE KEY-----" | ||
| ) | ||
| out = redact(f"key material:\n{block}\ndone") | ||
| assert "b3BlbnNz" not in out | ||
| assert out.startswith("key material:") | ||
| assert out.endswith("done") | ||
|
|
||
|
|
||
| class TestConnectionString: | ||
| def test_masks_only_password(self): | ||
| out = redact("dsn=postgres://taos:s3cr3tpw@db.internal:5432/app") | ||
| assert "s3cr3tpw" not in out | ||
| assert "postgres://taos:" in out | ||
| assert "@db.internal:5432/app" in out | ||
|
|
||
|
|
||
| class TestKnownValues: | ||
| def test_literal_value_masked(self): | ||
| out = redact("the model returned plainlookingkey987", known_values=["plainlookingkey987"]) | ||
| assert "plainlookingkey987" not in out | ||
| assert PLACEHOLDER in out | ||
|
|
||
| def test_short_known_value_ignored(self): | ||
| # too short to safely mask -> left alone (no runaway redaction) | ||
| assert redact("abc appears here", known_values=["abc"]) == "abc appears here" | ||
|
|
||
| def test_empty_known_values_noop(self): | ||
| assert redact("nothing sensitive here", known_values=[]) == "nothing sensitive here" | ||
|
|
||
| def test_none_known_values(self): | ||
| assert redact("nothing sensitive here") == "nothing sensitive here" | ||
|
|
||
|
|
||
| class TestSafety: | ||
| def test_empty_string(self): | ||
| assert redact("") == "" | ||
|
|
||
| def test_clean_line_untouched(self): | ||
| line = "2026-07-02 19:00:00 INFO controller ready on port 6969" | ||
| assert redact(line) == line | ||
|
|
||
| def test_redact_lines(self): | ||
| out = redact_lines(["password=abcdef123456", "all good here"]) | ||
| assert out[0] == f"password={PLACEHOLDER}" | ||
| assert out[1] == "all good here" | ||
|
|
||
|
|
||
| class TestFoldedFindings: | ||
| """Negative/regression tests for the review folds (base64 bearer, JWT, | ||
| partial-leak known values, PEM framing, coercion).""" | ||
|
|
||
| def test_base64_bearer_masked_whole(self): | ||
| # + / = must not truncate the value (AWS/OAuth2 token shape). | ||
| out = redact("Authorization: Bearer abc123+def/456ghi789=") | ||
| assert "abc123" not in out | ||
| assert out == f"Authorization: Bearer {PLACEHOLDER}" | ||
|
|
||
| def test_bare_jwt_masked(self): | ||
| jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4" | ||
| out = redact(f"id_token={jwt}") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: The input is Reply with |
||
| assert "SflKxwRJSMeKKF2QT4" not in out | ||
|
|
||
| def test_google_api_key(self): | ||
| assert PLACEHOLDER in redact("AIzaSyA0000000000000000000000000000000X") | ||
|
|
||
| def test_stripe_live_key(self): | ||
| assert PLACEHOLDER in redact("sk_live_0000000000000000abcdef") | ||
|
|
||
| def test_known_value_masks_whole_token(self): | ||
| # secret is a prefix of a longer id: the whole token must vanish, no | ||
| # trailing leak, no garbled placeholder. | ||
| out = redact("request_id=abcdef1234560000", known_values=["abcdef123456"]) | ||
| assert "0000" not in out | ||
| assert "[REDACTED]0" not in out | ||
| assert PLACEHOLDER in out | ||
|
|
||
| def test_pem_keeps_framing(self): | ||
| block = ( | ||
| "-----BEGIN OPENSSH PRIVATE KEY-----\n" | ||
| "b3BlbnNzaC1rZXktdjEAAAAA\n" | ||
| "-----END OPENSSH PRIVATE KEY-----" | ||
| ) | ||
| out = redact(block) | ||
| assert "b3BlbnNz" not in out | ||
| assert "-----BEGIN OPENSSH PRIVATE KEY-----" in out | ||
| assert "-----END OPENSSH PRIVATE KEY-----" in out | ||
|
|
||
| def test_redact_lines_coerces_non_str(self): | ||
| out = redact_lines(["password=abcdef123456", None, 42]) | ||
| assert out[0] == f"password={PLACEHOLDER}" | ||
| assert out[1] == "None" | ||
| assert out[2] == "42" | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,141 @@ | ||||||
| """Redaction for operator-facing log output (Logs app, bug-report bundle). | ||||||
|
|
||||||
| Every log line that leaves the box through the system-logs API passes through | ||||||
| `redact()` first. The threat is a well-meaning operator copying a log bundle | ||||||
| into a public GitHub issue and leaking a live credential that happened to be | ||||||
| logged by a dependency, a stack trace, or an env dump. | ||||||
|
|
||||||
| Design choices: | ||||||
| - Pure functions, no I/O, exhaustively tested. Nothing in the logs path may | ||||||
| bypass this. | ||||||
| - Redact by PATTERN (key=value, bearer tokens, connection strings, private-key | ||||||
| blocks, high-entropy provider-key shapes) AND by KNOWN SECRET VALUE (the | ||||||
| literal values from the secrets store, so a secret logged verbatim is caught | ||||||
| even if it does not match a generic shape). | ||||||
| - Fail closed on the value side: an empty or too-short known value is ignored | ||||||
| rather than redacting everything. | ||||||
| - Never widen a match to swallow surrounding context; replace only the secret | ||||||
| span with a fixed placeholder so the log stays readable. | ||||||
| """ | ||||||
| from __future__ import annotations | ||||||
|
|
||||||
| import re | ||||||
|
|
||||||
| PLACEHOLDER = "[REDACTED]" | ||||||
|
|
||||||
| # Keys whose value must be masked when they appear as key=value / key: value / | ||||||
| # "key": "value" / --key value. Case-insensitive, matched as whole words so | ||||||
| # "monkey" does not trip "key". | ||||||
| _SENSITIVE_KEYS = ( | ||||||
| "password", "passwd", "secret", "token", "api_key", "apikey", "api-key", | ||||||
| "access_key", "access-key", "secret_key", "secret-key", "private_key", | ||||||
| "private-key", "client_secret", "client-secret", "authorization", "auth", | ||||||
| "bearer", "session", "cookie", "credential", "credentials", "passphrase", | ||||||
| "id_token", "refresh_token", "access_token", "jwt", "assertion", | ||||||
| ) | ||||||
| _KEY_ALT = "|".join(sorted((re.escape(k) for k in _SENSITIVE_KEYS), key=len, reverse=True)) | ||||||
|
|
||||||
| # key = value / key: value / "key": "value". The value runs until a quote, | ||||||
| # whitespace, or comma; brackets/URLs are kept in the value so a token is not | ||||||
| # truncated mid-string (leaving a readable tail). Capped to avoid runaway | ||||||
| # matches on pathological single-line input. | ||||||
| _KV_RE = re.compile( | ||||||
| r'(?P<pre>["\']?(?:' + _KEY_ALT + r')["\']?\s*[:=]\s*)' | ||||||
| r'(?P<quote>["\']?)(?P<val>[^\s,"\']{1,4096})(?P=quote)', | ||||||
| re.IGNORECASE, | ||||||
| ) | ||||||
|
|
||||||
| # --key value (CLI flag form, space-separated). | ||||||
| _FLAG_RE = re.compile(r'(?P<pre>--(?:' + _KEY_ALT + r')\s+)(?P<val>\S+)', re.IGNORECASE) | ||||||
|
|
||||||
| # Auth SCHEME words that legitimately follow "authorization:"; the real secret | ||||||
| # is the NEXT token (handled by the bearer rule), so the KV rule must not treat | ||||||
| # the scheme word itself as the value and stop there, leaving the token exposed. | ||||||
| _AUTH_SCHEMES = {"bearer", "basic", "digest", "token", "negotiate"} | ||||||
|
|
||||||
| # Authorization: Bearer <token> (header form). The value class includes the | ||||||
| # base64/base64url trailing set (+ / =) so AWS session tokens and OAuth2 | ||||||
| # access tokens are masked whole, not truncated at the first + or /. | ||||||
| _BEARER_RE = re.compile(r'(?P<pre>bearer\s+)(?P<val>[A-Za-z0-9._\-+/=]{8,})', re.IGNORECASE) | ||||||
|
Comment on lines
+51
to
+59
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🟠 Major | ⚡ Quick win Non-Bearer auth schemes leak their credential.
Broaden the header rule to cover all schemes in 🔒 Proposed fix: mask the token after any auth scheme-# Authorization: Bearer <token> (header form). The value class includes the
-# base64/base64url trailing set (+ / =) so AWS session tokens and OAuth2
-# access tokens are masked whole, not truncated at the first + or /.
-_BEARER_RE = re.compile(r'(?P<pre>bearer\s+)(?P<val>[A-Za-z0-9._\-+/=]{8,})', re.IGNORECASE)
+# Authorization: <scheme> <token> (header form). Covers every scheme in
+# _AUTH_SCHEMES so Basic/Digest/Token/Negotiate credentials are masked too,
+# not just Bearer. The value class includes the base64/base64url trailing set
+# (+ / =) so AWS session tokens and OAuth2 access tokens are masked whole.
+_AUTH_TOKEN_RE = re.compile(
+ r'(?P<pre>(?:bearer|basic|digest|token|negotiate)\s+)(?P<val>[A-Za-z0-9._\-+/=]{8,})',
+ re.IGNORECASE,
+)And update the application site: - text = _BEARER_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text)
+ text = _AUTH_TOKEN_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text)🤖 Prompt for AI Agents |
||||||
|
|
||||||
| # Bare secret shapes that carry no key= prefix. JWTs (three base64url segments) | ||||||
| # are common in logs with no Bearer word, so they get their own rule. | ||||||
| _TOKEN_SHAPE_RE = re.compile( | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WARNING: No detection for JWT-style tokens or other common bare-secret shapes.
At minimum, consider adding Reply with |
||||||
| r'(?:' | ||||||
| r'\bsk-[A-Za-z0-9._\-]{16,}' # OpenAI / sk-taos | ||||||
| r'|\bsk_(?:live|test)_[0-9A-Za-z]{16,}' # Stripe | ||||||
| r'|\bgh[posru]_[A-Za-z0-9]{20,}' # GitHub PAT | ||||||
| r'|\bxox[baprs]-[A-Za-z0-9\-]{10,}' # Slack | ||||||
| r'|\bAKIA[0-9A-Z]{16}\b' # AWS access key id | ||||||
| r'|\bAIza[0-9A-Za-z_\-]{35}\b' # Google API key | ||||||
| r'|\bnpm_[A-Za-z0-9]{20,}' # npm token | ||||||
| r'|\bSG\.[A-Za-z0-9_\-]{16,}\.[A-Za-z0-9_\-]{16,}' # SendGrid | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: SendGrid branch is missing Every other branch in
Suggested change
Reply with |
||||||
| r'|\beyJ[A-Za-z0-9_\-]{8,}\.eyJ[A-Za-z0-9_\-]{6,}\.[A-Za-z0-9_\-]{6,}' # JWT | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: JWT segment character class is missing The class
Suggested change
Reply with |
||||||
| r')' | ||||||
| ) | ||||||
|
|
||||||
| # PEM private-key blocks (SSH keys materialized on deploy, TLS keys). Keep the | ||||||
| # BEGIN/END markers so a key logged inside a stack trace still shows WHAT was | ||||||
| # redacted; only the base64 body is masked. | ||||||
| _PEM_RE = re.compile( | ||||||
| r'(?P<begin>-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----).*?(?P<end>-----END [A-Z0-9 ]*PRIVATE KEY-----)', | ||||||
| re.DOTALL, | ||||||
| ) | ||||||
|
|
||||||
| # postgres://user:pass@host, mysql://..., redis://..., amqp:// -- mask the | ||||||
| # password component only. | ||||||
| _CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]+:)(?P<val>[^@\s]+)(?P<post>@)') | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🟡 Minor | ⚡ Quick win Connection strings with an empty username leak the password.
🔒 Proposed fix-_CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]+:)(?P<val>[^@\s]+)(?P<post>@)')
+_CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]*:)(?P<val>[^@\s]+)(?P<post>@)')📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
|
|
||||||
| # A known secret value shorter than this is not masked: too likely to be a | ||||||
| # common substring and cause runaway redaction of unrelated text. | ||||||
| _MIN_KNOWN_VALUE_LEN = 6 | ||||||
|
|
||||||
|
|
||||||
| def redact(text: str, known_values: "list[str] | None" = None) -> str: | ||||||
| """Return `text` with credential-shaped spans replaced by PLACEHOLDER. | ||||||
|
|
||||||
| known_values: exact secret strings (e.g. from the secrets store) to mask | ||||||
| wherever they appear verbatim, in addition to the pattern rules. | ||||||
| """ | ||||||
| if not text: | ||||||
| return text | ||||||
|
|
||||||
| # Structural rules first (they anchor on keys/prefixes, least likely to | ||||||
| # over-match), then the bare token shapes. | ||||||
| text = _PEM_RE.sub(lambda m: m.group("begin") + PLACEHOLDER + m.group("end"), text) | ||||||
| text = _CONN_STR_RE.sub(lambda m: m.group("pre") + PLACEHOLDER + m.group("post"), text) | ||||||
| # Bearer BEFORE the key-value rule so "authorization: Bearer <tok>" has its | ||||||
| # token masked; the KV rule then leaves the bare scheme word alone. | ||||||
| text = _BEARER_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text) | ||||||
| text = _FLAG_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text) | ||||||
|
|
||||||
| def _kv_repl(m: "re.Match[str]") -> str: | ||||||
| if m.group("val").lower() in _AUTH_SCHEMES: | ||||||
| return m.group(0) # e.g. "authorization: Bearer" -> leave for bearer rule | ||||||
| return m.group("pre") + PLACEHOLDER | ||||||
|
|
||||||
| text = _KV_RE.sub(_kv_repl, text) | ||||||
| text = _TOKEN_SHAPE_RE.sub(PLACEHOLDER, text) | ||||||
|
|
||||||
| # Known literal secret values last: mask any that survived the shape rules | ||||||
| # (e.g. a plain-looking API key logged without a key= prefix). Mask the | ||||||
| # WHOLE surrounding non-whitespace token, not just the substring, so a | ||||||
| # secret that is a prefix of a longer identifier (request_id=<secret>0000) | ||||||
| # does not leak the trailing part or produce garbled "[REDACTED]0000" | ||||||
| # output. Longest first so a value containing a shorter one is fully masked. | ||||||
| if known_values: | ||||||
| for val in sorted((v for v in known_values if v), key=len, reverse=True): | ||||||
| if len(val) < _MIN_KNOWN_VALUE_LEN: | ||||||
| continue | ||||||
| text = re.sub(r'\S*' + re.escape(val) + r'\S*', PLACEHOLDER, text) | ||||||
|
|
||||||
| return text | ||||||
|
|
||||||
|
|
||||||
| def redact_lines(lines: "list[str]", known_values: "list[str] | None" = None) -> "list[str]": | ||||||
| """Redact a list of log lines (convenience for the paged log reader). | ||||||
|
|
||||||
| Coerces non-string elements to str so one malformed line (e.g. a None from | ||||||
| a buggy upstream parser) cannot take down the whole page response. | ||||||
| """ | ||||||
| return [redact(line if isinstance(line, str) else str(line), known_values) for line in lines] | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SUGGESTION: Test coverage is light for the documented threat model.
For security-critical code that is described as "nothing in the logs path may bypass this", the suite is missing negative tests that would catch the most likely regressions:
Authorizationheader without the wordBearer(e.g.Authorization: eyJhbGciOi…).+,/,=(the exact bypass flagged onlog_redaction.py:55).log_redaction.py:116).@in the userinfo.redact_linescalled with a list containingNoneor empty strings.Adding these would lock in the intended behaviour against the regression modes that are cheapest to introduce and hardest to spot in code review.
Reply with
@kilocode-bot fix itto have Kilo Code address this issue.