-
-
Notifications
You must be signed in to change notification settings - Fork 30
feat(logs): secret-redaction core for the Logs app #1558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from tinyagentos.log_redaction import PLACEHOLDER, redact, redact_lines | ||
|
|
||
|
|
||
| class TestKeyValue: | ||
| def test_equals_form(self): | ||
| assert redact("api_key=sk-abc123def456ghi789") == f"api_key={PLACEHOLDER}" | ||
|
|
||
| def test_colon_form(self): | ||
| assert redact("password: hunter2secret") == f"password: {PLACEHOLDER}" | ||
|
|
||
| def test_json_form(self): | ||
| out = redact('{"secret": "topsecretvalue123"}') | ||
| assert "topsecretvalue123" not in out | ||
| assert PLACEHOLDER in out | ||
|
|
||
| def test_flag_form(self): | ||
| assert redact("--token deadbeefcafebabe01") == f"--token {PLACEHOLDER}" | ||
|
|
||
| def test_case_insensitive(self): | ||
| assert redact("PASSWORD=SuperSecret99") == f"PASSWORD={PLACEHOLDER}" | ||
|
|
||
| def test_does_not_match_substring_key(self): | ||
| # "monkey" must not trip the "key" rule. | ||
| assert redact("monkey=banana") == "monkey=banana" | ||
|
|
||
| def test_preserves_surrounding_text(self): | ||
| out = redact("connecting with token=abcdef123456 to host db1") | ||
| assert out == f"connecting with token={PLACEHOLDER} to host db1" | ||
|
|
||
|
|
||
| class TestBearer: | ||
| def test_header(self): | ||
| assert redact("Authorization: Bearer abc123def456ghi") == \ | ||
| f"Authorization: Bearer {PLACEHOLDER}" | ||
|
|
||
|
|
||
| class TestTokenShapes: | ||
| def test_openai_style(self): | ||
| assert redact("using sk-taos-abcdefghij0123456789 now").count(PLACEHOLDER) == 1 | ||
| assert "abcdefghij0123456789" not in redact("sk-taos-abcdefghij0123456789") | ||
|
|
||
| def test_github_pat(self): | ||
| assert PLACEHOLDER in redact("ghp_EXAMPLEEXAMPLEEXAMPLEEXAMPLE00") | ||
|
|
||
| def test_slack(self): | ||
| assert PLACEHOLDER in redact("xoxb-EXAMPLEEXAMPLE-EXAMPLEEXAMPLETOKEN") | ||
|
|
||
| def test_aws_akia(self): | ||
| assert PLACEHOLDER in redact("AKIAEXAMPLEEXAMPLE00") | ||
|
|
||
|
|
||
| class TestPem: | ||
| def test_private_key_block(self): | ||
| block = ( | ||
| "-----BEGIN OPENSSH PRIVATE KEY-----\n" | ||
| "b3BlbnNzaC1rZXktdjEAAAAABG5vbmU\nAAAAAAAA\n" | ||
| "-----END OPENSSH PRIVATE KEY-----" | ||
| ) | ||
| out = redact(f"key material:\n{block}\ndone") | ||
| assert "b3BlbnNz" not in out | ||
| assert out.startswith("key material:") | ||
| assert out.endswith("done") | ||
|
|
||
|
|
||
| class TestConnectionString: | ||
| def test_masks_only_password(self): | ||
| out = redact("dsn=postgres://taos:s3cr3tpw@db.internal:5432/app") | ||
| assert "s3cr3tpw" not in out | ||
| assert "postgres://taos:" in out | ||
| assert "@db.internal:5432/app" in out | ||
|
|
||
|
|
||
| class TestKnownValues: | ||
| def test_literal_value_masked(self): | ||
| out = redact("the model returned plainlookingkey987", known_values=["plainlookingkey987"]) | ||
| assert "plainlookingkey987" not in out | ||
| assert PLACEHOLDER in out | ||
|
|
||
| def test_short_known_value_ignored(self): | ||
| # too short to safely mask -> left alone (no runaway redaction) | ||
| assert redact("abc appears here", known_values=["abc"]) == "abc appears here" | ||
|
|
||
| def test_empty_known_values_noop(self): | ||
| assert redact("nothing sensitive here", known_values=[]) == "nothing sensitive here" | ||
|
|
||
| def test_none_known_values(self): | ||
| assert redact("nothing sensitive here") == "nothing sensitive here" | ||
|
|
||
|
|
||
| class TestSafety: | ||
| def test_empty_string(self): | ||
| assert redact("") == "" | ||
|
|
||
| def test_clean_line_untouched(self): | ||
| line = "2026-07-02 19:00:00 INFO controller ready on port 6969" | ||
| assert redact(line) == line | ||
|
|
||
| def test_redact_lines(self): | ||
| out = redact_lines(["password=abcdef123456", "all good here"]) | ||
| assert out[0] == f"password={PLACEHOLDER}" | ||
| assert out[1] == "all good here" | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,123 @@ | ||||||
| """Redaction for operator-facing log output (Logs app, bug-report bundle). | ||||||
|
|
||||||
| Every log line that leaves the box through the system-logs API passes through | ||||||
| `redact()` first. The threat is a well-meaning operator copying a log bundle | ||||||
| into a public GitHub issue and leaking a live credential that happened to be | ||||||
| logged by a dependency, a stack trace, or an env dump. | ||||||
|
|
||||||
| Design choices: | ||||||
| - Pure functions, no I/O, exhaustively tested. Nothing in the logs path may | ||||||
| bypass this. | ||||||
| - Redact by PATTERN (key=value, bearer tokens, connection strings, private-key | ||||||
| blocks, high-entropy provider-key shapes) AND by KNOWN SECRET VALUE (the | ||||||
| literal values from the secrets store, so a secret logged verbatim is caught | ||||||
| even if it does not match a generic shape). | ||||||
| - Fail closed on the value side: an empty or too-short known value is ignored | ||||||
| rather than redacting everything. | ||||||
| - Never widen a match to swallow surrounding context; replace only the secret | ||||||
| span with a fixed placeholder so the log stays readable. | ||||||
| """ | ||||||
| from __future__ import annotations | ||||||
|
|
||||||
| import re | ||||||
|
|
||||||
| PLACEHOLDER = "[REDACTED]" | ||||||
|
|
||||||
| # Keys whose value must be masked when they appear as key=value / key: value / | ||||||
| # "key": "value" / --key value. Case-insensitive, matched as whole words so | ||||||
| # "monkey" does not trip "key". | ||||||
| _SENSITIVE_KEYS = ( | ||||||
| "password", "passwd", "secret", "token", "api_key", "apikey", "api-key", | ||||||
| "access_key", "access-key", "secret_key", "secret-key", "private_key", | ||||||
| "private-key", "client_secret", "client-secret", "authorization", "auth", | ||||||
| "bearer", "session", "cookie", "credential", "credentials", "passphrase", | ||||||
| ) | ||||||
| _KEY_ALT = "|".join(sorted((re.escape(k) for k in _SENSITIVE_KEYS), key=len, reverse=True)) | ||||||
|
|
||||||
| # key = value / key: value / "key": "value" (value ends at quote, | ||||||
| # whitespace, comma, or line end). | ||||||
| _KV_RE = re.compile( | ||||||
| r'(?P<pre>["\']?(?:' + _KEY_ALT + r')["\']?\s*[:=]\s*)' | ||||||
| r'(?P<quote>["\']?)(?P<val>[^\s,"\'}{]+)(?P=quote)', | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION:
Consider broadening to Reply with |
||||||
| re.IGNORECASE, | ||||||
| ) | ||||||
|
|
||||||
| # --key value (CLI flag form, space-separated). | ||||||
| _FLAG_RE = re.compile(r'(?P<pre>--(?:' + _KEY_ALT + r')\s+)(?P<val>\S+)', re.IGNORECASE) | ||||||
|
|
||||||
| # Auth SCHEME words that legitimately follow "authorization:"; the real secret | ||||||
| # is the NEXT token (handled by the bearer rule), so the KV rule must not treat | ||||||
| # the scheme word itself as the value and stop there, leaving the token exposed. | ||||||
| _AUTH_SCHEMES = {"bearer", "basic", "digest", "token", "negotiate"} | ||||||
|
|
||||||
| # Authorization: Bearer <token> (header form; the key-value rule catches the | ||||||
| # "authorization=" form, this catches the header " Bearer <tok>" shape). | ||||||
| _BEARER_RE = re.compile(r'(?P<pre>bearer\s+)(?P<val>[A-Za-z0-9._\-]{8,})', re.IGNORECASE) | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CRITICAL: Bearer-token value class misses characters that commonly appear in real tokens. The value character class is This is exactly the shape used by AWS session tokens, many OAuth2 access tokens, and other
Suggested change
Reply with |
||||||
|
|
||||||
| # Provider-key shapes that are secrets on their own with no key= prefix: | ||||||
| # sk-..., sk-taos-..., ghp_/gho_/ghs_ (GitHub), xoxb-/xoxp- (Slack), AKIA... (AWS). | ||||||
| _TOKEN_SHAPE_RE = re.compile( | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WARNING: No detection for JWT-style tokens or other common bare-secret shapes.
At minimum, consider adding Reply with |
||||||
| r'\b(?:' | ||||||
| r'sk-[A-Za-z0-9._\-]{16,}' | ||||||
| r'|gh[posru]_[A-Za-z0-9]{20,}' | ||||||
| r'|xox[baprs]-[A-Za-z0-9\-]{10,}' | ||||||
| r'|AKIA[0-9A-Z]{16}' | ||||||
| r')\b' | ||||||
| ) | ||||||
|
|
||||||
| # PEM private-key blocks (SSH keys materialized on deploy, TLS keys). | ||||||
| _PEM_RE = re.compile( | ||||||
| r'-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----.*?-----END [A-Z0-9 ]*PRIVATE KEY-----', | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: PEM rule swallows surrounding log context, not just the key block.
Reply with |
||||||
| re.DOTALL, | ||||||
| ) | ||||||
|
|
||||||
| # postgres://user:pass@host, mysql://..., redis://..., amqp:// -- mask the | ||||||
| # password component only. | ||||||
| _CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]+:)(?P<val>[^@\s]+)(?P<post>@)') | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔒 Security & Privacy | 🟡 Minor | ⚡ Quick win Connection strings with an empty username leak the password.
🔒 Proposed fix-_CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]+:)(?P<val>[^@\s]+)(?P<post>@)')
+_CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]*:)(?P<val>[^@\s]+)(?P<post>@)')📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
|
|
||||||
| # A known secret value shorter than this is not masked: too likely to be a | ||||||
| # common substring and cause runaway redaction of unrelated text. | ||||||
| _MIN_KNOWN_VALUE_LEN = 6 | ||||||
|
|
||||||
|
|
||||||
| def redact(text: str, known_values: "list[str] | None" = None) -> str: | ||||||
| """Return `text` with credential-shaped spans replaced by PLACEHOLDER. | ||||||
|
|
||||||
| known_values: exact secret strings (e.g. from the secrets store) to mask | ||||||
| wherever they appear verbatim, in addition to the pattern rules. | ||||||
| """ | ||||||
| if not text: | ||||||
| return text | ||||||
|
|
||||||
| # Structural rules first (they anchor on keys/prefixes, least likely to | ||||||
| # over-match), then the bare token shapes. | ||||||
| text = _PEM_RE.sub(PLACEHOLDER, text) | ||||||
| text = _CONN_STR_RE.sub(lambda m: m.group("pre") + PLACEHOLDER + m.group("post"), text) | ||||||
| # Bearer BEFORE the key-value rule so "authorization: Bearer <tok>" has its | ||||||
| # token masked; the KV rule then leaves the bare scheme word alone. | ||||||
| text = _BEARER_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text) | ||||||
| text = _FLAG_RE.sub(lambda m: m.group("pre") + PLACEHOLDER, text) | ||||||
|
|
||||||
| def _kv_repl(m: "re.Match[str]") -> str: | ||||||
| if m.group("val").lower() in _AUTH_SCHEMES: | ||||||
| return m.group(0) # e.g. "authorization: Bearer" -> leave for bearer rule | ||||||
| return m.group("pre") + PLACEHOLDER | ||||||
|
|
||||||
| text = _KV_RE.sub(_kv_repl, text) | ||||||
| text = _TOKEN_SHAPE_RE.sub(PLACEHOLDER, text) | ||||||
|
|
||||||
| # Known literal secret values last: mask any that survived the shape rules | ||||||
| # (e.g. a plain-looking API key logged without a key= prefix). Longest | ||||||
| # first so a value that contains a shorter one is fully masked. | ||||||
| if known_values: | ||||||
| for val in sorted((v for v in known_values if v), key=len, reverse=True): | ||||||
| if len(val) < _MIN_KNOWN_VALUE_LEN: | ||||||
| continue | ||||||
| text = text.replace(val, PLACEHOLDER) | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WARNING:
Consider matching on word boundaries ( Reply with |
||||||
|
|
||||||
| return text | ||||||
|
|
||||||
|
|
||||||
| def redact_lines(lines: "list[str]", known_values: "list[str] | None" = None) -> "list[str]": | ||||||
| """Redact a list of log lines (convenience for the paged log reader).""" | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: The list comprehension Reply with |
||||||
| return [redact(line, known_values) for line in lines] | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SUGGESTION: Test coverage is light for the documented threat model.
For security-critical code that is described as "nothing in the logs path may bypass this", the suite is missing negative tests that would catch the most likely regressions:
Authorizationheader without the wordBearer(e.g.Authorization: eyJhbGciOi…).+,/,=(the exact bypass flagged onlog_redaction.py:55).log_redaction.py:116).@in the userinfo.redact_linescalled with a list containingNoneor empty strings.Adding these would lock in the intended behaviour against the regression modes that are cheapest to introduce and hardest to spot in code review.
Reply with
@kilocode-bot fix itto have Kilo Code address this issue.