jaylfc · jaylfc · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/tests/test_log_redaction.py b/tests/test_log_redaction.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+from tinyagentos.log_redaction import PLACEHOLDER, redact, redact_lines
+
+
+class TestKeyValue:  # a sensitive key followed by its value, in each syntax
+    def test_equals_form(self):  # key=value
+        assert redact("api_key=sk-abc123def456ghi789") == f"api_key={PLACEHOLDER}"
+
+    def test_colon_form(self):  # key: value
+        assert redact("password: hunter2secret") == f"password: {PLACEHOLDER}"
+
+    def test_json_form(self):  # "key": "value"
+        out = redact('{"secret": "topsecretvalue123"}')
+        assert "topsecretvalue123" not in out
+        assert PLACEHOLDER in out
+
+    def test_flag_form(self):  # --key value
+        assert redact("--token deadbeefcafebabe01") == f"--token {PLACEHOLDER}"
+
+    def test_case_insensitive(self):  # upper-case key is still recognised
+        assert redact("PASSWORD=SuperSecret99") == f"PASSWORD={PLACEHOLDER}"
+
+    def test_does_not_match_substring_key(self):
+        # "monkey" merely contains "key"; it is not a sensitive key name.
+        assert redact("monkey=banana") == "monkey=banana"
+
+    def test_preserves_surrounding_text(self):  # only the value span changes
+        out = redact("connecting with token=abcdef123456 to host db1")
+        assert out == f"connecting with token={PLACEHOLDER} to host db1"
+
+
+class TestBearer:  # HTTP "Authorization: Bearer <token>" header form
+    def test_header(self):  # scheme word is kept, only the token is masked
+        assert redact("Authorization: Bearer abc123def456ghi") == \
+            f"Authorization: Bearer {PLACEHOLDER}"
+
+
+class TestTokenShapes:  # provider-key shapes masked with no key= prefix
+    def test_openai_style(self):  # sk-... is masked once, as a single span
+        assert redact("using sk-taos-abcdefghij0123456789 now").count(PLACEHOLDER) == 1
+        assert "abcdefghij0123456789" not in redact("sk-taos-abcdefghij0123456789")
+
+    def test_github_pat(self):  # ghp_ prefix
+        assert PLACEHOLDER in redact("ghp_EXAMPLEEXAMPLEEXAMPLEEXAMPLE00")
+
+    def test_slack(self):  # xoxb- prefix
+        assert PLACEHOLDER in redact("xoxb-EXAMPLEEXAMPLE-EXAMPLEEXAMPLETOKEN")
+
+    def test_aws_akia(self):  # AKIA + 16 upper-case alphanumerics
+        assert PLACEHOLDER in redact("AKIAEXAMPLEEXAMPLE00")
+
+
+class TestPem:  # multi-line PEM private-key blocks inside one string
+    def test_private_key_block(self):  # payload gone, text around it kept
+        block = (
+            "-----BEGIN OPENSSH PRIVATE KEY-----\n"
+            "b3BlbnNzaC1rZXktdjEAAAAABG5vbmU\nAAAAAAAA\n"
+            "-----END OPENSSH PRIVATE KEY-----"
+        )
+        out = redact(f"key material:\n{block}\ndone")
+        assert "b3BlbnNz" not in out
+        assert out.startswith("key material:")
+        assert out.endswith("done")
+
+
+class TestConnectionString:  # scheme://user:password@host URLs
+    def test_masks_only_password(self):  # user, host, port and path survive
+        out = redact("dsn=postgres://taos:s3cr3tpw@db.internal:5432/app")
+        assert "s3cr3tpw" not in out
+        assert "postgres://taos:" in out
+        assert "@db.internal:5432/app" in out
+
+
+class TestKnownValues:  # literal secrets passed in through known_values
+    def test_literal_value_masked(self):  # value matches no generic pattern
+        out = redact("the model returned plainlookingkey987", known_values=["plainlookingkey987"])
+        assert "plainlookingkey987" not in out
+        assert PLACEHOLDER in out
+
+    def test_short_known_value_ignored(self):
+        # too short to safely mask -> left alone (no runaway redaction)
+        assert redact("abc appears here", known_values=["abc"]) == "abc appears here"
+
+    def test_empty_known_values_noop(self):  # empty list changes nothing
+        assert redact("nothing sensitive here", known_values=[]) == "nothing sensitive here"
+
+    def test_none_known_values(self):  # known_values omitted entirely
+        assert redact("nothing sensitive here") == "nothing sensitive here"
+
+
+class TestSafety:  # inputs that must pass through unchanged, plus the list helper
+    def test_empty_string(self):  # empty input comes back as-is
+        assert redact("") == ""
+
+    def test_clean_line_untouched(self):  # ordinary log line, no secrets
+        line = "2026-07-02 19:00:00 INFO controller ready on port 6969"
+        assert redact(line) == line
+
+    def test_redact_lines(self):  # each list element is redacted on its own
+        out = redact_lines(["password=abcdef123456", "all good here"])
+        assert out[0] == f"password={PLACEHOLDER}"
+        assert out[1] == "all good here"
diff --git a/tinyagentos/log_redaction.py b/tinyagentos/log_redaction.py
@@ -0,0 +1,123 @@
+"""Redaction for operator-facing log output (Logs app, bug-report bundle).
+
+Every log line that leaves the box through the system-logs API passes through
+`redact()` first. The threat is a well-meaning operator copying a log bundle
+into a public GitHub issue and leaking a live credential that happened to be
+logged by a dependency, a stack trace, or an env dump.
+
+Design choices:
+- Pure functions, no I/O, exhaustively tested. Nothing in the logs path may
+  bypass this.
+- Redact by PATTERN (key=value, bearer tokens, connection strings, private-key
+  blocks, high-entropy provider-key shapes) AND by KNOWN SECRET VALUE (the
+  literal values from the secrets store, so a secret logged verbatim is caught
+  even if it does not match a generic shape).
+- Guard against runaway redaction on the value side: an empty or too-short
+  known value is ignored rather than redacting everything.
+- Never widen a match to swallow surrounding context; replace only the secret
+  span with a fixed placeholder so the log stays readable.
+"""
+from __future__ import annotations
+
+import re
+
+PLACEHOLDER = "[REDACTED]"
+
+# Keys whose value must be masked when they appear as key=value / key: value /
+# "key": "value" / --key value. Matching is case-insensitive and anchored on
+# the separator that follows the key, not on a leading word boundary, so a
+# longer name that ends in a listed key (e.g. "db_password") is masked too.
+# Bare "key" is deliberately not listed, so "monkey=banana" is left alone.
+_SENSITIVE_KEYS = (
+    "password", "passwd", "secret", "token", "api_key", "apikey", "api-key",
+    "access_key", "access-key", "secret_key", "secret-key", "private_key",
+    "private-key", "client_secret", "client-secret", "authorization", "auth",
+    "bearer", "session", "cookie", "credential", "credentials", "passphrase",
+)
+# Longest alternative first so "api_key" is tried before any shorter key.
+_KEY_ALT = "|".join(sorted((re.escape(k) for k in _SENSITIVE_KEYS), key=len, reverse=True))
+
+# A complete single- or double-quoted string on one line. It may contain
+# spaces, commas and backslash-escaped quotes, all of which belong to the
+# secret and must be masked together with it.
+_QUOTED_VALUE = r'"(?:\\.|[^"\\\n])*"' + "|" + r"'(?:\\.|[^'\\\n])*'"
+
+# key = value  /  key: value  /  "key": "value". The value is either a whole
+# quoted string (quotes included in the "val" group) or a bare run that ends
+# at whitespace, comma, quote, or brace. Trying the quoted form first keeps a
+# passphrase such as password="two words" from leaking past the first space.
+_KV_RE = re.compile(
+    r'(?P<pre>["\']?(?:' + _KEY_ALT + r')["\']?\s*[:=]\s*)'
+    r'(?P<val>' + _QUOTED_VALUE + r'|[^\s,"\'}{]+)',
+    re.IGNORECASE,
+)
+
+# --key value  (CLI flag form, space-separated). A quoted value is taken
+# whole; otherwise the value is the next whitespace-delimited word.
+_FLAG_RE = re.compile(
+    r'(?P<pre>--(?:' + _KEY_ALT + r')\s+)(?P<val>' + _QUOTED_VALUE + r'|\S+)',
+    re.IGNORECASE,
+)
+
+# Auth SCHEME words that legitimately follow "authorization:"; the real secret
+# is the NEXT token (handled by the bearer rule), so the KV rule must not treat
+# the scheme word itself as the value and stop there, leaving the token exposed.
+_AUTH_SCHEMES = {"bearer", "basic", "digest", "token", "negotiate"}
+
+# Authorization: Bearer <token>  (header form; the key-value rule catches the
+# "authorization=" form, this catches the header " Bearer <tok>" shape).
+# The value class covers the RFC 6750 b64token alphabet (letters, digits,
+# "-", ".", "_", "~", "+", "/" and "=" padding) so a base64-style token is
+# masked whole instead of being cut at its first "+" or "/".
+_BEARER_RE = re.compile(r'(?P<pre>bearer\s+)(?P<val>[A-Za-z0-9._~+/=\-]{8,})', re.IGNORECASE)
+
+# Provider-key shapes that are secrets on their own with no key= prefix:
+# sk-..., sk-taos-..., ghp_/gho_/ghs_/ghr_/ghu_ and fine-grained github_pat_
+# (GitHub), xoxb-/xoxp-/... (Slack), AKIA... long-term and ASIA... temporary
+# access key IDs (AWS). The surrounding \b anchors keep a shape from matching
+# in the middle of a longer word (e.g. "task-..." does not trip "sk-").
+_TOKEN_SHAPE_RE = re.compile(
+    r'\b(?:'
+    r'sk-[A-Za-z0-9._\-]{16,}'
+    r'|gh[posru]_[A-Za-z0-9]{20,}'
+    r'|github_pat_[A-Za-z0-9_]{20,}'
+    r'|xox[baprs]-[A-Za-z0-9\-]{10,}'
+    r'|A[KS]IA[0-9A-Z]{16}'
+    r')\b'
+)
+
+# PEM private-key blocks (SSH keys materialized on deploy, TLS keys).
+# DOTALL lets ".*?" run across newlines, so the whole armored block from the
+# BEGIN line to the nearest END line is one match; the non-greedy quantifier
+# keeps two separate blocks in the same text from being merged. The label in
+# the END line is not required to equal the label in the BEGIN line.
+_PEM_RE = re.compile(
+    r'-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----.*?-----END [A-Z0-9 ]*PRIVATE KEY-----',
+    re.DOTALL,
+)
+
+# postgres://user:pass@host, mysql://..., redis://..., amqp:// -- mask the
+# password component only. The user part may be empty (redis://:pass@host),
+# hence "*" rather than "+" before the colon.
+_CONN_STR_RE = re.compile(r'(?P<pre>[a-zA-Z][a-zA-Z0-9+.\-]*://[^:/\s]*:)(?P<val>[^@\s]+)(?P<post>@)')
+
+# A known secret value shorter than this is not masked: too likely to be a
+# common substring and cause runaway redaction of unrelated text.
+_MIN_KNOWN_VALUE_LEN = 6
+
+# <sensitive key> [:=] <auth scheme> <credential>, for example
+# "Authorization: Basic dXNlcjpwYXNz". The key-value rule deliberately skips a
+# bare scheme word and the bearer rule only knows "Bearer" followed by at
+# least eight token characters, so without this rule the credential after any
+# other scheme (or a short bearer token) would be left in clear text.
+_SCHEME_CRED_RE = re.compile(
+    r'(?P<pre>["\']?(?:' + _KEY_ALT + r')["\']?\s*[:=]\s*["\']?(?:'
+    + "|".join(sorted(re.escape(s) for s in _AUTH_SCHEMES))
+    + r')[ \t]+)(?P<val>[^\s,"\'}{]+)',
+    re.IGNORECASE,
+)
+
+
+def redact(text: str, known_values: "list[str] | None" = None) -> str:
+    """Return `text` with credential-shaped spans replaced by PLACEHOLDER.
+
+    Rules are applied in a fixed order: PEM private-key blocks, connection
+    string passwords, bearer tokens, credentials that follow an auth scheme
+    word, CLI flags, key/value pairs, bare provider-key shapes, and finally
+    the literal `known_values`.
+
+    text: the log text to clean; an empty string is returned unchanged.
+    known_values: exact secret strings (e.g. from the secrets store) to mask
+    wherever they appear verbatim, in addition to the pattern rules. Empty
+    entries and entries shorter than _MIN_KNOWN_VALUE_LEN are ignored.
+
+    Returns the redacted text; everything outside a masked span is preserved.
+    """
+    if not text:
+        return text
+
+    def _mask(m: "re.Match[str]") -> str:
+        # Keep the key / prefix, drop only the secret that follows it.
+        return m.group("pre") + PLACEHOLDER
+
+    def _kv_repl(m: "re.Match[str]") -> str:
+        if m.group("val").lower() in _AUTH_SCHEMES:
+            # e.g. "authorization: Bearer" -> the scheme word is not the
+            # secret; the credential after it is masked by the rules above.
+            return m.group(0)
+        return m.group("pre") + PLACEHOLDER
+
+    # Structural rules first (they anchor on keys/prefixes, least likely to
+    # over-match), then the bare token shapes.
+    text = _PEM_RE.sub(PLACEHOLDER, text)
+    text = _CONN_STR_RE.sub(lambda m: m.group("pre") + PLACEHOLDER + m.group("post"), text)
+    # Bearer and scheme rules BEFORE the key-value rule so that
+    # "authorization: <scheme> <credential>" has its credential masked; the
+    # KV rule then leaves the bare scheme word alone.
+    text = _BEARER_RE.sub(_mask, text)
+    text = _SCHEME_CRED_RE.sub(_mask, text)
+    text = _FLAG_RE.sub(_mask, text)
+    text = _KV_RE.sub(_kv_repl, text)
+    text = _TOKEN_SHAPE_RE.sub(PLACEHOLDER, text)
+
+    # Known literal secret values last: mask any that survived the shape rules
+    # (e.g. a plain-looking API key logged without a key= prefix). Longest
+    # first so a value that contains a shorter one is fully masked; ties are
+    # broken alphabetically to keep the result deterministic.
+    if known_values:
+        maskable = {v for v in known_values if v and len(v) >= _MIN_KNOWN_VALUE_LEN}
+        for val in sorted(maskable, key=lambda v: (-len(v), v)):
+            text = text.replace(val, PLACEHOLDER)
+
+    return text
+
+
+# Opening and closing armor lines of a PEM private-key block, plus the shape
+# of a line that can sit between them: blank, base64 payload, or one of the
+# "Proc-Type:" / "DEK-Info:" headers used by encrypted legacy PEM files.
+_PEM_BEGIN_LINE_RE = re.compile(r'-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----')
+_PEM_END_LINE_RE = re.compile(r'-----END [A-Z0-9 ]*PRIVATE KEY-----')
+_PEM_BODY_LINE_RE = re.compile(r'\s*(?:[A-Za-z0-9+/=]+|(?:Proc-Type|DEK-Info):.*)?\s*')
+
+
+def redact_lines(lines: "list[str]", known_values: "list[str] | None" = None) -> "list[str]":
+    """Redact a list of log lines (convenience for the paged log reader).
+
+    Each line goes through `redact()`. In addition, a PEM private-key block
+    that is spread over several list elements is tracked across lines: a
+    single line never contains both the BEGIN and the END marker of such a
+    block, so per-line redaction alone would let the key payload through.
+
+    lines: the log lines, with or without trailing newline characters.
+    known_values: passed through to `redact()` unchanged.
+
+    Returns a new list with exactly one output element per input element;
+    line endings of masked PEM lines are preserved.
+    """
+    redacted: "list[str]" = []
+    inside_pem = False
+    for line in lines:
+        # Trailing "\n" / "\r\n" of this element, kept when the body is masked.
+        ending = line[len(line.rstrip("\r\n")):]
+
+        if inside_pem:
+            closing = _PEM_END_LINE_RE.search(line)
+            if closing:
+                # Mask through the END marker; the rest is ordinary text.
+                inside_pem = False
+                redacted.append(PLACEHOLDER + redact(line[closing.end():], known_values))
+                continue
+            if _PEM_BODY_LINE_RE.fullmatch(line):
+                # Blank lines carry no secret and are passed through as-is.
+                redacted.append(PLACEHOLDER + ending if line.strip() else line)
+                continue
+            # Not key material: the block was truncated. Stop masking whole
+            # lines so one stray BEGIN marker cannot blank the rest of the log.
+            inside_pem = False
+
+        opening = _PEM_BEGIN_LINE_RE.search(line)
+        if opening and not _PEM_END_LINE_RE.search(line, opening.end()):
+            # Block starts here and continues on later lines.
+            inside_pem = True
+            redacted.append(redact(line[:opening.start()], known_values) + PLACEHOLDER + ending)
+            continue
+
+        redacted.append(redact(line, known_values))
+    return redacted