Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .entire/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ settings.local.json
metadata/
current_session
logs/
redactors/local/
20 changes: 20 additions & 0 deletions cmd/entire/cli/settings/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,13 @@ func (s *SummaryGenerationSettings) SetProvider(newProvider, newModel string) {
// RedactionSettings configures redaction behavior beyond the default secret detection.
type RedactionSettings struct {
	// PII holds the PII detection settings; it is omitted from JSON when nil.
	PII *PIISettings `json:"pii,omitempty"`

	// CustomSecrets is a label → RE2 regex map for user-defined credential
	// shapes (custom env-var prefixes, internal service tokens, etc.). Each
	// match is replaced with the bare "REDACTED" token used by the built-in
	// secret layers, not the "[REDACTED_<LABEL>]" token used by PII. Failed
	// regex compilations are logged via slog.Warn and the rule is skipped.
	//
	// When settings are layered, entries merge per label: an overriding
	// layer replaces the regex of a label it redefines and leaves every
	// other label intact.
	CustomSecrets map[string]string `json:"custom_secrets,omitempty"`
}

// PIISettings configures PII detection categories.
Expand Down Expand Up @@ -507,6 +514,19 @@ func mergeRedaction(dst *RedactionSettings, data json.RawMessage) error {
return err
}
}
if csRaw, ok := raw["custom_secrets"]; ok {
var cs map[string]string
if err := json.Unmarshal(csRaw, &cs); err != nil {
return fmt.Errorf("parsing redaction.custom_secrets: %w", err)
}
if dst.CustomSecrets == nil {
dst.CustomSecrets = cs
} else {
for k, v := range cs {
dst.CustomSecrets[k] = v
}
}
}
return nil
}

Expand Down
72 changes: 72 additions & 0 deletions cmd/entire/cli/settings/settings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -972,3 +972,75 @@ func containsUnknownField(msg string) bool {
// Go's json package reports unknown fields with this message format
return strings.Contains(msg, "unknown field")
}

// TestLoadMerged_CustomSecretsPerKeyOverride checks that custom_secrets from
// the local settings file are merged into the base file label by label: a
// label present in both takes the local regex, labels present in only one
// file survive unchanged.
func TestLoadMerged_CustomSecretsPerKeyOverride(t *testing.T) {
	t.Parallel()

	tmp := t.TempDir()
	basePath := filepath.Join(tmp, "settings.json")
	localPath := filepath.Join(tmp, "settings.local.json")

	// Written in order: the shared base file first, then the local override.
	fixtures := []struct {
		path string
		body string
	}{
		{path: basePath, body: `{
"redaction": {
"custom_secrets": {
"team_token": "TEAM_[A-Za-z0-9]{16,}",
"shared_token": "SHARED_[A-Z]{4}_[A-Za-z0-9]{12,}"
}
}
}`},
		{path: localPath, body: `{
"redaction": {
"custom_secrets": {
"shared_token": "SHARED_[A-Z]{4}_[A-Za-z0-9]{20,}",
"personal": "PERSONAL_[a-z]{32}"
}
}
}`},
	}
	for _, f := range fixtures {
		if err := os.WriteFile(f.path, []byte(f.body), 0o600); err != nil {
			t.Fatal(err)
		}
	}

	merged, err := loadMergedSettings(basePath, localPath)
	if err != nil {
		t.Fatalf("loadMergedSettings: %v", err)
	}

	// shared_token must carry the local ({20,}) pattern, not the base ({12,}) one.
	expected := map[string]string{
		"team_token":   "TEAM_[A-Za-z0-9]{16,}",
		"shared_token": "SHARED_[A-Z]{4}_[A-Za-z0-9]{20,}",
		"personal":     "PERSONAL_[a-z]{32}",
	}
	actual := merged.Redaction.CustomSecrets
	if len(actual) != len(expected) {
		t.Fatalf("CustomSecrets size: want %d, have %d (%v)", len(expected), len(actual), actual)
	}
	for label, pattern := range expected {
		if have := actual[label]; have != pattern {
			t.Errorf("CustomSecrets[%s]: want %q, have %q", label, pattern, have)
		}
	}
}

// TestLoadFromBytes_CustomSecrets checks that a redaction.custom_secrets
// entry in raw settings JSON is decoded into Redaction.CustomSecrets.
func TestLoadFromBytes_CustomSecrets(t *testing.T) {
	t.Parallel()

	const wantPattern = "ACME_TOKEN_[A-Za-z0-9]{20,}"

	raw := []byte(`{
"redaction": {
"custom_secrets": {
"acme_token": "ACME_TOKEN_[A-Za-z0-9]{20,}"
}
}
}`)

	loaded, err := LoadFromBytes(raw)
	if err != nil {
		t.Fatalf("LoadFromBytes: %v", err)
	}
	if loaded.Redaction == nil {
		t.Fatalf("Redaction is nil")
	}

	havePattern := loaded.Redaction.CustomSecrets["acme_token"]
	if havePattern != wantPattern {
		t.Errorf("CustomSecrets[acme_token]: want %q, have %q", wantPattern, havePattern)
	}
}
60 changes: 43 additions & 17 deletions cmd/entire/cli/strategy/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,33 +343,58 @@ var (

var initRedactionOnce sync.Once

// EnsureRedactionConfigured loads PII redaction settings and configures the
// redact package. No-op if PII is not enabled in settings.
// Must be called at each process entry point before checkpoint writes
// (e.g., hook PersistentPreRunE, doctor PreRun).
// EnsureRedactionConfigured loads redaction settings and configures the
// redact package: PII detection (opt-in), inline custom_secrets, and rule
// packs auto-discovered from .entire/redactors/.
//
// Must be called at each process entry point before checkpoint writes.
func EnsureRedactionConfigured() {
initRedactionOnce.Do(func() {
ctx := context.Background()
s, err := settings.Load(ctx)
if err != nil {
logCtx := logging.WithComponent(ctx, "redaction")
logging.Warn(logCtx, "failed to load settings for PII redaction", slog.String("error", err.Error()))
logging.Warn(logCtx, "failed to load settings for redaction", slog.String("error", err.Error()))
return
}
if s.Redaction == nil || s.Redaction.PII == nil || !s.Redaction.PII.Enabled {
return

// PII detection (opt-in).
if s.Redaction != nil && s.Redaction.PII != nil && s.Redaction.PII.Enabled {
pii := s.Redaction.PII
cfg := redact.PIIConfig{
Enabled: true,
Categories: make(map[redact.PIICategory]bool),
CustomPatterns: pii.CustomPatterns,
}
cfg.Categories[redact.PIIEmail] = pii.Email == nil || *pii.Email
cfg.Categories[redact.PIIPhone] = pii.Phone == nil || *pii.Phone
cfg.Categories[redact.PIIAddress] = pii.Address != nil && *pii.Address
redact.ConfigurePII(cfg)
}

// Custom rules: inline + packs.
var inline map[string]string
if s.Redaction != nil {
inline = s.Redaction.CustomSecrets
}
packsRelPath := filepath.Join(paths.EntireDir, redact.RedactorsDirName)
packsDir, perr := paths.AbsPath(ctx, packsRelPath)
if perr != nil {
logCtx := logging.WithComponent(ctx, "redaction")
logging.Warn(logCtx, "failed to resolve redactors path", slog.String("error", perr.Error()))
packsDir = packsRelPath
}
packs, lerr := redact.LoadPacks(packsDir)
if lerr != nil {
logCtx := logging.WithComponent(ctx, "redaction")
logging.Warn(logCtx, "failed to load redactor packs", slog.String("error", lerr.Error()))
}
pii := s.Redaction.PII
cfg := redact.PIIConfig{
Enabled: true,
Categories: make(map[redact.PIICategory]bool),
CustomPatterns: pii.CustomPatterns,
if len(inline) > 0 || len(packs) > 0 {
redact.ConfigureCustomRules(redact.CustomRulesConfig{
Inline: inline,
Packs: packs,
})
}
// Email and phone default to true when PII is enabled; address defaults to false.
cfg.Categories[redact.PIIEmail] = pii.Email == nil || *pii.Email
cfg.Categories[redact.PIIPhone] = pii.Phone == nil || *pii.Phone
cfg.Categories[redact.PIIAddress] = pii.Address != nil && *pii.Address
redact.ConfigurePII(cfg)
})
}

Expand Down Expand Up @@ -1050,6 +1075,7 @@ func EnsureEntireGitignore(ctx context.Context) error {
"settings.local.json",
"metadata/",
"logs/",
redact.RedactorsDirName + "/local/",
}

// Track what needs to be added
Expand Down
23 changes: 23 additions & 0 deletions cmd/entire/cli/strategy/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"testing"

Expand Down Expand Up @@ -1752,3 +1753,25 @@ func TestReadAgentTypeFromTree_MetadataJSON_OverridesDir(t *testing.T) {
result := ReadAgentTypeFromTree(tree, "cp")
assert.Equal(t, agent.AgentTypeCursor, result)
}

// TestEnsureEntireGitignore_IncludesRedactorsLocal checks that the generated
// .entire/.gitignore contains the redactors/local/ entry.
func TestEnsureEntireGitignore_IncludesRedactorsLocal(t *testing.T) {
	// Cannot t.Parallel(): EnsureEntireGitignore writes to the worktree root.

	repoDir := t.TempDir()
	testutil.InitRepo(t, repoDir)
	t.Chdir(repoDir)
	// Drop any cached worktree root now, and again once the test finishes.
	paths.ClearWorktreeRootCache()
	t.Cleanup(paths.ClearWorktreeRootCache)

	err := EnsureEntireGitignore(context.Background())
	if err != nil {
		t.Fatalf("EnsureEntireGitignore: %v", err)
	}

	gitignorePath := filepath.Join(repoDir, ".entire", ".gitignore")
	contents, err := os.ReadFile(gitignorePath)
	if err != nil {
		t.Fatalf("read .entire/.gitignore: %v", err)
	}

	if found := strings.Contains(string(contents), "redactors/local/"); !found {
		t.Errorf(".entire/.gitignore missing redactors/local/ entry; got:\n%s", contents)
	}
}
96 changes: 96 additions & 0 deletions docs/security-and-privacy.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,102 @@ All detected secrets are replaced with `REDACTED`.

To reduce over-redaction, Entire preserves structural transcript fields such as IDs and paths, ignores common placeholder values, and redacts only credential values for bounded key/value forms. When a connection string contains a real (non-placeholder) password, it is redacted as a unit because partial fragments can still expose sensitive material; connection strings whose passwords are placeholders (e.g. `${DB_PASSWORD}`) are left intact.

## Customizing redaction

The built-in detectors handle well-known secret formats. For internal credential shapes that aren't covered (custom env-var prefixes, internal service tokens, project-specific session formats), Entire offers two extension surfaces. Both feed the same engine and run as their own layer between connection-string detection and bounded credential KV detection.

### Surface 1: Inline `redaction.custom_secrets`

Add a label → regex map under `redaction.custom_secrets` in `.entire/settings.json`:

```json
{
"redaction": {
"custom_secrets": {
"acme_token": "ACME_TOKEN_[A-Za-z0-9]{20,}",
"internal_id": "INTERNAL_[a-z]{6}_[0-9]{4}"
}
}
}
```

- The label is for diagnostics only; matches are replaced with the bare `REDACTED` token (matching the built-in secret layers, not the `[REDACTED_<LABEL>]` token used for PII).
- Regexes follow [Go's RE2 syntax](https://pkg.go.dev/regexp/syntax). No lookarounds, no backreferences.
- A failed compile is logged once at startup and the rule is skipped — it will never crash the redactor.
- Override in `.entire/settings.local.json` for personal additions; entries merge per-key (override replaces the same key, leaves other keys intact).

### Surface 2: Rule packs

Drop a YAML or JSON file into `.entire/redactors/`:

```yaml
# .entire/redactors/acme-internal.yaml
name: acme-internal # MUST match the filename stem
version: 1.0.0
description: Internal ACME service tokens
rules:
- id: acme-token
description: Long-lived ACME service tokens
regex: 'ACME_TOKEN_[A-Za-z0-9]{20,}'
samples:
- { input: "key=ACME_TOKEN_abc123def456ghi789jkl", redacted: true }
- { input: "ACME_TOKEN_short", redacted: false }
- id: acme-session
regex: 'asess_[a-f0-9]{32}'
```

Equivalent JSON form:

```json
{
"name": "acme-internal",
"version": "1.0.0",
"rules": [
{
"id": "acme-token",
"regex": "ACME_TOKEN_[A-Za-z0-9]{20,}",
"samples": [
{ "input": "key=ACME_TOKEN_abc123def456ghi789jkl", "redacted": true },
{ "input": "ACME_TOKEN_short", "redacted": false }
]
}
]
}
```

**Required fields:** `name` (must equal the filename stem — `acme-internal.yaml` → `acme-internal`), `version` (any string; semver recommended), and `rules[]` (at least one entry, each with `id` and `regex`).

**Optional fields:** `description` (pack-level and rule-level), and `rules[].samples[]` (see "Self-tests" below).

### Self-tests via `samples[]`

Each rule may declare an array of `{input, redacted}` pairs. On the next process startup after editing the pack, Entire runs each sample and emits a `slog.Warn` for any mismatch:

```
WARN redactor pack sample mismatch pack=.entire/redactors/acme-internal.yaml
rule=acme-token sample="..." expected=true got=false
```

A failing sample never disables the rule — sample validation is informational. Use it to catch typos and false positives before they ship.

### Distribution

- **Within a team:** commit `.entire/settings.json` and/or `.entire/redactors/*` to your repo. Teammates pull and the rules apply.
- **Across teams:** copy the pack file or share a link to a gist; recipients drop the file into their `.entire/redactors/`.
- **Personal-only:** put the file in `.entire/redactors/local/` — `entire enable` writes that path into `.entire/.gitignore` so personal rules don't pollute team commits.

### When to write a rule vs. file an issue

Write a rule for internal service tokens (`ACME_*`, `INTERNAL_*`), custom env-var prefixes the bundled detectors don't know about, and project-specific session formats.

File an issue when the rule would benefit every Entire user (e.g., a major SaaS issued a new token format), when a built-in is producing false positives on common idioms in your codebase, or when a built-in is *not* catching a well-known shared format (we'd rather fix the built-in than have everyone ship the same custom rule).

### Troubleshooting

- **My rule doesn't redact anything.** Warnings about invalid patterns or sample mismatches appear on stderr the next time any `entire` command runs. Look for lines mentioning your label or pack path.
- **My pack file is silently ignored.** Filenames must end in `.yaml`, `.yml`, or `.json`. Other extensions are skipped.
- **I want to disable a rule temporarily.** In a YAML pack, comment out every line of the rule's entry with `#`. JSON has no comment syntax, so in a JSON pack or in `custom_secrets` remove the entry instead. The rule reloads on the next CLI invocation.

## Limitations

- **Best-effort.** Novel or low-entropy secrets (short passwords, predictable tokens) may not be caught.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
golang.org/x/sync v0.20.0
golang.org/x/sys v0.43.0
golang.org/x/term v0.42.0
gopkg.in/yaml.v3 v3.0.1
)

require (
Expand Down Expand Up @@ -138,5 +139,4 @@ require (
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading