Skip to content

Commit ba15812

Browse files
author
Gary Blankenship
committed
feat(commit): add commit analyzer subsystem with visibility-aware secret scanning
1 parent 122354f commit ba15812

5 files changed

Lines changed: 345 additions & 43 deletions

File tree

commit_analyze.go

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ func AnalyzeCommit(ctx context.Context, opts AnalyzeOptions) (*CommitAnalysis, e
7373
EarlyExit: true,
7474
EarlyReason: "no changes in working tree",
7575
Complexity: "simple",
76+
Remote: RemoteInfo{OriginURL: gs.OriginURL, Visibility: gs.Visibility},
77+
Secrets: SecretsSummary{Clean: true},
7678
Refs: refs,
7779
}, nil
7880
}
@@ -99,8 +101,9 @@ func AnalyzeCommit(ctx context.Context, opts AnalyzeOptions) (*CommitAnalysis, e
99101
// Dep bumps.
100102
bumps := detectDepBumps(ctx, root, gs.Files)
101103

102-
// Secrets scan.
103-
findings, summary := scanSecrets(ctx, root, gs.Files)
104+
// Secrets scan. Visibility is passed in so findings get a deterministic
105+
// default_action that the agent can act on without per-finding LLM calls.
106+
findings, summary := scanSecrets(ctx, root, gs.Files, gs.Visibility)
104107
if err := writeFindings(refs.Findings, findings); err != nil {
105108
return nil, fmt.Errorf("write findings: %w", err)
106109
}
@@ -114,26 +117,40 @@ func AnalyzeCommit(ctx context.Context, opts AnalyzeOptions) (*CommitAnalysis, e
114117
artifacts := collectArtifacts(gs.Files)
115118
histStyle := classifyHistoryStyle(gs.HistoryRaw)
116119

120+
// Surface tag + recent subjects so the agent never needs to re-run
121+
// `git log` / `git tag` just for style inference or version lookup.
122+
var latestTag string
123+
if len(gs.Tags) > 0 {
124+
latestTag = gs.Tags[0]
125+
}
126+
recentSubjects := splitLines(gs.HistoryRaw)
127+
if len(recentSubjects) > 5 {
128+
recentSubjects = recentSubjects[:5]
129+
}
130+
117131
// Plan file.
118132
plan := renderPlan(groups)
119133
if err := writeFile(refs.Plan, []byte(plan)); err != nil {
120134
return nil, fmt.Errorf("write plan: %w", err)
121135
}
122136

123137
analysis := &CommitAnalysis{
124-
Version: 1,
125-
Tmpdir: tmpdir,
126-
EarlyExit: false,
127-
Complexity: classifyComplexity(gs.Files, groups),
128-
Counts: countFiles(gs.Files),
129-
HistoryStyle: histStyle,
130-
Secrets: summary,
131-
Artifacts: artifacts,
132-
ConfigFiles: configFiles,
133-
DepBumps: bumps,
134-
Groups: groups,
135-
PlanHash: planHash(plan),
136-
Refs: refs,
138+
Version: 1,
139+
Tmpdir: tmpdir,
140+
EarlyExit: false,
141+
Complexity: classifyComplexity(gs.Files, groups),
142+
Counts: countFiles(gs.Files),
143+
HistoryStyle: histStyle,
144+
LatestTag: latestTag,
145+
RecentSubjects: recentSubjects,
146+
Remote: RemoteInfo{OriginURL: gs.OriginURL, Visibility: gs.Visibility},
147+
Secrets: summary,
148+
Artifacts: artifacts,
149+
ConfigFiles: configFiles,
150+
DepBumps: bumps,
151+
Groups: groups,
152+
PlanHash: planHash(plan),
153+
Refs: refs,
137154
}
138155
return analysis, nil
139156
}

commit_analyze_test.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ func runGitT(t *testing.T, root string, args ...string) {
5757
// Test_Analyze_HappyPath: a test-pair + a config change form two groups,
5858
// both high confidence, messages non-generic.
5959
func Test_Analyze_HappyPath(t *testing.T) {
60+
t.Parallel()
61+
if testing.Short() {
62+
t.Skip("skipping integration test in short mode")
63+
}
6064
root := initTestRepo(t,
6165
map[string]string{
6266
"go.mod": "module fixture\ngo 1.22\n",
@@ -100,6 +104,10 @@ func Test_Analyze_HappyPath(t *testing.T) {
100104

101105
// Test_Analyze_CleanRepo: no dirty files => early_exit.
102106
func Test_Analyze_CleanRepo(t *testing.T) {
107+
t.Parallel()
108+
if testing.Short() {
109+
t.Skip("skipping integration test in short mode")
110+
}
103111
root := initTestRepo(t,
104112
map[string]string{"README.md": "# clean\n"},
105113
nil,
@@ -115,6 +123,10 @@ func Test_Analyze_CleanRepo(t *testing.T) {
115123

116124
// Test_Analyze_Secrets: a live AWS access key in a config file must FLAG.
117125
func Test_Analyze_Secrets(t *testing.T) {
126+
t.Parallel()
127+
if testing.Short() {
128+
t.Skip("skipping integration test in short mode")
129+
}
118130
root := initTestRepo(t,
119131
map[string]string{"config.yaml": "key: value\n"},
120132
map[string]string{"config.yaml": "key: value\naws_access_key: AKIAIOSFODNN7EXAMPLE\n"},
@@ -133,6 +145,10 @@ func Test_Analyze_Secrets(t *testing.T) {
133145

134146
// Test_Analyze_PlaceholderPaths: /Users/you/ should NOT flag as PII.
135147
func Test_Analyze_PlaceholderPaths(t *testing.T) {
148+
t.Parallel()
149+
if testing.Short() {
150+
t.Skip("skipping integration test in short mode")
151+
}
136152
root := initTestRepo(t,
137153
map[string]string{"docs.md": "# Docs\n"},
138154
map[string]string{"docs.md": "# Docs\n\nInstall at /Users/you/bin/tool.\n"},
@@ -150,6 +166,10 @@ func Test_Analyze_PlaceholderPaths(t *testing.T) {
150166

151167
// Test_Analyze_DepBump: go.mod version bump is detected.
152168
func Test_Analyze_DepBump(t *testing.T) {
169+
t.Parallel()
170+
if testing.Short() {
171+
t.Skip("skipping integration test in short mode")
172+
}
153173
root := initTestRepo(t,
154174
map[string]string{
155175
"go.mod": "module fixture\n\ngo 1.22\n\nrequire github.com/pkg/errors v0.9.0\n",
@@ -170,6 +190,125 @@ func Test_Analyze_DepBump(t *testing.T) {
170190
}
171191
}
172192

193+
// Test_Analyze_TagAndSubjects: latest_tag + recent_subjects are emitted so the
194+
// agent never re-runs `git log` / `git tag` for style or version lookup.
195+
func Test_Analyze_TagAndSubjects(t *testing.T) {
196+
t.Parallel()
197+
if testing.Short() {
198+
t.Skip("skipping integration test in short mode")
199+
}
200+
root := initTestRepo(t,
201+
map[string]string{"README.md": "# Fixture\n"},
202+
nil,
203+
)
204+
// Two more commits with conventional-style subjects; tag the last one.
205+
writeFixture(t, root, "a.txt", "a\n")
206+
runGitT(t, root, "add", "-A")
207+
runGitT(t, root, "commit", "-q", "-m", "feat(core): add a")
208+
writeFixture(t, root, "b.txt", "b\n")
209+
runGitT(t, root, "add", "-A")
210+
runGitT(t, root, "commit", "-q", "-m", "fix(core): patch b")
211+
runGitT(t, root, "tag", "v0.2.0")
212+
// Dirty the tree so analyze doesn't early-exit.
213+
writeFixture(t, root, "README.md", "# Fixture\n\nchanged\n")
214+
215+
got, err := AnalyzeCommit(context.Background(), AnalyzeOptions{Root: root})
216+
if err != nil {
217+
t.Fatalf("AnalyzeCommit: %v", err)
218+
}
219+
if got.LatestTag != "v0.2.0" {
220+
t.Errorf("LatestTag = %q, want %q", got.LatestTag, "v0.2.0")
221+
}
222+
if len(got.RecentSubjects) == 0 {
223+
t.Fatalf("RecentSubjects empty; expected recent commit subjects")
224+
}
225+
if len(got.RecentSubjects) > 5 {
226+
t.Errorf("RecentSubjects = %d entries, want ≤5", len(got.RecentSubjects))
227+
}
228+
if !strings.HasPrefix(got.RecentSubjects[0], "fix(core)") {
229+
t.Errorf("RecentSubjects[0] = %q, want newest-first conventional subject", got.RecentSubjects[0])
230+
}
231+
}
232+
233+
// Test_Analyze_NoTag_NoHistory: fresh repo with one commit, no tags — LatestTag
234+
// must be empty (agent signal: propose initial v0.1.0).
235+
func Test_Analyze_NoTag_NoHistory(t *testing.T) {
236+
t.Parallel()
237+
if testing.Short() {
238+
t.Skip("skipping integration test in short mode")
239+
}
240+
root := initTestRepo(t,
241+
map[string]string{"README.md": "# fresh\n"},
242+
map[string]string{"README.md": "# fresh\n\nmodified\n"},
243+
)
244+
got, err := AnalyzeCommit(context.Background(), AnalyzeOptions{Root: root})
245+
if err != nil {
246+
t.Fatalf("AnalyzeCommit: %v", err)
247+
}
248+
if got.LatestTag != "" {
249+
t.Errorf("LatestTag = %q, want empty on untagged repo", got.LatestTag)
250+
}
251+
}
252+
253+
// Test_Analyze_Visibility_None_PIISafe: a personal repo with no origin should
254+
// get "safe" default_action on PII REVIEW findings (emails/localhost), so the
255+
// agent can skip per-finding adjudication.
256+
func Test_Analyze_Visibility_None_PIISafe(t *testing.T) {
257+
t.Parallel()
258+
if testing.Short() {
259+
t.Skip("skipping integration test in short mode")
260+
}
261+
root := initTestRepo(t,
262+
map[string]string{"README.md": "# readme\n"},
263+
map[string]string{"README.md": "# readme\n\nping me at dev@example.com on localhost:8080\n"},
264+
)
265+
got, err := AnalyzeCommit(context.Background(), AnalyzeOptions{Root: root})
266+
if err != nil {
267+
t.Fatalf("AnalyzeCommit: %v", err)
268+
}
269+
if got.Remote.Visibility != "none" {
270+
t.Fatalf("Remote.Visibility = %q, want %q (no origin remote)", got.Remote.Visibility, "none")
271+
}
272+
if got.Secrets.AmbiguousCount != 0 {
273+
t.Errorf("AmbiguousCount = %d, want 0 (personal repo: PII REVIEWs should be safe)", got.Secrets.AmbiguousCount)
274+
}
275+
findings := readFindings(t, got.Refs.Findings)
276+
if len(findings) == 0 {
277+
t.Fatalf("expected PII REVIEW findings for email/localhost")
278+
}
279+
for _, f := range findings {
280+
if f.Kind == "pii" && f.Class == "REVIEW" && f.DefaultAction != "safe" {
281+
t.Errorf("PII REVIEW finding %q default_action = %q, want %q", f.Snippet, f.DefaultAction, "safe")
282+
}
283+
}
284+
}
285+
286+
// Test_Analyze_Visibility_FlagAlwaysFix: FLAG findings always get default_action=fix
287+
// regardless of visibility — live secrets are never "safe".
288+
func Test_Analyze_Visibility_FlagAlwaysFix(t *testing.T) {
289+
t.Parallel()
290+
if testing.Short() {
291+
t.Skip("skipping integration test in short mode")
292+
}
293+
root := initTestRepo(t,
294+
map[string]string{"config.yaml": "key: value\n"},
295+
map[string]string{"config.yaml": "key: value\naws_access_key: AKIAIOSFODNN7EXAMPLE\n"},
296+
)
297+
got, err := AnalyzeCommit(context.Background(), AnalyzeOptions{Root: root})
298+
if err != nil {
299+
t.Fatalf("AnalyzeCommit: %v", err)
300+
}
301+
if got.Secrets.FixCount < 1 {
302+
t.Errorf("FixCount = %d, want >=1 (AKIA key must be fix)", got.Secrets.FixCount)
303+
}
304+
findings := readFindings(t, got.Refs.Findings)
305+
for _, f := range findings {
306+
if f.Class == "FLAG" && f.DefaultAction != "fix" {
307+
t.Errorf("FLAG finding %q default_action = %q, want %q", f.Snippet, f.DefaultAction, "fix")
308+
}
309+
}
310+
}
311+
173312
func containsAll(haystack []string, needles ...string) bool {
174313
set := make(map[string]bool, len(haystack))
175314
for _, h := range haystack {

commit_gitstate.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ type gitState struct {
2222
CoChange map[string]map[string]int // file -> file -> co-commit count (last 500 commits)
2323
Tags []string // descending semver
2424
HasUpstream bool
25+
OriginURL string // origin remote URL; empty if no origin
26+
Visibility string // public | private | none | unknown
2527
}
2628

2729
// collectGitState shells the git commands we need. Returns an error only for
@@ -81,9 +83,81 @@ func collectGitState(ctx context.Context, root string) (*gitState, error) {
8183
}
8284
}
8385

86+
// Origin + visibility. Drives Finding.DefaultAction strictness: no remote =
87+
// personal repo (lenient); public remote = strict; unknown = strict.
88+
gs.OriginURL, gs.Visibility = detectVisibility(ctx, root)
89+
8490
return gs, nil
8591
}
8692

93+
// detectVisibility classifies the origin remote. Logic:
94+
// - no origin → ("", "none")
95+
// - origin exists, `gh` authenticated → probe gh repo view for github.com URLs
96+
// - otherwise → (url, "unknown") — caller treats as strict
97+
//
98+
// Network-light: gh probe is skipped when origin is not github.com or gh is
99+
// missing; both paths short-circuit to "unknown" so analyze never stalls on
100+
// flaky network.
101+
func detectVisibility(ctx context.Context, root string) (url, vis string) {
102+
out, err := runGit(ctx, root, "remote", "get-url", "origin")
103+
if err != nil {
104+
return "", "none"
105+
}
106+
url = strings.TrimSpace(out)
107+
if url == "" {
108+
return "", "none"
109+
}
110+
if !isGitHubURL(url) {
111+
return url, "unknown"
112+
}
113+
slug := githubSlug(url)
114+
if slug == "" {
115+
return url, "unknown"
116+
}
117+
if _, err := exec.LookPath("gh"); err != nil {
118+
return url, "unknown"
119+
}
120+
cmd := exec.CommandContext(ctx, "gh", "repo", "view", slug, "--json", "visibility", "-q", ".visibility")
121+
raw, err := cmd.Output()
122+
if err != nil {
123+
return url, "unknown"
124+
}
125+
switch strings.ToLower(strings.TrimSpace(string(raw))) {
126+
case "public":
127+
return url, "public"
128+
case "private", "internal":
129+
return url, "private"
130+
}
131+
return url, "unknown"
132+
}
133+
134+
// isGitHubURL returns true for SSH or HTTPS origins on github.com.
135+
func isGitHubURL(url string) bool {
136+
return strings.Contains(url, "github.com:") || strings.Contains(url, "github.com/")
137+
}
138+
139+
// githubSlug extracts "owner/repo" from a github.com URL.
140+
// Handles: git@github.com:owner/repo.git, https://github.com/owner/repo(.git),
141+
// ssh://git@github.com/owner/repo.
142+
func githubSlug(url string) string {
143+
rest := url
144+
switch {
145+
case strings.HasPrefix(rest, "git@github.com:"):
146+
rest = strings.TrimPrefix(rest, "git@github.com:")
147+
case strings.Contains(rest, "github.com/"):
148+
_, rest, _ = strings.Cut(rest, "github.com/")
149+
default:
150+
return ""
151+
}
152+
rest = strings.TrimSuffix(rest, ".git")
153+
rest = strings.TrimSuffix(rest, "/")
154+
parts := strings.Split(rest, "/")
155+
if len(parts) < 2 {
156+
return ""
157+
}
158+
return parts[0] + "/" + parts[1]
159+
}
160+
87161
// runGit invokes git with the given args inside root, returns stdout.
88162
func runGit(ctx context.Context, root string, args ...string) (string, error) {
89163
cmd := exec.CommandContext(ctx, "git", append([]string{"-C", root}, args...)...)

0 commit comments

Comments
 (0)