Skip to content

Commit 29c2d7f

Browse files
committed
linkindex: update incrementally on file changes
Backlinks now get updated when files change on disk, no matter whether from the editor, git pull, or rsync.

- Surgical backlink patching: diff old vs new targets (both sorted), patch only affected entries via binary search. 566µs per update on a 14k-page garden, down from 5.3ms.
- fswatch: debounce rapid events (100ms), remove watches on delete/rename, scan new directories for pre-existing files
- readDisk: read pages without AfterPageRead hook for background index updates
- rename: hold idxMu across all mutations (prevents fswatch interleaving), validate destination with filepath.IsLocal
- SSE live reload now also triggers on backlink changes
- slices.Sort over sort.Strings, b.Loop() over range b.N

Benchmarks on AMD Ryzen 9 9950X3D (7,418 pages, flancian-garden):

    BenchmarkUpdateIndex/pages=7418/clone-only   432µs/op
    BenchmarkUpdateIndex/pages=7418/patch        448µs/op
    BenchmarkInvertLinks/pages=7418             5016µs/op

Per-operation cost across gardens:

    Garden (pages)       updateIndex   invertLinks   linkTargets
    hugo/content (239)   36µs          159µs         543µs (28KB)
    flancian (7,418)     568µs         4.4ms         977µs (52KB)

Generated using Claude Opus 4.6
1 parent c0134ef commit 29c2d7f

15 files changed

Lines changed: 1328 additions & 33 deletions

internal/bull/cmdmv.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ func mv(args []string) error {
8585
content: content,
8686
contentDir: *contentDir,
8787
contentSettings: cs,
88+
contentChanged: make(chan struct{}),
8889
}
8990
if err := bull.init(); err != nil {
9091
return err
@@ -96,7 +97,7 @@ func mv(args []string) error {
9697
if err != nil {
9798
return err
9899
}
99-
bull.idx = idx
100+
bull.idx.Store(idx)
100101
log.Printf("discovered in %.2fs: directories: %d, pages: %d, links: %d", time.Since(start).Seconds(), idx.dirs, idx.pages, len(idx.backlinks))
101102

102103
src := fset.Arg(0)

internal/bull/cmdserve.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package bull
22

33
import (
44
"bytes"
5+
"context"
56
"flag"
67
"io/fs"
78
"log"
@@ -140,6 +141,7 @@ func (c *Customization) serve(args []string) error {
140141
editor: *editor,
141142
root: *root,
142143
watch: *watch,
144+
contentChanged: make(chan struct{}),
143145
}
144146
if err := bull.init(); err != nil {
145147
return err
@@ -155,9 +157,15 @@ func (c *Customization) serve(args []string) error {
155157
if err != nil {
156158
return err
157159
}
158-
bull.idx = idx
160+
bull.idx.Store(idx)
159161
log.Printf("discovered in %.2fs: directories: %d, pages: %d, links: %d", time.Since(start).Seconds(), idx.dirs, idx.pages, len(idx.backlinks))
160162

163+
// context.Background: the watcher runs for the lifetime of the process.
164+
// No graceful shutdown is needed; w.Close() happens on process exit.
165+
if err := bull.watchContent(context.Background()); err != nil {
166+
log.Printf("fswatch: %v (backlinks will not update on external edits)", err)
167+
}
168+
161169
urlBullPrefix := bull.URLBullPrefix()
162170

163171
startupTime := time.Now()

internal/bull/fswatch.go

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
package bull
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io/fs"
8+
"log"
9+
"os"
10+
"path/filepath"
11+
"slices"
12+
"time"
13+
14+
"github.com/fsnotify/fsnotify"
15+
)
16+
17+
func (b *bullServer) watchContent(ctx context.Context) error {
18+
w, err := fsnotify.NewWatcher()
19+
if err != nil {
20+
return err
21+
}
22+
23+
var watchCount, watchErrors int
24+
if err := fs.WalkDir(b.content.FS(), ".", func(path string, d fs.DirEntry, err error) error {
25+
if err != nil {
26+
log.Printf("fswatch: walk %s: %v", path, err)
27+
return nil
28+
}
29+
if !d.IsDir() {
30+
return nil
31+
}
32+
if d.Name() == ".git" {
33+
return fs.SkipDir
34+
}
35+
if err := w.Add(filepath.Join(b.contentDir, path)); err != nil {
36+
log.Printf("fswatch: watch %s: %v", path, err)
37+
watchErrors++
38+
} else {
39+
watchCount++
40+
}
41+
return nil
42+
}); err != nil {
43+
log.Printf("fswatch: adding watches: %v", err)
44+
}
45+
if watchErrors > 0 {
46+
log.Printf("fswatch: watching %d directories (%d errors — you may need to increase fs.inotify.max_user_watches)", watchCount, watchErrors)
47+
} else {
48+
log.Printf("fswatch: watching %d directories", watchCount)
49+
}
50+
51+
go func() {
52+
if err := b.watchContentLoop(ctx, w); err != nil {
53+
log.Printf("fswatch: event channel closed unexpectedly")
54+
}
55+
}()
56+
57+
return nil
58+
}
59+
60+
func (b *bullServer) watchContentLoop(ctx context.Context, w *fsnotify.Watcher) error {
61+
defer w.Close()
62+
63+
// Debounce notification: coalesce rapid events (e.g. git pull)
64+
// into a single notification after 100ms of quiet.
65+
var debounceTimer *time.Timer
66+
resetDebounce := func() {
67+
if debounceTimer != nil {
68+
debounceTimer.Stop()
69+
}
70+
// Fires on the timer goroutine; safe because notifyContentChanged is goroutine-safe.
71+
debounceTimer = time.AfterFunc(100*time.Millisecond, func() {
72+
b.notifyContentChanged()
73+
})
74+
}
75+
76+
for {
77+
select {
78+
case <-ctx.Done():
79+
if debounceTimer != nil {
80+
debounceTimer.Stop()
81+
}
82+
return nil
83+
84+
case event, ok := <-w.Events:
85+
if !ok {
86+
return fmt.Errorf("fswatch: event channel closed unexpectedly")
87+
}
88+
if b.handleContentEvent(w, event) {
89+
resetDebounce()
90+
}
91+
92+
case err, ok := <-w.Errors:
93+
if !ok {
94+
return fmt.Errorf("fswatch: error channel closed unexpectedly")
95+
}
96+
if errors.Is(err, fsnotify.ErrEventOverflow) {
97+
log.Printf("fswatch: event queue overflowed, rebuilding index")
98+
if idx, err := b.index(); err == nil {
99+
b.idxMu.Lock()
100+
b.idx.Store(idx)
101+
b.idxMu.Unlock()
102+
b.notifyContentChanged()
103+
} else {
104+
log.Printf("fswatch: re-index after overflow: %v", err)
105+
}
106+
} else {
107+
log.Printf("fswatch: %v", err)
108+
}
109+
}
110+
}
111+
}
112+
113+
// handleContentEvent processes a single fsnotify event.
114+
// It returns true if the index was updated (caller should notify).
115+
func (b *bullServer) handleContentEvent(w *fsnotify.Watcher, event fsnotify.Event) bool {
116+
name := event.Name
117+
118+
rel, err := filepath.Rel(b.contentDir, name)
119+
if err != nil {
120+
log.Printf("fswatch: unexpected path %q: %v", name, err)
121+
return false
122+
}
123+
rel = filepath.ToSlash(rel)
124+
125+
if !filepath.IsLocal(rel) {
126+
return false // prevent path traversal
127+
}
128+
129+
// For removed/renamed paths, try to remove the watch
130+
// (handles directory renames/deletes that would leak watches).
131+
// Errors are expected here: fsnotify watches directories, not files,
132+
// and inotify auto-removes watches for deleted inodes on Linux.
133+
if event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename) {
134+
w.Remove(name)
135+
}
136+
137+
// For new directories, add them to the watcher, then scan for
138+
// files that may have been created before the watch was established.
139+
// Use os.Lstat (not fs.Stat) to avoid following symlinks, consistent
140+
// with fs.WalkDir which also does not follow symlinks.
141+
if event.Has(fsnotify.Create) {
142+
info, err := os.Lstat(filepath.Join(b.contentDir, rel))
143+
if err == nil && info.IsDir() {
144+
if filepath.Base(rel) == ".git" {
145+
return false
146+
}
147+
if err := w.Add(filepath.Join(b.contentDir, rel)); err != nil {
148+
log.Printf("fswatch: watch %s: %v", rel, err)
149+
}
150+
return b.scanNewDir(w, rel)
151+
}
152+
}
153+
154+
// Only process markdown files.
155+
if !isMarkdown(rel) {
156+
return false
157+
}
158+
159+
pageName := file2page(rel)
160+
161+
switch {
162+
case event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename):
163+
// Reading idx outside the lock is a benign TOCTOU: worst case we call
164+
// removeFromIndex redundantly (it rechecks under the lock).
165+
if old := b.idx.Load().links[pageName]; old == nil {
166+
return false // already absent from the index
167+
}
168+
b.removeFromIndex(pageName)
169+
return true
170+
171+
case event.Has(fsnotify.Create) || event.Has(fsnotify.Write):
172+
pg, err := b.read(rel)
173+
if err != nil {
174+
log.Printf("fswatch: read %s: %v", rel, err)
175+
return false
176+
}
177+
targets, err := b.linkTargets(pg)
178+
if err != nil {
179+
log.Printf("fswatch: linkTargets %s: %v", rel, err)
180+
return false
181+
}
182+
// Reading idx outside the lock is a benign TOCTOU: worst case we call
183+
// updateIndex redundantly (it rechecks and stores an identical snapshot).
184+
if slices.Equal(b.idx.Load().links[pageName], targets) {
185+
return false // index already up to date (e.g. save already applied)
186+
}
187+
b.updateIndex(pageName, targets)
188+
return true
189+
}
190+
return false
191+
}
192+
193+
// scanNewDir indexes any markdown files (and subdirectories) already present
194+
// in a newly created directory, closing the race between mkdir and w.Add.
195+
// Uses a two-phase approach: walk+parse outside the lock, then apply all
196+
// updates under a single lock acquisition.
197+
func (b *bullServer) scanNewDir(w *fsnotify.Watcher, dir string) bool {
198+
type indexEntry struct {
199+
pageName string
200+
targets []string
201+
}
202+
203+
// Phase 1: walk and parse (no lock held).
204+
var entries []indexEntry
205+
if err := fs.WalkDir(b.content.FS(), dir, func(p string, d fs.DirEntry, err error) error {
206+
if err != nil {
207+
log.Printf("fswatch: scanNewDir walk %s: %v", p, err)
208+
return nil
209+
}
210+
if d.IsDir() {
211+
if d.Name() == ".git" {
212+
return fs.SkipDir
213+
}
214+
if p != dir {
215+
if err := w.Add(filepath.Join(b.contentDir, p)); err != nil {
216+
log.Printf("fswatch: watch %s: %v", p, err)
217+
}
218+
}
219+
return nil
220+
}
221+
if !isMarkdown(p) {
222+
return nil
223+
}
224+
pg, err := b.read(p)
225+
if err != nil {
226+
log.Printf("fswatch: scanNewDir read %s: %v", p, err)
227+
return nil
228+
}
229+
targets, err := b.linkTargets(pg)
230+
if err != nil {
231+
log.Printf("fswatch: scanNewDir linkTargets %s: %v", p, err)
232+
return nil
233+
}
234+
entries = append(entries, indexEntry{pageName: file2page(p), targets: targets})
235+
return nil
236+
}); err != nil {
237+
log.Printf("fswatch: scanNewDir walk %s: %v", dir, err)
238+
}
239+
240+
if len(entries) == 0 {
241+
return false
242+
}
243+
244+
// Phase 2: apply all updates in a single clone-patch-store cycle.
245+
updates := make([]indexUpdate, len(entries))
246+
for idx, entry := range entries {
247+
updates[idx] = indexUpdate(entry)
248+
}
249+
b.idxMu.Lock()
250+
defer b.idxMu.Unlock()
251+
b.applyIndexBatchLocked(nil, updates)
252+
return true
253+
}

0 commit comments

Comments
 (0)