diff --git a/app/main.go b/app/main.go index 81a70df1..90a80cb6 100644 --- a/app/main.go +++ b/app/main.go @@ -18,6 +18,7 @@ import ( "net/url" "os" "os/signal" + "path/filepath" "strconv" "strings" "sync" @@ -1159,6 +1160,43 @@ var newWatcher = func() (fileWatcher, error) { return fswatcher{w}, nil } +var watchAbsPath = filepath.Abs + +const watchDebounceDelay = 100 * time.Millisecond + +type watchDebouncer struct { + delay time.Duration + timer *time.Timer + timerC <-chan time.Time +} + +func newWatchDebouncer(delay time.Duration) *watchDebouncer { + return &watchDebouncer{delay: delay} +} + +func (d *watchDebouncer) trigger() <-chan time.Time { + if d.timer == nil { + d.timer = time.NewTimer(d.delay) + d.timerC = d.timer.C + return d.timerC + } + d.timer.Stop() + d.timer.Reset(d.delay) + d.timerC = d.timer.C + return d.timerC +} + +func (d *watchDebouncer) fired() { + d.timerC = nil +} + +func (d *watchDebouncer) stop() { + if d.timer == nil { + return + } + d.timer.Stop() +} + func watchFiles(ctx context.Context, files []string, out chan<- struct{}) { w, err := newWatcher() if err != nil { @@ -1167,9 +1205,29 @@ func watchFiles(ctx context.Context, files []string, out chan<- struct{}) { } defer w.Close() + watchedDirs := make(map[string]struct{}) for _, f := range files { - if err := w.Add(f); err != nil { - logger.Error("watch add failed", "file", f, "error", err) + abs, err := watchAbsPath(f) + if err != nil { + logger.Error("watch path failed", "file", f, "error", err) + continue + } + watchedDirs[filepath.Dir(filepath.Clean(abs))] = struct{}{} + } + + for dir := range watchedDirs { + if err := w.Add(dir); err != nil { + logger.Error("watch add failed", "dir", dir, "error", err) + } + } + + debouncer := newWatchDebouncer(watchDebounceDelay) + defer debouncer.stop() + var timerC <-chan time.Time + notify := func() { + select { + case out <- struct{}{}: + default: } } @@ -1177,36 +1235,16 @@ func watchFiles(ctx context.Context, files []string, out chan<- struct{}) { select { case <-ctx.Done(): return + case <-timerC: + timerC = nil + debouncer.fired() + notify() case ev, ok := <-w.Events(): if !ok { return } - if ev.Op&(fsnotify.Rename|fsnotify.Remove) != 0 { - go func(name string) { - for i := 0; i < 50; i++ { - if err := w.Add(name); err == nil { - return - } else if !os.IsNotExist(err) { - logger.Error("watch re-add failed", "file", name, "error", err) - return - } - select { - case <-ctx.Done(): - return - case <-time.After(10 * time.Millisecond): - } - } - }(ev.Name) - } else if ev.Op&fsnotify.Create != 0 { - if err := w.Add(ev.Name); err != nil && !os.IsNotExist(err) { - logger.Error("watch re-add failed", "file", ev.Name, "error", err) - } - } - if ev.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Rename) != 0 { - select { - case out <- struct{}{}: - default: - } + if watchEventInDirs(ev, watchedDirs) && ev.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Rename|fsnotify.Remove|fsnotify.Chmod) != 0 { + timerC = debouncer.trigger() } case err, ok := <-w.Errors(): if !ok { @@ -1217,6 +1255,21 @@ func watchFiles(ctx context.Context, files []string, out chan<- struct{}) { } } +func watchEventInDirs(ev fsnotify.Event, watchedDirs map[string]struct{}) bool { + if ev.Name == "" { + return false + } + name := filepath.Clean(ev.Name) + if !filepath.IsAbs(name) { + abs, err := filepath.Abs(name) + if err == nil { + name = abs + } + } + _, ok := watchedDirs[filepath.Dir(name)] + return ok +} + // healthzHandler reports server readiness. func healthzHandler(w http.ResponseWriter, r *http.Request) { w.Header().Set("X-Last-Reload", metrics.LastReloadTime.Value()) diff --git a/app/watch_files_test.go b/app/watch_files_test.go index 3d2b5aa3..c1ff1854 100644 --- a/app/watch_files_test.go +++ b/app/watch_files_test.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "os" + "path/filepath" + "runtime" "testing" "time" @@ -69,11 +71,11 @@ func TestWatchFilesRename(t *testing.T) { t.Fatal("timeout waiting for rename event") } - // recreate the original file and modify it; watcher should fire again + // recreate the original file and modify it; the directory watch should fire again if err := os.WriteFile(name, []byte("x"), 0o644); err != nil { t.Fatal(err) } - // give watcher time to re-add + // give the filesystem time to deliver the create event before the write time.Sleep(50 * time.Millisecond) if err := os.WriteFile(name, []byte("y"), 0o644); err != nil { t.Fatal(err) @@ -87,6 +89,63 @@ func TestWatchFilesRename(t *testing.T) { } } +func TestWatchFilesKubernetesSymlinkSwap(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("projected-volume symlink semantics are Unix-specific") + } + + dir := t.TempDir() + writeProjectedConfig(t, dir, 1, "one", true) + name := filepath.Join(dir, "config.yaml") + + ch := make(chan struct{}, 2) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go watchFiles(ctx, []string{name}, ch) + + // give watcher time to start + time.Sleep(50 * time.Millisecond) + + writeProjectedConfig(t, dir, 2, "two", false) + + select { + case <-ch: + // symlink swap detected + case <-time.After(time.Second): + t.Fatal("timeout waiting for projected volume update") + } +} + +func writeProjectedConfig(t *testing.T, dir string, rev int, contents string, createFileLink bool) { + t.Helper() + + dataDir := filepath.Join(dir, fmt.Sprintf("..2026_01_01_00_00_%02d.000000000", rev)) + if err := os.Mkdir(dataDir, 0o755); err != nil { + t.Fatalf("mkdir projected data dir: %v", err) + } + if err := os.WriteFile(filepath.Join(dataDir, "config.yaml"), []byte(contents), 0o644); err != nil { + t.Fatalf("write projected config: %v", err) + } + + tmpLink := filepath.Join(dir, "..data_tmp") + dataLink := filepath.Join(dir, "..data") + if err := os.Remove(tmpLink); err != nil && !os.IsNotExist(err) { + t.Fatalf("remove stale projected tmp link: %v", err) + } + if err := os.Symlink(filepath.Base(dataDir), tmpLink); err != nil { + t.Fatalf("create projected tmp link: %v", err) + } + if err := os.Rename(tmpLink, dataLink); err != nil { + t.Fatalf("swap projected data link: %v", err) + } + + if createFileLink { + if err := os.Symlink(filepath.Join("..data", "config.yaml"), filepath.Join(dir, "config.yaml")); err != nil { + t.Fatalf("create projected config link: %v", err) + } + } +} + func TestWatchFilesCancel(t *testing.T) { ch := make(chan struct{}, 1) ctx, cancel := context.WithCancel(context.Background()) @@ -112,11 +171,25 @@ type mockWatcher struct { addErr error } -func (m *mockWatcher) Add(name string) error { return m.addErr } -func (m *mockWatcher) Close() error { close(m.events); close(m.errors); return nil } +func (m *mockWatcher) Add(name string) error { return m.addErr } +func (m *mockWatcher) Close() error { + safeCloseEvents(m.events) + safeCloseErrors(m.errors) + return nil +} func (m *mockWatcher) Events() <-chan fsnotify.Event { return m.events } func (m *mockWatcher) Errors() <-chan error { return m.errors } +func safeCloseEvents(ch chan fsnotify.Event) { + defer func() { _ = recover() }() + close(ch) +} + +func safeCloseErrors(ch chan error) { + defer func() { _ = recover() }() + close(ch) +} + func TestWatchFilesError(t *testing.T) { mw := &mockWatcher{events: make(chan fsnotify.Event), errors: make(chan error, 1)} old := newWatcher @@ -127,52 +200,159 @@ func TestWatchFilesError(t *testing.T) { done := make(chan struct{}) go func() { watchFiles(ctx, nil, make(chan struct{})); close(done) }() mw.errors <- fmt.Errorf("boom") - cancel() + close(mw.errors) <-done + cancel() +} + +func TestWatchFilesPathError(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event), errors: make(chan error)} + oldWatcher := newWatcher + newWatcher = func() (fileWatcher, error) { return mw, nil } + defer func() { newWatcher = oldWatcher }() + oldAbsPath := watchAbsPath + watchAbsPath = func(string) (string, error) { return "", fmt.Errorf("abs failed") } + defer func() { watchAbsPath = oldAbsPath }() + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + go func() { + watchFiles(ctx, []string{"config.yaml"}, make(chan struct{})) + close(done) + }() + time.Sleep(50 * time.Millisecond) + cancel() + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("watchFiles did not exit after path error") + } } -func TestWatchFilesRenameAddError(t *testing.T) { - mw := &mockWatcher{events: make(chan fsnotify.Event, 1), errors: make(chan error), addErr: fmt.Errorf("fail")} +func TestWatchFilesReturnsWhenEventsClosed(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event), errors: make(chan error)} old := newWatcher newWatcher = func() (fileWatcher, error) { return mw, nil } defer func() { newWatcher = old }() + done := make(chan struct{}) + go func() { + watchFiles(context.Background(), []string{filepath.Join(t.TempDir(), "config.yaml")}, make(chan struct{})) + close(done) + }() + close(mw.events) + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("watchFiles did not exit after events channel closed") + } +} + +func TestWatchFilesReturnsWhenErrorsClosed(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event), errors: make(chan error)} + old := newWatcher + newWatcher = func() (fileWatcher, error) { return mw, nil } + defer func() { newWatcher = old }() + + done := make(chan struct{}) + go func() { + watchFiles(context.Background(), []string{filepath.Join(t.TempDir(), "config.yaml")}, make(chan struct{})) + close(done) + }() + close(mw.errors) + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("watchFiles did not exit after errors channel closed") + } +} + +func TestWatchFilesIgnoresIrrelevantEvents(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event, 2), errors: make(chan error)} + old := newWatcher + newWatcher = func() (fileWatcher, error) { return mw, nil } + defer func() { newWatcher = old }() + + dir := t.TempDir() + name := filepath.Join(dir, "config.yaml") ch := make(chan struct{}, 1) ctx, cancel := context.WithCancel(context.Background()) + defer cancel() done := make(chan struct{}) - go func() { watchFiles(ctx, []string{"f"}, ch); close(done) }() - mw.events <- fsnotify.Event{Name: "f", Op: fsnotify.Rename} + go func() { watchFiles(ctx, []string{name}, ch); close(done) }() + + mw.events <- fsnotify.Event{Name: filepath.Join(t.TempDir(), "other.yaml"), Op: fsnotify.Write} + mw.events <- fsnotify.Event{Name: name, Op: 0} + select { case <-ch: - case <-time.After(time.Second): - t.Fatal("timeout waiting for event") + t.Fatal("unexpected notification for irrelevant events") + case <-time.After(2*watchDebounceDelay + 50*time.Millisecond): } cancel() <-done } -func TestWatchFilesCreateAddError(t *testing.T) { - mw := &mockWatcher{events: make(chan fsnotify.Event, 1), errors: make(chan error), addErr: fmt.Errorf("boom")} +func TestWatchFilesDropsNotificationWhenOutputFull(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event, 1), errors: make(chan error)} old := newWatcher newWatcher = func() (fileWatcher, error) { return mw, nil } defer func() { newWatcher = old }() + name := filepath.Join(t.TempDir(), "config.yaml") ch := make(chan struct{}, 1) + ch <- struct{}{} ctx, cancel := context.WithCancel(context.Background()) done := make(chan struct{}) - go func() { watchFiles(ctx, []string{"f"}, ch); close(done) }() - mw.events <- fsnotify.Event{Name: "f", Op: fsnotify.Create} + go func() { watchFiles(ctx, []string{name}, ch); close(done) }() + + mw.events <- fsnotify.Event{Name: name, Op: fsnotify.Write} + time.Sleep(2*watchDebounceDelay + 50*time.Millisecond) + cancel() + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("watchFiles blocked when output channel was full") + } +} + +func TestWatchFilesDebouncesBurst(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event, 2), errors: make(chan error)} + old := newWatcher + newWatcher = func() (fileWatcher, error) { return mw, nil } + defer func() { newWatcher = old }() + + name := filepath.Join(t.TempDir(), "config.yaml") + ch := make(chan struct{}, 2) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + done := make(chan struct{}) + go func() { watchFiles(ctx, []string{name}, ch); close(done) }() + mw.events <- fsnotify.Event{Name: name, Op: fsnotify.Write} + mw.events <- fsnotify.Event{Name: name, Op: fsnotify.Write} + select { case <-ch: + // success case <-time.After(time.Second): - t.Fatal("timeout waiting for create event") + t.Fatal("timeout waiting for debounced event") + } + + select { + case <-ch: + t.Fatal("expected burst events to be coalesced") + case <-time.After(2*watchDebounceDelay + 50*time.Millisecond): } cancel() <-done } -func TestWatchFilesCreateAddMissing(t *testing.T) { - mw := &mockWatcher{events: make(chan fsnotify.Event, 1), errors: make(chan error), addErr: os.ErrNotExist} +func TestWatchFilesAddError(t *testing.T) { + mw := &mockWatcher{events: make(chan fsnotify.Event), errors: make(chan error), addErr: fmt.Errorf("boom")} old := newWatcher newWatcher = func() (fileWatcher, error) { return mw, nil } defer func() { newWatcher = old }() @@ -181,14 +361,13 @@ func TestWatchFilesCreateAddMissing(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) done := make(chan struct{}) go func() { watchFiles(ctx, []string{"f"}, ch); close(done) }() - mw.events <- fsnotify.Event{Name: "f", Op: fsnotify.Create} + time.Sleep(50 * time.Millisecond) + cancel() select { - case <-ch: + case <-done: case <-time.After(time.Second): - t.Fatal("timeout waiting for create event") + t.Fatal("watchFiles did not exit after add error") } - cancel() - <-done } func TestWatchFilesNewWatcherError(t *testing.T) { @@ -204,3 +383,79 @@ func TestWatchFilesNewWatcherError(t *testing.T) { t.Fatal("watchFiles did not exit on watcher error") } } + +func TestWatchDebouncer(t *testing.T) { + d := newWatchDebouncer(10 * time.Millisecond) + d.stop() + + first := d.trigger() + if first == nil { + t.Fatal("missing first timer channel") + } + second := d.trigger() + if second == nil { + t.Fatal("missing reset timer channel") + } + + select { + case <-second: + case <-time.After(time.Second): + t.Fatal("debouncer timer did not fire") + } + d.fired() + if d.timerC != nil { + t.Fatal("expected fired debouncer to clear timer channel") + } + d.stop() +} + +func TestWatchEventInDirs(t *testing.T) { + absDir := t.TempDir() + absName := filepath.Join(absDir, "config.yaml") + relDir := "relative-watch-dir" + relName := filepath.Join(relDir, "config.yaml") + absRelDir, err := filepath.Abs(relDir) + if err != nil { + t.Fatal(err) + } + + watched := map[string]struct{}{ + absDir: {}, + absRelDir: {}, + } + + tests := []struct { + name string + ev fsnotify.Event + want bool + }{ + { + name: "empty name", + ev: fsnotify.Event{Name: "", Op: fsnotify.Write}, + want: false, + }, + { + name: "absolute path in watched dir", + ev: fsnotify.Event{Name: absName, Op: fsnotify.Write}, + want: true, + }, + { + name: "relative path in watched dir", + ev: fsnotify.Event{Name: relName, Op: fsnotify.Write}, + want: true, + }, + { + name: "unwatched dir", + ev: fsnotify.Event{Name: filepath.Join(t.TempDir(), "config.yaml"), Op: fsnotify.Write}, + want: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := watchEventInDirs(tc.ev, watched); got != tc.want { + t.Fatalf("watchEventInDirs() = %v, want %v", got, tc.want) + } + }) + } +} diff --git a/charts/authtranslator/README.md b/charts/authtranslator/README.md index 4a15926f..a0a85590 100644 --- a/charts/authtranslator/README.md +++ b/charts/authtranslator/README.md @@ -12,6 +12,7 @@ This chart deploys [AuthTranslator](https://github.com/winhowes/AuthTranslator) | `redisAddress` | Address passed to `-redis-addr` – either `host:port` or a `redis://`/`rediss://` URL | `""` | | `redisCA` | CA file for verifying Redis TLS passed to `-redis-ca` | `""` | | `secretRefresh` | Value passed to `-secret-refresh` | `""` | +| `watch` | Run with `-watch` so projected ConfigMap updates trigger reloads | `true` | | `resources` | Pod resource requests/limits | see `values.yaml` | | `imagePullSecrets` | List of image pull secrets | `[]` | | `serviceAccountName` | Pod service account | `""` | diff --git a/charts/authtranslator/templates/deployment.yaml b/charts/authtranslator/templates/deployment.yaml index 600288fa..6f173eda 100644 --- a/charts/authtranslator/templates/deployment.yaml +++ b/charts/authtranslator/templates/deployment.yaml @@ -30,6 +30,15 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} command: ["./authtranslator"] args: + - "-config" + - "/conf/config.yaml" + - "-allowlist" + - "/conf/allowlist.yaml" + - "-denylist" + - "/conf/denylist.yaml" + {{- if .Values.watch }} + - "-watch" + {{- end }} {{- if .Values.redisAddress }} - "-redis-addr" - {{ .Values.redisAddress | quote }} @@ -44,14 +53,8 @@ spec: {{- end }} volumeMounts: - name: config - mountPath: /conf/config.yaml - subPath: config.yaml - - name: config - mountPath: /conf/allowlist.yaml - subPath: allowlist.yaml - - name: config - mountPath: /conf/denylist.yaml - subPath: denylist.yaml + mountPath: /conf + readOnly: true ports: - containerPort: 8080 name: http diff --git a/charts/authtranslator/values.yaml b/charts/authtranslator/values.yaml index d8a69b69..8930a7a2 100644 --- a/charts/authtranslator/values.yaml +++ b/charts/authtranslator/values.yaml @@ -6,6 +6,7 @@ image: redisAddress: "" redisCA: "" secretRefresh: "" +watch: true resources: limits: diff --git a/docs/helm.md b/docs/helm.md index cefdf72c..6f782e98 100644 --- a/docs/helm.md +++ b/docs/helm.md @@ -34,6 +34,7 @@ This: | `redisAddress` | `""` | Address passed to `-redis-addr` – either `host:port` or a `redis://`/`rediss://` URL. | | `redisCA` | `""` | CA file for `-redis-ca`; empty skips TLS verification. | | `secretRefresh` | `""` | Value passed to `-secret-refresh`. | +| `watch` | `true` | Run with `-watch` so projected ConfigMap updates trigger reloads. | | `resources` | *(object)* | Pod resource requests/limits. | | `imagePullSecrets` | `[]` | Names of image pull secrets. | | `serviceAccountName` | `""` | Pod service account name. | diff --git a/docs/runtime.md b/docs/runtime.md index 4d6a2122..ab577972 100644 --- a/docs/runtime.md +++ b/docs/runtime.md @@ -13,7 +13,7 @@ This guide explains how AuthTranslator behaves at runtime and lists the service ## Hot reload -Send `SIGHUP` or run with `-watch` to reload the configuration, allowlist, and denylist files without dropping connections. The watcher re-adds itself when files are replaced so edits trigger a reload automatically. **`-watch` only tracks local file paths** – if you supply `-config-url`, `-allowlist-url`, or `-denylist-url` (including `file://` URIs) the daemon skips file watching, so use `SIGHUP` or another orchestrated reload instead. Remote configuration URLs honour the `-remote-fetch-timeout` flag (default 10 seconds) when fetching over HTTP. +Send `SIGHUP` or run with `-watch` to reload the configuration, allowlist, and denylist files without dropping connections. The watcher monitors each file's containing directory and debounces event bursts, so edits, atomic file replacement, and Kubernetes projected ConfigMap/Secret symlink updates trigger a single reload. In Kubernetes, mount the whole ConfigMap/Secret volume rather than individual keys with `subPath`; subPath-mounted files are not updated by Kubernetes after the pod starts. **`-watch` only tracks local file paths** – if you supply `-config-url`, `-allowlist-url`, or `-denylist-url` (including `file://` URIs) the daemon skips file watching, so use `SIGHUP` or another orchestrated reload instead. Remote configuration URLs honour the `-remote-fetch-timeout` flag (default 10 seconds) when fetching over HTTP. ---