From b9a7aed91ba09fa0398fecd8a79ee996e98f41b1 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Thu, 12 Dec 2024 21:45:28 +0000 Subject: [PATCH 1/3] Do not wait for ready notify if the server is stopping Signed-off-by: Benjamin Wang --- server/embed/serve.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/embed/serve.go b/server/embed/serve.go index 91ec6e9a376..8af26a8227c 100644 --- a/server/embed/serve.go +++ b/server/embed/serve.go @@ -16,6 +16,7 @@ package embed import ( "context" + "errors" "fmt" "io/ioutil" defaultLog "log" @@ -100,6 +101,12 @@ func (sctx *serveCtx) serve( logger := defaultLog.New(ioutil.Discard, "etcdhttp", 0) <-s.ReadyNotify() + select { + case <-s.StoppingNotify(): + return errors.New("server is stopping") + case <-s.ReadyNotify(): + } + sctx.lg.Info("ready to serve client requests") m := cmux.New(sctx.l) From 9b241941202bb939a29248bfaaab5f7b91fe8cd3 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Fri, 13 Dec 2024 10:44:14 +0000 Subject: [PATCH 2/3] Remove duplicated <-s.ReadyNotify() Signed-off-by: Benjamin Wang --- server/embed/serve.go | 1 - 1 file changed, 1 deletion(-) diff --git a/server/embed/serve.go b/server/embed/serve.go index 8af26a8227c..99abe515db3 100644 --- a/server/embed/serve.go +++ b/server/embed/serve.go @@ -99,7 +99,6 @@ func (sctx *serveCtx) serve( splitHttp bool, gopts ...grpc.ServerOption) (err error) { logger := defaultLog.New(ioutil.Discard, "etcdhttp", 0) - <-s.ReadyNotify() select { case <-s.StoppingNotify(): From 80b0a73ee2fe74923f87e2b9aaf675b54e752cf5 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Sat, 11 Jan 2025 07:17:00 +0000 Subject: [PATCH 3/3] Avoid deadlock in etcd.Close when stopping during bootstrapping Signed-off-by: Joshua Zhang --- server/embed/etcd.go | 7 +++- server/embed/serve.go | 13 +++++- server/etcdserver/server.go | 1 + tests/integration/embed/embed_test.go | 58 ++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/server/embed/etcd.go b/server/embed/etcd.go index b2c7fee4482..10ed426d6b2 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -86,6 +86,7 @@ type Etcd struct { errc chan error closeOnce sync.Once + wg sync.WaitGroup } type peerListener struct { @@ -111,7 +112,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { if !serving { // errored before starting gRPC server for serveCtx.serversC for _, sctx := range e.sctxs { - close(sctx.serversC) + sctx.close() } } e.Close() @@ -436,6 +437,7 @@ func (e *Etcd) Close() { } } if e.errc != nil { + e.wg.Wait() close(e.errc) } } @@ -880,6 +882,9 @@ func (e *Etcd) serveMetrics() (err error) { } func (e *Etcd) errHandler(err error) { + e.wg.Add(1) + defer e.wg.Done() + select { case <-e.stopc: return diff --git a/server/embed/serve.go b/server/embed/serve.go index 99abe515db3..1a46f51af93 100644 --- a/server/embed/serve.go +++ b/server/embed/serve.go @@ -23,6 +23,7 @@ import ( "net" "net/http" "strings" + "sync" etcdservergw "go.etcd.io/etcd/api/v3/etcdserverpb/gw" "go.etcd.io/etcd/client/pkg/v3/transport" @@ -65,6 +66,7 @@ type serveCtx struct { userHandlers map[string]http.Handler serviceRegister func(*grpc.Server) serversC chan *servers + closeOnce sync.Once } type servers struct { @@ -100,6 +102,9 @@ func (sctx *serveCtx) serve( gopts ...grpc.ServerOption) (err error) { logger := defaultLog.New(ioutil.Discard, "etcdhttp", 0) + // Make sure serversC is closed even if we prematurely exit the function. + defer sctx.close() + select { case <-s.StoppingNotify(): return errors.New("server is stopping") @@ -119,8 +124,6 @@ func (sctx *serveCtx) serve( servElection := v3election.NewElectionServer(v3c) servLock := v3lock.NewLockServer(v3c) - // Make sure serversC is closed even if we prematurely exit the function. - defer close(sctx.serversC) var gwmux *gw.ServeMux if s.Cfg.EnableGRPCGateway { // GRPC gateway connects to grpc server via connection provided by grpc dial. @@ -503,3 +506,9 @@ func (sctx *serveCtx) registerTrace() { evf := func(w http.ResponseWriter, r *http.Request) { trace.RenderEvents(w, r, true) } sctx.registerUserHandler("/debug/events", http.HandlerFunc(evf)) } + +func (sctx *serveCtx) close() { + sctx.closeOnce.Do(func() { + close(sctx.serversC) + }) +} diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index d60c8da64fc..513ef6bd7b8 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -2130,6 +2130,7 @@ func (s *EtcdServer) publish(timeout time.Duration) { Val: string(b), } + // gofail: var beforePublishing struct{} for { ctx, cancel := context.WithTimeout(s.ctx, timeout) _, err := s.Do(ctx, req) diff --git a/tests/integration/embed/embed_test.go b/tests/integration/embed/embed_test.go index 27da5bf473b..e45c7ade229 100644 --- a/tests/integration/embed/embed_test.go +++ b/tests/integration/embed/embed_test.go @@ -30,11 +30,14 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/client/pkg/v3/transport" - "go.etcd.io/etcd/client/v3" + clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/server/v3/embed" "go.etcd.io/etcd/tests/v3/integration" + gofail "go.etcd.io/gofail/runtime" ) var ( @@ -210,3 +213,56 @@ func setupEmbedCfg(cfg *embed.Config, curls []url.URL, purls []url.URL) { } cfg.InitialCluster = cfg.InitialCluster[1:] } + +func TestEmbedEtcdStopDuringBootstrapping(t *testing.T) { + if len(gofail.List()) == 0 { + t.Skip("please run 'make gofail-enable' before running the test") + } + + fpName := "beforePublishing" + require.NoError(t, gofail.Enable(fpName, `sleep("2s")`)) + t.Cleanup(func() { + terr := gofail.Disable(fpName) + if terr != nil && terr != gofail.ErrDisabled { + t.Fatalf("failed to disable %s: %v", fpName, terr) + } + }) + + done := make(chan struct{}) + go func() { + defer close(done) + + cfg := embed.NewConfig() + urls := newEmbedURLs(false, 2) + setupEmbedCfg(cfg, []url.URL{urls[0]}, []url.URL{urls[1]}) + cfg.Dir = filepath.Join(t.TempDir(), "embed-etcd") + + e, err := embed.StartEtcd(cfg) + if err != nil { + t.Errorf("Failed to start etcd, got error %v", err) + } + defer e.Close() + + go func() { + time.Sleep(time.Second) + e.Server.Stop() + t.Log("Stopped server during bootstrapping") + }() + + select { + case <-e.Server.ReadyNotify(): + t.Log("Server is ready!") + case <-e.Server.StopNotify(): + t.Log("Server is stopped") + case <-time.After(20 * time.Second): + e.Server.Stop() // trigger a shutdown + t.Error("Server took too long to start!") + } + }() + + select { + case <-done: + case <-time.After(10 * time.Second): + t.Error("timeout in bootstrapping etcd") + } +}