Skip to content

Commit 8565d01

Browse files
committed
enhance: Add logs for failed health checks
Signed-off-by: Wei Liu <[email protected]>
1 parent 3cd7403 commit 8565d01

File tree

4 files changed

+18
-9
lines changed

4 files changed

+18
-9
lines changed

internal/datacoord/server.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ func (s *Server) Init() error {
318318
log.Info("DataCoord startup success")
319319
return nil
320320
}
321-
s.stateCode.Store(commonpb.StateCode_StandBy)
321+
s.UpdateStateCode(commonpb.StateCode_StandBy)
322322
log.Info("DataCoord enter standby mode successfully")
323323
return nil
324324
}
@@ -328,7 +328,7 @@ func (s *Server) Init() error {
328328

329329
func (s *Server) initDataCoord() error {
330330
log := log.Ctx(s.ctx)
331-
s.stateCode.Store(commonpb.StateCode_Initializing)
331+
s.UpdateStateCode(commonpb.StateCode_Initializing)
332332
var err error
333333
if err = s.initRootCoordClient(); err != nil {
334334
return err
@@ -463,7 +463,7 @@ func (s *Server) startDataCoord() {
463463
// })
464464

465465
s.afterStart()
466-
s.stateCode.Store(commonpb.StateCode_Healthy)
466+
s.UpdateStateCode(commonpb.StateCode_Healthy)
467467
sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID())
468468
}
469469

internal/datacoord/services.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,12 @@ func (s *Server) GetStateCode() commonpb.StateCode {
688688
return code.(commonpb.StateCode)
689689
}
690690

691+
// UpdateStateCode stores the given state code as DataCoord's current state and logs the new state.
692+
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
693+
s.stateCode.Store(code)
694+
log.Ctx(s.ctx).Info("update datacoord state", zap.String("state", code.String()))
695+
}
696+
691697
// GetComponentStates returns DataCoord's current state
692698
func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
693699
code := s.GetStateCode()

internal/http/healthz/healthz_handler.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,28 +90,30 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
9090
resp := &HealthResponse{
9191
State: "OK",
9292
}
93+
94+
unhealthyComponent := make([]string, 0)
9395
ctx := context.Background()
94-
healthNum := 0
9596
for _, in := range handler.indicators {
9697
handler.unregisterLock.RLock()
9798
_, unregistered := handler.unregisteredRoles[in.GetName()]
9899
handler.unregisterLock.RUnlock()
99100
if unregistered {
100-
healthNum++
101101
continue
102102
}
103103
code := in.Health(ctx)
104104
resp.Detail = append(resp.Detail, &IndicatorState{
105105
Name: in.GetName(),
106106
Code: code,
107107
})
108-
if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy {
109-
healthNum++
108+
109+
if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy {
110+
unhealthyComponent = append(unhealthyComponent, in.GetName())
110111
}
111112
}
112113

113-
if healthNum != handler.indicatorNum {
114-
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum)
114+
if len(unhealthyComponent) > 0 {
115+
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", handler.indicatorNum-len(unhealthyComponent), handler.indicatorNum)
116+
log.Info("check health failed", zap.Strings("UnhealthyComponent", unhealthyComponent))
115117
}
116118

117119
if resp.State == "OK" {

internal/querycoordv2/server.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,7 @@ func (s *Server) Stop() error {
671671
// UpdateStateCode updates the status of the coord (e.g. healthy, unhealthy) and logs the new state.
672672
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
673673
s.status.Store(int32(code))
674+
log.Ctx(s.ctx).Info("update querycoord state", zap.String("state", code.String()))
674675
}
675676

676677
func (s *Server) State() commonpb.StateCode {

0 commit comments

Comments
 (0)