From 8f09cce19663bdd68bc8652f80f602a9fbd404ce Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Thu, 27 Nov 2025 17:49:13 +0300
Subject: [PATCH 1/8] PMM-14442 Fix the data race
---
managed/services/ha/services.go | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/managed/services/ha/services.go b/managed/services/ha/services.go
index d4c35e54bf8..157c04e3032 100644
--- a/managed/services/ha/services.go
+++ b/managed/services/ha/services.go
@@ -68,14 +68,14 @@ func (s *services) StartAllServices(ctx context.Context) {
if _, ok := s.running[id]; !ok {
s.wg.Add(1)
s.running[id] = service
- go func() {
- s.l.Infoln("Starting", service.ID())
- err := service.Start(ctx)
+ go func(svc LeaderService, svcID string) {
+ s.l.Infoln("Starting", svcID)
+ err := svc.Start(ctx)
if err != nil {
s.l.Errorln(err)
- s.removeService(service.ID())
+ s.removeService(svcID)
}
- }()
+ }(service, id)
}
}
}
From fb0d8c81a6ef33f2edef6ef187dd4ec41bc3bc97 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Fri, 28 Nov 2025 17:44:17 +0300
Subject: [PATCH 2/8] PMM-14442 Improve waits and locks
---
managed/services/ha/highavailability.go | 18 ++++------
managed/services/ha/services.go | 45 ++++++++++++++++---------
2 files changed, 35 insertions(+), 28 deletions(-)
diff --git a/managed/services/ha/highavailability.go b/managed/services/ha/highavailability.go
index 82f27babfe3..93b5a17cc14 100644
--- a/managed/services/ha/highavailability.go
+++ b/managed/services/ha/highavailability.go
@@ -93,9 +93,7 @@ func New(params *models.HAParams) *Service {
// Run runs the high availability service.
func (s *Service) Run(ctx context.Context) error {
- s.wg.Add(1)
- go func() {
- defer s.wg.Done()
+ s.wg.Go(func() {
for {
select {
case <-s.services.Refresh():
@@ -107,7 +105,7 @@ func (s *Service) Run(ctx context.Context) error {
return
}
}
- }()
+ })
if !s.params.Enabled {
s.l.Infoln("High availability is disabled")
@@ -197,17 +195,13 @@ func (s *Service) Run(ctx context.Context) error {
return fmt.Errorf("failed to join memberlist cluster: %w", err)
}
}
- s.wg.Add(1)
- go func() {
- defer s.wg.Done()
+ s.wg.Go(func() {
s.runLeaderObserver(ctx)
- }()
+ })
- s.wg.Add(1)
- go func() {
- defer s.wg.Done()
+ s.wg.Go(func() {
s.runRaftNodesSynchronizer(ctx)
- }()
+ })
<-ctx.Done()
diff --git a/managed/services/ha/services.go b/managed/services/ha/services.go
index 157c04e3032..6e886ab3b01 100644
--- a/managed/services/ha/services.go
+++ b/managed/services/ha/services.go
@@ -50,7 +50,7 @@ func (s *services) Add(service LeaderService) error {
id := service.ID()
if _, ok := s.all[id]; ok {
- return fmt.Errorf("service with id %s is already exist", id)
+ return fmt.Errorf("service with id %s already exists", id)
}
s.all[id] = service
select {
@@ -61,33 +61,46 @@ func (s *services) Add(service LeaderService) error {
}
func (s *services) StartAllServices(ctx context.Context) {
- s.rw.Lock()
- defer s.rw.Unlock()
+ type startItem struct {
+ svc LeaderService
+ id string
+ }
+ var toStart []startItem
+ s.rw.Lock()
for id, service := range s.all {
if _, ok := s.running[id]; !ok {
- s.wg.Add(1)
s.running[id] = service
- go func(svc LeaderService, svcID string) {
- s.l.Infoln("Starting", svcID)
- err := svc.Start(ctx)
- if err != nil {
- s.l.Errorln(err)
- s.removeService(svcID)
- }
- }(service, id)
+ toStart = append(toStart, startItem{svc: service, id: id})
}
}
+ s.rw.Unlock()
+
+ for _, service := range toStart {
+ s.wg.Add(1)
+ go func(svc LeaderService, svcID string) {
+ s.l.Infoln("Starting", svcID)
+ err := svc.Start(ctx)
+ if err != nil {
+ s.l.Errorln(err)
+ s.removeService(svcID)
+ }
+ }(service.svc, service.id)
+ }
}
func (s *services) StopRunningServices() {
s.rw.Lock()
- defer s.rw.Unlock()
-
+ toStop := make([]LeaderService, 0, len(s.running))
for id, service := range s.running {
+ toStop = append(toStop, service)
+ delete(s.running, id)
+ }
+ s.rw.Unlock()
+
+ for _, service := range toStop {
s.l.Infoln("Stopping", service.ID())
service.Stop()
- delete(s.running, id)
s.wg.Done()
}
}
@@ -102,7 +115,7 @@ func (s *services) Wait() {
func (s *services) removeService(id string) {
s.rw.Lock()
- defer s.rw.Unlock()
delete(s.running, id)
+ s.rw.Unlock()
s.wg.Done()
}
From 2998a6ed794165d3b28c6d97a2d0faaf8e395258 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sat, 29 Nov 2025 00:20:31 +0300
Subject: [PATCH 3/8] PMM-14442 Change the wait order
---
managed/services/ha/highavailability.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/managed/services/ha/highavailability.go b/managed/services/ha/highavailability.go
index 93b5a17cc14..32ac3cd084d 100644
--- a/managed/services/ha/highavailability.go
+++ b/managed/services/ha/highavailability.go
@@ -109,8 +109,8 @@ func (s *Service) Run(ctx context.Context) error {
if !s.params.Enabled {
s.l.Infoln("High availability is disabled")
- s.services.Wait()
s.wg.Wait()
+ s.services.Wait()
return nil
}
@@ -205,8 +205,8 @@ func (s *Service) Run(ctx context.Context) error {
<-ctx.Done()
- s.services.Wait()
s.wg.Wait()
+ s.services.Wait()
return nil
}
From 6bcf2f304600db4082e51d858ee7bc38d447c433 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sat, 29 Nov 2025 01:22:32 +0300
Subject: [PATCH 4/8] PMM-14442 Fix VM -dryRun parameter syntax
---
managed/services/ha/services.go | 2 +-
managed/services/victoriametrics/victoriametrics.go | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/managed/services/ha/services.go b/managed/services/ha/services.go
index 6e886ab3b01..f33efd787c9 100644
--- a/managed/services/ha/services.go
+++ b/managed/services/ha/services.go
@@ -65,9 +65,9 @@ func (s *services) StartAllServices(ctx context.Context) {
svc LeaderService
id string
}
- var toStart []startItem
s.rw.Lock()
+ toStart := make([]startItem, 0, len(s.all))
for id, service := range s.all {
if _, ok := s.running[id]; !ok {
s.running[id] = service
diff --git a/managed/services/victoriametrics/victoriametrics.go b/managed/services/victoriametrics/victoriametrics.go
index f9a3ca845a7..72018be0f19 100644
--- a/managed/services/victoriametrics/victoriametrics.go
+++ b/managed/services/victoriametrics/victoriametrics.go
@@ -242,7 +242,7 @@ func (svc *Service) validateConfig(ctx context.Context, cfg []byte) error {
_ = os.Remove(f.Name())
}()
- args := []string{"-dryRun", "-promscrape.config", f.Name()}
+ args := []string{"-promscrape.config.dryRun=true", "-promscrape.config", f.Name()}
cmd := exec.CommandContext(ctx, "victoriametrics", args...) //nolint:gosec
pdeathsig.Set(cmd, unix.SIGKILL)
From 8847b57f53630c7e7ed0b7c1c61c67ef61e76007 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sat, 29 Nov 2025 01:39:19 +0300
Subject: [PATCH 5/8] PMM-14442 Minor compose fixes
---
agent/docker-compose.yml | 2 --
api-tests/docker-compose.yml | 4 ++--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/agent/docker-compose.yml b/agent/docker-compose.yml
index e56d3381e0a..9bd1a72c184 100644
--- a/agent/docker-compose.yml
+++ b/agent/docker-compose.yml
@@ -1,6 +1,4 @@
---
-version: '3.7'
-
services:
pmm-server:
image: ${PMM_SERVER_IMAGE:-perconalab/pmm-server:3-dev-latest}
diff --git a/api-tests/docker-compose.yml b/api-tests/docker-compose.yml
index 55af34e3a45..05286e186d9 100644
--- a/api-tests/docker-compose.yml
+++ b/api-tests/docker-compose.yml
@@ -82,7 +82,7 @@ services:
mysql:
condition: service_healthy
environment:
- PMM_AGENT_SERVER_ADDRESS: pmm-server
+ PMM_AGENT_SERVER_ADDRESS: pmm-server:8443
PMM_AGENT_SERVER_USERNAME: admin
PMM_AGENT_SERVER_PASSWORD: admin
PMM_AGENT_SERVER_INSECURE_TLS: 1
@@ -90,7 +90,7 @@ services:
PMM_AGENT_SETUP: 1
PMM_AGENT_SETUP_FORCE: 1
PMM_AGENT_SIDECAR: 1
-# PMM_AGENT_PRERUN_SCRIPT: "pmm-admin status --wait=10s; pmm-admin add mysql --username=pmm-agent --password=pmm-agent-password --host=pmm-agent_mysql --port=3306"
+ # PMM_AGENT_PRERUN_SCRIPT: "pmm-admin status --wait=10s; pmm-admin add mysql --username=pmm-agent --password=pmm-agent-password --host=pmm-agent_mysql --port=3306"
sysbench:
image: perconalab/sysbench
From a28c448fe812a2ae311744cdd35d49af13fda491 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sat, 29 Nov 2025 22:18:30 +0300
Subject: [PATCH 6/8] PMM-14442 Use modern syntax
---
managed/cmd/pmm-managed/main.go | 48 +++++++++++----------------------
1 file changed, 16 insertions(+), 32 deletions(-)
diff --git a/managed/cmd/pmm-managed/main.go b/managed/cmd/pmm-managed/main.go
index 714ed0d0a8a..29dce9c1b98 100644
--- a/managed/cmd/pmm-managed/main.go
+++ b/managed/cmd/pmm-managed/main.go
@@ -1082,40 +1082,30 @@ func main() { //nolint:maintidx,cyclop
l.Info("Starting services...")
var wg sync.WaitGroup
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
authServer.Run(ctx)
- }()
+ })
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
vmalert.Run(ctx)
- }()
+ })
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
vmdb.Run(ctx)
- }()
+ })
haService.AddLeaderService(ha.NewContextService("checks", func(ctx context.Context) error {
checksService.Run(ctx)
return nil
}))
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
supervisord.Run(ctx)
- }()
+ })
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
updater.Run(ctx)
- }()
+ })
wg.Add(1)
haService.AddLeaderService(ha.NewContextService("telemetry", func(ctx context.Context) error {
@@ -1134,9 +1124,7 @@ func main() { //nolint:maintidx,cyclop
return nil
}))
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
runGRPCServer(ctx,
&gRPCServerDeps{
actions: actionsService,
@@ -1170,22 +1158,18 @@ func main() { //nolint:maintidx,cyclop
vmClient: &vmClient,
vmdb: vmdb,
})
- }()
+ })
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
runHTTP1Server(ctx, &http1ServerDeps{
logs: logs,
authServer: authServer,
})
- }()
+ })
- wg.Add(1)
- go func() {
- defer wg.Done()
+ wg.Go(func() {
runDebugServer(ctx)
- }()
+ })
haService.AddLeaderService(ha.NewContextService("cleaner", func(ctx context.Context) error {
cleaner.Run(ctx, cleanInterval, cleanOlderThan)
From c1f2e456a5af8b27ad00d9bc208986db9e3a17bb Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sun, 30 Nov 2025 12:52:16 +0300
Subject: [PATCH 7/8] PMM-14442 Read the Go version from go.mod in the clean workflow
---
.github/workflows/clean.yml | 26 +++++++++-----------------
1 file changed, 9 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/clean.yml b/.github/workflows/clean.yml
index 0df379c2788..f6668a69baa 100644
--- a/.github/workflows/clean.yml
+++ b/.github/workflows/clean.yml
@@ -12,14 +12,7 @@ jobs:
name: Clean caches
timeout-minutes: 5
- strategy:
- fail-fast: false
- matrix:
- go:
- - version: 1.24.x
- may-fail: false
-
- continue-on-error: ${{ matrix.go.may-fail }}
+ continue-on-error: false
runs-on: ubuntu-22.04
env:
@@ -31,15 +24,15 @@ jobs:
ROBOT_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: echo "machine github.com login percona-robot password ${{ secrets.ROBOT_TOKEN }}" > $HOME/.netrc
- - name: Set up Go release
+ - name: Set up Go
env:
# to avoid error due to `go version` accepting -v flag with an argument since 1.15
GOFLAGS: ""
uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
with:
- go-version: ${{ matrix.go.version }}
+ go-version-file: ${{ github.workspace }}/go.mod
- - name: Check out code into the Go module directory
+ - name: Check out code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
with:
lfs: true
@@ -48,18 +41,17 @@ jobs:
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: ~/go/pkg/mod
- key: ${{ matrix.go.version }}-modules-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ matrix.go.version }}-modules-
+ key: ${{ runner.os }}-go-modules-${{ hashFiles('**/go.sum') }}
+ restore-keys: ${{ runner.os }}-go-modules-
- name: Enable Go build cache
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: ~/.cache/go-build
- key: ${{ matrix.go.version }}-build-${{ github.ref }}-${{ hashFiles('**') }}
+ key: ${{ runner.os }}-go-build-${{ github.ref }}-${{ hashFiles('**') }}
restore-keys: |
- ${{ matrix.go.version }}-build-${{ github.ref }}-
- ${{ matrix.go.version }}-build-
+ ${{ runner.os }}-go-build-${{ github.ref }}-
+ ${{ runner.os }}-go-build-
- name: Clean Go modules cache
run: go clean -modcache
From d1f3b6fdecb51f95bdc67a65800b81a908b022b9 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Sun, 30 Nov 2025 13:20:58 +0300
Subject: [PATCH 8/8] PMM-14442 Increase update batch delay and configuration update timeout
---
managed/services/victoriametrics/victoriametrics.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/managed/services/victoriametrics/victoriametrics.go b/managed/services/victoriametrics/victoriametrics.go
index 72018be0f19..6128ad5a5d6 100644
--- a/managed/services/victoriametrics/victoriametrics.go
+++ b/managed/services/victoriametrics/victoriametrics.go
@@ -43,8 +43,8 @@ import (
)
const (
- updateBatchDelay = time.Second
- configurationUpdateTimeout = 3 * time.Second
+ updateBatchDelay = 3 * time.Second
+ configurationUpdateTimeout = 5 * time.Second
victoriametricsDir = "/srv/victoriametrics"
victoriametricsDataDir = "/srv/victoriametrics/data"