Skip to content

Commit fc6e45c

Browse files
ti-chi-botlhy1024
andauthored
checker, statistic: avoid leak in label statistic (#8703) (#8735)
close #8700 Signed-off-by: lhy1024 <[email protected]> Co-authored-by: lhy1024 <[email protected]>
1 parent 2d8f681 commit fc6e45c

File tree

5 files changed

+55
-11
lines changed

5 files changed

+55
-11
lines changed

pkg/cluster/cluster.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func HandleOverlaps(ctx context.Context, c Cluster, overlaps []*core.RegionInfo)
6868
if c.GetRegionStats() != nil {
6969
c.GetRegionStats().ClearDefunctRegion(item.GetID())
7070
}
71-
c.GetLabelStats().ClearDefunctRegion(item.GetID())
71+
c.GetLabelStats().MarkDefunctRegion(item.GetID())
7272
c.GetRuleManager().InvalidCache(item.GetID())
7373
}
7474
}

pkg/mcs/scheduling/server/cluster.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,13 +379,12 @@ func (c *Cluster) waitSchedulersInitialized() {
379379
}
380380
}
381381

382-
// TODO: implement the following methods
383-
384382
// UpdateRegionsLabelLevelStats updates the status of the region label level by types.
385383
func (c *Cluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) {
386384
for _, region := range regions {
387385
c.labelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.persistConfig.GetLocationLabels())
388386
}
387+
c.labelStats.ClearDefunctRegions()
389388
}
390389

391390
func (c *Cluster) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {

pkg/statistics/region_collection.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,13 +365,15 @@ type LabelStatistics struct {
365365
syncutil.RWMutex
366366
regionLabelStats map[uint64]string
367367
labelCounter map[string]int
368+
defunctRegions map[uint64]struct{}
368369
}
369370

370371
// NewLabelStatistics creates a new LabelStatistics.
371372
func NewLabelStatistics() *LabelStatistics {
372373
return &LabelStatistics{
373374
regionLabelStats: make(map[uint64]string),
374375
labelCounter: make(map[string]int),
376+
defunctRegions: make(map[uint64]struct{}),
375377
}
376378
}
377379

@@ -405,14 +407,26 @@ func ResetLabelStatsMetrics() {
405407
regionLabelLevelGauge.Reset()
406408
}
407409

408-
// ClearDefunctRegion is used to handle the overlap region.
409-
func (l *LabelStatistics) ClearDefunctRegion(regionID uint64) {
410+
// MarkDefunctRegion is used to handle the overlap region.
411+
// It is used to mark the region as defunct and remove it from the label statistics later.
412+
func (l *LabelStatistics) MarkDefunctRegion(regionID uint64) {
410413
l.Lock()
411414
defer l.Unlock()
412-
if label, ok := l.regionLabelStats[regionID]; ok {
413-
l.labelCounter[label]--
414-
delete(l.regionLabelStats, regionID)
415+
l.defunctRegions[regionID] = struct{}{}
416+
}
417+
418+
// ClearDefunctRegions is used to handle the overlap region.
419+
// It is used to remove the defunct regions from the label statistics.
420+
func (l *LabelStatistics) ClearDefunctRegions() {
421+
l.Lock()
422+
defer l.Unlock()
423+
for regionID := range l.defunctRegions {
424+
if label, ok := l.regionLabelStats[regionID]; ok {
425+
l.labelCounter[label]--
426+
delete(l.regionLabelStats, regionID)
427+
}
415428
}
429+
l.defunctRegions = make(map[uint64]struct{})
416430
}
417431

418432
// GetLabelCounter is only used for tests.

server/cluster/cluster_test.go

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) {
11251125
opt.SetReplicationConfig(cfg)
11261126
re.NoError(err)
11271127
cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend())
1128+
cluster.coordinator = schedule.NewCoordinator(ctx, cluster, nil)
11281129

11291130
for i := uint64(1); i <= 4; i++ {
11301131
var labels []*metapb.StoreLabel
@@ -1159,13 +1160,42 @@ func TestRegionLabelIsolationLevel(t *testing.T) {
11591160
StartKey: []byte{byte(1)},
11601161
EndKey: []byte{byte(2)},
11611162
}
1162-
r := core.NewRegionInfo(region, peers[0])
1163-
re.NoError(cluster.putRegion(r))
1163+
r1 := core.NewRegionInfo(region, peers[0])
1164+
re.NoError(cluster.putRegion(r1))
11641165

1165-
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r})
1166+
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1})
11661167
counter := cluster.labelStats.GetLabelCounter()
11671168
re.Equal(0, counter["none"])
11681169
re.Equal(1, counter["zone"])
1170+
1171+
region = &metapb.Region{
1172+
Id: 10,
1173+
Peers: peers,
1174+
StartKey: []byte{byte(2)},
1175+
EndKey: []byte{byte(3)},
1176+
}
1177+
r2 := core.NewRegionInfo(region, peers[0])
1178+
re.NoError(cluster.putRegion(r2))
1179+
1180+
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r2})
1181+
counter = cluster.labelStats.GetLabelCounter()
1182+
re.Equal(0, counter["none"])
1183+
re.Equal(2, counter["zone"])
1184+
1185+
// issue: https://github.com/tikv/pd/issues/8700
1186+
// step1: heartbeat a overlap region, which is used to simulate the case that the region is merged.
1187+
// step2: update region 9 and region 10, which is used to simulate the case that patrol is triggered.
1188+
// We should only count region 9.
1189+
overlapRegion := r1.Clone(
1190+
core.WithStartKey(r1.GetStartKey()),
1191+
core.WithEndKey(r2.GetEndKey()),
1192+
core.WithLeader(r2.GetPeer(8)),
1193+
)
1194+
re.NoError(cluster.HandleRegionHeartbeat(overlapRegion))
1195+
cluster.UpdateRegionsLabelLevelStats([]*core.RegionInfo{r1, r2})
1196+
counter = cluster.labelStats.GetLabelCounter()
1197+
re.Equal(0, counter["none"])
1198+
re.Equal(1, counter["zone"])
11691199
}
11701200

11711201
func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) {

server/cluster/scheduling_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ func (sc *schedulingController) UpdateRegionsLabelLevelStats(regions []*core.Reg
236236
for _, region := range regions {
237237
sc.labelStats.Observe(region, sc.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), sc.opt.GetLocationLabels())
238238
}
239+
sc.labelStats.ClearDefunctRegions()
239240
}
240241

241242
func (sc *schedulingController) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {

0 commit comments

Comments
 (0)