Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions prometheus_keys.lua
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,18 @@ function KeyIndex:list()
self:sync()
local copy = {}
local i = 1
for _, v in pairs(self.keys) do
copy[i] = v
i = i + 1
-- Walk the key slots in order, but only emit a key when the slot is the one
-- the index currently points at (self.index[key] == idx). self.keys can
-- transiently hold the same key value in two different slots (e.g. when an
-- expired metric is re-added at a new slot before the old slot is reclaimed);
-- listing the raw self.keys values would emit duplicate metrics. Consulting
-- the index guarantees each key is listed exactly once, at its canonical slot.
for idx = 0, self.last do
local key = self.keys[idx]
if key and self.index[key] == idx then
copy[i] = key
i = i + 1
end
end
return copy
end
Expand Down Expand Up @@ -125,6 +134,13 @@ function KeyIndex:add(key_or_keys, err_msg_lru_eviction, exptime)
self.index[key] = nil
self.keys[idx] = nil
self.expire_keys[idx] = nil
self.dict:set(self.key_prefix .. idx, nil)
self.deleted = self.deleted + 1
-- Bump delete_count so other workers do a full sync and reclaim the
-- stale slot. Without this the old slot lingers in their local
-- self.keys while the metric is re-added at a new slot, which
-- desynchronizes the index and causes duplicate metric emission.
self.dict:incr(self.delete_count, 1, 0)
expired = true
end
end
Expand Down
38 changes: 38 additions & 0 deletions prometheus_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,44 @@ function TestKeyIndex:testList()
luaunit.assertEquals(keys[3], "key3")
end

-- Regression test for apache/apisix#11934 (duplicate metrics).
-- A key with an exptime is added and synced (self.last now tracks N). The key
-- then expires in the underlying shared dict without going through remove(), so
-- neither key_count nor delete_count changes and the next sync() is a no-op,
-- leaving self.index still pointing at the now-vanished slot. Re-adding the key
-- therefore takes the "expired" path in add() and allocates a new slot while
-- the stale slot lingers in self.keys. Before the fix, delete_count was not
-- bumped (other workers never re-synced) and list() iterated self.keys, so the
-- same key was emitted twice -> duplicate metrics.
function TestKeyIndex:testExpiredReAddNoDuplicate()
self.key_index:add("expkey", "eviction_err", 1)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
self.key_index:sync()
luaunit.assertEquals(self.dict:get("_prefix_key_count"), 1)
luaunit.assertEquals(self.dict:get("_prefix_key_1"), "expkey")
luaunit.assertEquals(self.dict:get("_prefix_delete_count"), nil)
luaunit.assertEquals(self.key_index.index["expkey"], 1)

-- Let the key expire in the underlying shared dict. key_count and
-- delete_count are untouched, so the next sync() inside add() is a no-op and
-- the local index keeps pointing at the (now gone) slot 1.
sleep(2)
luaunit.assertEquals(self.dict:get("_prefix_key_1"), nil)

-- Re-adding the now-expired key takes the expired branch and allocates slot 2.
self.key_index:add("expkey", "eviction_err", 1)
luaunit.assertEquals(ngx.logs, nil)
luaunit.assertEquals(self.dict:get("_prefix_key_2"), "expkey")

-- delete_count must have been bumped on the expired re-add path so that
-- other workers do a full sync and drop the stale slot.
luaunit.assertEquals(self.dict:get("_prefix_delete_count"), 1)

-- list() must report the key exactly once, not twice.
local keys = self.key_index:list()
luaunit.assertEquals(#keys, 1)
luaunit.assertEquals(keys[1], "expkey")
end

function TestKeyIndex:testSync()
self.key_index:sync()
luaunit.assertEquals(ngx.logs, nil)
Expand Down