Skip to content

Commit 1bc9934

Browse files
committed
Add the list_lru iterator helper and test for the helper
The drgn iterator for list_lru and a test that walks the filesystem(s) and verifies the memcg and NUMA node id. Provides the functions: list_lru_for_each_list() iterates the list_lru and returns each list_lru_one, the NUMA node and memcg number. list_lru_for_each_entry() iterates the list_lru and returns each entry of the specified type, the NUMA node and memcg number. list_lru_from_memcg_node_for_each_list() iterates the list_lru for the specified NUMA node and memcg id and returns each list_lru_one. list_lru_from_memcg_node_for_each_entry() iterates the list_lru for the specified NUMA node and memcg id and returns each entry of the specified type. Helpers: slab_object_to_memcgidx() returns the memcg index for the specified list_lru slab object. slab_object_to_nodeid() returns the NUMA node id for the specified list_lru object. The test defaults to the quick verification of the information from list_lru_for_each() but, when "verify" is added, the test walks the memcg/NUMA node portion of the list_lru to verify the entry exists. The test raises an exception if the memcg or nodeid lookup does not match the reported value. The optional argument, "maxitems", limits the number of checked items in a filesystem. Signed-off-by: Mark Tinguely <[email protected]>
1 parent 4830729 commit 1bc9934

File tree

3 files changed

+458
-0
lines changed

3 files changed

+458
-0
lines changed

doc/api.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,14 @@ drgn\_tools.itertools module
129129
:undoc-members:
130130
:show-inheritance:
131131

132+
drgn\_tools.list_lru module
133+
-----------------------
134+
135+
.. automodule:: drgn_tools.list_lru
136+
:members:
137+
:undoc-members:
138+
:show-inheritance:
139+
132140
drgn\_tools.lock module
133141
-----------------------
134142

drgn_tools/list_lru.py

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
"""
4+
LRU Lists
5+
------------
6+
7+
Helper to work with LRU lists. LRU can be created to be memcg aware and
8+
ordered by NUMA node.
9+
10+
The routines iterate through the specified LRU and on NUMA machines, the
11+
output keeps the entries ordered by NUMA node.
12+
13+
The list_lru_for_each_list() function iterates all of the list_lru_one
14+
list. The list_lru_for_each_entry() function iterates through all the
15+
specified entries on a list_lru and returns the NUMA nodeid, memcg
16+
and Object of the specified type.
17+
18+
The list_lru_from_memcg_node_for_each_list() and
19+
list_lru_from_memcg_node_for_each_entry() functions allow the user to
20+
restrict the iteration of the list_lru_one and entries by the memcg
21+
index when the list_lru is memcg_aware and the NUMA node identifier.
22+
23+
slab_object_to_memcgidx() is a helper to find the mem_cgroup index
24+
from a list_lru entry. This routine is only interested in slab
25+
allocated entries and does not check nor handle the MEMCG_DATA_KMEM case.
26+
slab_object_to_nodeid() is a helper to find the NUMA node id from a
27+
list_lru entry.
28+
"""
29+
from typing import Iterator
30+
from typing import Tuple
31+
from typing import Union
32+
33+
from drgn import cast
34+
from drgn import IntegerLike
35+
from drgn import NULL
36+
from drgn import Object
37+
from drgn import Type
38+
from drgn.helpers.linux.list import list_for_each_entry
39+
from drgn.helpers.linux.mm import compound_head
40+
from drgn.helpers.linux.mm import page_to_pfn
41+
from drgn.helpers.linux.mm import page_to_virt
42+
from drgn.helpers.linux.mm import PageSlab
43+
from drgn.helpers.linux.mm import virt_to_page
44+
from drgn.helpers.linux.nodemask import for_each_online_node
45+
from drgn.helpers.linux.nodemask import node_state
46+
from drgn.helpers.linux.slab import slab_object_info
47+
from drgn.helpers.linux.xarray import xa_for_each
48+
from drgn.helpers.linux.xarray import xa_load
49+
50+
from drgn_tools.meminfo import get_active_numa_nodes
51+
from drgn_tools.util import has_member
52+
53+
__all__ = (
54+
"list_lru_for_each_list",
55+
"list_lru_for_each_entry",
56+
"list_lru_from_memcg_node_for_each_list",
57+
"list_lru_from_memcg_node_for_each_entry",
58+
"slab_object_to_memcgidx",
59+
"slab_object_to_nodeid",
60+
)
61+
62+
63+
def list_lru_for_each_list(lru: Object) -> Iterator[Tuple[int, int, Object]]:
    """
    Walk every list_lru_one of a list_lru, grouped by NUMA node.

    All lists belonging to one online node are produced before the next
    node is visited. For a list_lru that is not memcg aware, the memcg
    index in each tuple is always 0.

    :param lru: ``struct list_lru *``
    :return: Iterator of the Tuple (node_id, memcg_idx, list_lru_one object)
    """
    prog = lru.prog_

    # v5.2-rc2-303-g3e8589963773 (memcg: make it work on sparse non-0-node
    # systems) adds the memcg_aware boolean.
    is_memcg_aware = bool(has_member(lru, "memcg_aware") and lru.memcg_aware)

    # Before v5.15.0-9.96.3-944-gd337fa4c0eb2 (Oracle) and
    # v5.17-47-g6a6b7b77cc0f (community) (mm: list_lru: transpose the array
    # of per-node per-memcg lru lists), the list_lru_memcg entry was in the
    # list_lru_node that was in the list_lru.
    if has_member(lru, "node"):
        if has_member(lru.node, "memcg_lrus") and lru.node[0].memcg_lrus:
            is_memcg_aware = True

    uses_ext = has_member(lru, "ext")

    if not is_memcg_aware:
        # Only the single per-node list exists; report it under index 0.
        for nid in for_each_online_node(prog):
            if uses_ext:
                yield (nid, 0, lru.ext.node[nid].lru)
            else:
                yield (nid, 0, lru.node[nid].lru)
        return

    if not (uses_ext or has_member(lru, "xa")):
        # Older layout: per-node array of per-memcg lists.
        for nid in for_each_online_node(prog):
            # Keep the entries grouped by the NUMA node.
            for idx in range(prog["memcg_nr_cache_ids"]):
                yield (nid, idx, lru.node[nid].memcg_lrus.lru[idx])
        return

    if uses_ext:
        # (uek7) Oracle port UEK_KABI_REPLACE of node to ext
        # v5.15.0-9.96.3-944-gd337fa4c0eb2 of community patch
        # v5.17-47-g6a6b7b77cc0f
        xarray = lru.ext.xa
    else:
        # uek 8 v5.17-57-gbbca91cca9a9 replace array with xarray
        # doesn't have uek7 KABI changes.
        xarray = lru.xa

    # Keep the entries grouped by the NUMA node.
    for nid in for_each_online_node(prog):
        for memcgid, entry in xa_for_each(xarray.address_of_()):
            # The xarray stores void pointers; give the entry its real type.
            lru_memcg = Object(prog, "struct list_lru_memcg *", entry)
            yield (nid, memcgid, lru_memcg.node[nid])
115+
116+
117+
def list_lru_for_each_entry(
    type: Union[str, Type], lru: Object, member: str
) -> Iterator[Tuple[int, int, Object]]:
    """
    Iterate over all of the entries in a list_lru.

    Every list_lru_one produced by list_lru_for_each_list() is walked in
    turn, and each entry found on it is reported together with the NUMA
    node id and memcg index of the list it was found on.

    :param type: Entry type.
    :param lru: ``struct list_lru *``
    :param member: Name of list node member in entry type.
    :return: Iterator of the Tuple (node_id, memcg_idx, ``type *``)
    """
    for nid, memcgid, lru_one in list_lru_for_each_list(lru):
        head = lru_one.list.address_of_()
        yield from (
            (nid, memcgid, entry)
            for entry in list_for_each_entry(type, head, member)
        )
135+
136+
137+
def list_lru_from_memcg_node_for_each_list(
    mindx: IntegerLike,
    nid: IntegerLike,
    lru: Object,
) -> Iterator[Object]:
    """
    Iterate over the list_lru_one for the provided memcg and NUMA node.

    Nothing is yielded when the node is not online, when the xarray lookup
    for ``mindx`` comes back NULL, or (on the older array layout) when
    ``mindx`` is outside ``0 <= mindx < memcg_nr_cache_ids``. When the
    list_lru is not memcg aware, ``mindx`` is ignored and the node's single
    list is yielded.

    :param mindx: memcg index.
    :param nid: NUMA node ID.
    :param lru: ``struct list_lru *``
    :return: Iterator of list_lru_one objects.
    """
    prog = lru.prog_
    if not node_state(nid, prog["N_ONLINE"]):
        return

    # v5.2-rc2-303-g3e8589963773 (memcg: make it work on sparse non-0-node
    # systems) adds the memcg_aware boolean.
    memcg_aware = bool(has_member(lru, "memcg_aware") and lru.memcg_aware)
    # Before v5.15.0-9.96.3-944-gd337fa4c0eb2 (Oracle) and
    # v5.17-47-g6a6b7b77cc0f (community) (mm: list_lru: transpose the array
    # of per-node per-memcg lru lists), the list_lru_memcg entry was in the
    # list_lru_node that was in the list_lru.
    if has_member(lru, "node"):
        if has_member(lru.node, "memcg_lrus") and lru.node[0].memcg_lrus:
            memcg_aware = True

    has_ext = has_member(lru, "ext")

    if not memcg_aware:
        # Only the single per-node list exists.
        if has_ext:
            yield lru.ext.node[nid].lru
        else:
            yield lru.node[nid].lru
        return

    if has_ext or has_member(lru, "xa"):
        if has_ext:
            # (uek7) Oracle port UEK_KABI_REPLACE of node to ext
            # v5.15.0-9.96.3-944-gd337fa4c0eb2 of community patch
            # v5.17-47-g6a6b7b77cc0f
            xa = lru.ext.xa
        else:
            # uek 8 v5.17-57-gbbca91cca9a9 replace array with xarray
            # doesn't have uek7 KABI changes.
            xa = lru.xa
        memcg = xa_load(xa.address_of_(), mindx)
        # Convert from the void pointer unless it is NULL, in which case
        # there is nothing to yield for this index.
        if memcg:
            memcg = Object(prog, "struct list_lru_memcg *", memcg)
            yield memcg.node[nid]
        return

    # Older array layout: make sure the memcg index is within legal limits
    # before indexing the per-node array.
    if 0 <= mindx < prog["memcg_nr_cache_ids"]:
        yield lru.node[nid].memcg_lrus.lru[mindx]
191+
192+
193+
def list_lru_from_memcg_node_for_each_entry(
    mindx: IntegerLike,
    nid: IntegerLike,
    type: Union[str, Type],
    lru: Object,
    member: str,
) -> Iterator[Object]:
    """
    Iterate over the entries of a list_lru for one memcg and NUMA node.

    The list_lru_one objects are obtained from
    list_lru_from_memcg_node_for_each_list(); every entry linked on each of
    them is then produced.

    :param mindx: memcg index.
    :param nid: NUMA node ID.
    :param type: Entry type.
    :param lru: ``struct list_lru *``
    :param member: Name of list node member in entry type.
    :return: Iterator of ``type *`` objects.
    """
    selected = list_lru_from_memcg_node_for_each_list(mindx, nid, lru)
    for lru_one in selected:
        head = lru_one.list.address_of_()
        for entry in list_for_each_entry(type, head, member):
            yield entry
214+
215+
216+
def slab_object_to_memcgidx(obj: Object) -> IntegerLike:
    """
    Return the memcg index of the list_lru object.

    Memory cgroups for slab allocation are per object. This code expects a
    slab allocated object and the MEMCG_DATA_KMEM case is NOT covered in
    this routine.

    :param obj: address of a slab allocated list_lru entry
    :return: the ``kmemcg_id`` of the owning memcg, or -1 when the slab
        carries no per-object cgroup data or the object has no obj_cgroup
        or memcg attached to it
    :raises ValueError: if ``obj`` is not a slab object
    :raises RuntimeError: if none of the known memcg layouts is present
    """
    prog = obj.prog_
    info = slab_object_info(obj)
    if not info:
        raise ValueError("not a slab object")

    if hasattr(info.slab_cache, "memcg_params"):
        # Prior to v5.9, there were separate slab caches per memcg, so the
        # memcg could be determined from the slab cache itself.
        # uek6 added commit v5.4.17-2050-33-g3aac91dc16a4 (community commit
        # 10befea91b61c ("mm: memcg/slab: use a single set of kmem_caches
        # for all allocations") and retained an unused memcg_params.
        # NOTE(review): presumably ``memcg`` shares a union with root-cache
        # bookkeeping in struct memcg_cache_params, so it may be non-NULL
        # for a root cache - confirm against the target kernels.
        params = info.slab_cache.memcg_params
        if params.memcg:
            return params.memcg.kmemcg_id.value_()

    # Position of the object inside its slab; used to index the per-object
    # cgroup vector below.
    slab_object_index = (
        obj.value_() - page_to_virt(info.slab).value_()
    ) // info.slab_cache.size.value_()

    if hasattr(info.slab, "obj_cgroups"):
        # Starting with v5.9 in commit 10befea91b61c ("mm: memcg/slab: use a
        # single set of kmem_caches for all allocations"), until v5.11,
        # object cgroup information was stored in a "obj_cgroups" array,
        # which was shared in a union as "mem_cgroup". The lowest bit is set
        # to indicate that it is an array of object cgroup information.
        obj_cgroups = info.slab.obj_cgroups
        if not obj_cgroups.value_() & 1:
            return -1
        objcg_vec = Object(prog, obj_cgroups.type_, obj_cgroups.value_() - 1)
        objcg = objcg_vec[slab_object_index]
    elif hasattr(info.slab, "memcg_data"):
        # Starting with v5.11 commit 87944e2992bd2 ("mm: Introduce page memcg
        # flags"), until v6.10, the "mem_cgroup" and "obj_cgroups" are placed
        # into the unsigned long field "memcg_data", with constant flags to
        # formalize the access to them.
        flag = prog.constant("MEMCG_DATA_OBJCGS")
        mask = cast("unsigned long", prog.constant("__NR_MEMCG_DATA_FLAGS") - 1)
        if not info.slab.memcg_data & flag:
            return -1
        objcg_vec = cast("struct obj_cgroup **", info.slab.memcg_data & ~mask)
        objcg = objcg_vec[slab_object_index]
    elif hasattr(info.slab, "obj_exts"):
        # Since v6.10 commit 21c690a349baa ("mm: introduce slabobj_ext to
        # support slab object extensions"), struct slab now supports more
        # types of object metadata, in addition to memcg info. There are new
        # constants to check and a new type, slabobj_ext, to use for
        # accessing the metadata.
        flag = prog.constant("MEMCG_DATA_OBJEXTS")
        mask = cast("unsigned long", prog.constant("__NR_OBJEXTS_FLAGS") - 1)
        if not info.slab.obj_exts & flag:
            return -1
        exts = cast("struct slabobj_ext *", info.slab.obj_exts & ~mask)
        objcg = exts[slab_object_index].objcg
    else:
        raise RuntimeError(
            "Cannot find object memcg info for this kernel version"
        )

    # The per-object obj_cgroup pointer may be NULL; check it before reading
    # ``->memcg`` so such an object reports -1 instead of faulting.
    if not objcg:
        return -1
    memcg = objcg.memcg
    if not memcg:
        return -1
    return memcg.kmemcg_id.value_()
287+
288+
289+
def slab_object_to_nodeid(obj: Object) -> IntegerLike:
    """
    Return the NUMA node id of the list_lru entry.

    The entry's page is resolved to its compound head and the resulting pfn
    is compared against the ``node_start_pfn`` of consecutive active NUMA
    nodes. The first node whose start pfn is at or below the pfn while the
    next node's start pfn is above it is selected; if no such pair exists,
    the last active node is reported.

    :param obj: address of a list_lru entry
    :return: NUMA node id
    """
    prog = obj.prog_
    head_page = compound_head(virt_to_page(prog, obj))
    pfn = page_to_pfn(head_page)

    active_nodes = get_active_numa_nodes(prog)
    # Compare against each pair of neighbouring nodes in list order.
    for lower, upper in zip(active_nodes, active_nodes[1:]):
        if lower.node_start_pfn <= pfn < upper.node_start_pfn:
            return lower.node_id
    return active_nodes[-1].node_id

0 commit comments

Comments
 (0)