Skip to content

Commit 11bac0e

Browse files
committed
Add a corelens module for spinlocks
Detect spinlock spinners and owners. Orabug: 37357389 Signed-off-by: Richard Li <[email protected]>
1 parent d6ba9fe commit 11bac0e

File tree

2 files changed

+374
-0
lines changed

2 files changed

+374
-0
lines changed

drgn_tools/spinlock.py

Lines changed: 366 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
import argparse
4+
import os
5+
import re
6+
import shutil
7+
import subprocess
8+
from collections import defaultdict
9+
from typing import Dict
10+
from typing import Iterable
11+
from typing import List
12+
from typing import Tuple
13+
14+
from drgn import FaultError
15+
from drgn import Object
16+
from drgn import Program
17+
from drgn.helpers.linux.cpumask import for_each_online_cpu
18+
from drgn.helpers.linux.percpu import per_cpu
19+
from drgn.helpers.linux.sched import cpu_curr
20+
21+
from drgn_tools.bt import frame_name
22+
from drgn_tools.corelens import CorelensModule
23+
from drgn_tools.table import FixedTable
24+
from drgn_tools.task import get_command
25+
from drgn_tools.task import get_current_run_time
26+
from drgn_tools.util import timestamp_str
27+
28+
# must have cscope installed
# must have uek source code repo set here
UEK_CODE_DIR = ""

# cscope search fields, in the order cscope lists them. The position in this
# list is the number passed to a line-oriented query
# (``cscope -d -L -<num><pattern>``), e.g. 2 is "functions called by this
# function".
#   0: Find this C symbol:
#   1: Find this function definition:
#   2: Find functions called by this function:
#   3: Find functions calling this function:
#   4: Find this text string:
#   5: Change this text string:
#   6: Find this egrep pattern:
#   7: Find this file:
#   8: Find files #including this file:
43+
44+
45+
def query_cscope(
    nums: List, pattern: str, keyword: str = "", target_dir: str = UEK_CODE_DIR
):
    """
    Run cscope line-oriented queries, filter them with grep, and return the output.

    Each query is executed as ``cscope -d -L -<num><pattern> | grep -E <keyword>``
    through the shell, with ``target_dir`` as the working directory.

    :param nums: cscope query numbers to run, in order
    :param pattern: text appended directly after the query number
    :param keyword: extended regular expression handed to ``grep -E``; the
      default empty string keeps every line
    :param target_dir: directory in which the commands are run. The default is
      the value ``UEK_CODE_DIR`` had when this function was defined.
    :returns: the concatenated output of every query whose pipeline exited
      with status 0; an empty string if none did
    """
    # Local import so this block does not depend on the file's import section.
    import shlex

    # Quote both user-supplied pieces so that spaces, quotes or shell
    # metacharacters in them cannot split or alter the command line. Plain
    # identifiers pass through shlex.quote() unchanged.
    quoted_pattern = shlex.quote(pattern)
    quoted_keyword = shlex.quote(keyword)

    chunks = []
    for num in nums:
        try:
            chunks.append(
                subprocess.check_output(
                    f"cscope -d -L -{num}{quoted_pattern}"
                    f" | grep -E {quoted_keyword}",
                    universal_newlines=True,
                    shell=True,
                    cwd=target_dir,
                )
            )
        except subprocess.CalledProcessError:
            # Non-zero pipeline status (for instance grep selected no line):
            # this query contributes nothing.
            continue

    return "".join(chunks)
64+
65+
66+
_QSPINLOCK_UNLOCKED_VAL = 0


def qspinlock_is_locked(qsp: Object) -> str:
    """
    Tell whether a qspinlock is currently locked.

    :param qsp: ``struct qspinlock *``
    :returns: the string ``"True"`` when the lock's ``locked`` field differs
      from the unlocked value, the string ``"False"`` otherwise. Note that the
      result is a string, not a bool.
    """
    locked_field = qsp.locked.value_()
    if locked_field == _QSPINLOCK_UNLOCKED_VAL:
        return "False"
    return "True"
77+
78+
79+
def get_qspinlock_tail_cpu(qsp: Object) -> int:
    """
    Decode the tail cpu recorded in a qspinlock.

    :param qsp: ``struct qspinlock *``
    :returns: the cpu at the tail of the lock's wait queue, or -1 when the
      ``tail`` field is too small to encode one (e.g. 0)
    """
    # Drop the two low bits, then subtract one so an empty tail maps to -1.
    # NOTE(review): presumably the low two bits are the qnode index and the
    # cpu is stored as cpu + 1, as in the kernel's tail encoding -- confirm.
    return (qsp.tail.value_() >> 2) - 1
89+
90+
91+
def get_tail_cpu_qnode(qsp: Object) -> Iterable[Object]:
    """
    Only for UEK6 and above.
    Given a qspinlock, find qnodes associated with the tail cpu spinning on the qspinlock.

    :param qsp: ``struct qspinlock *``
    :returns: Iterator of qnode; it yields nothing when the lock has no tail cpu
    """
    tail_cpu = get_qspinlock_tail_cpu(qsp)
    if tail_cpu < 0:
        # This function is a generator, so a returned list would never reach
        # the caller; a bare return simply ends the iteration.
        return
    # Iterate the per-cpu "qnodes" variable of the tail cpu.
    yield from per_cpu(qsp.prog_["qnodes"], tail_cpu)
106+
107+
108+
def dump_qnode_address_for_each_cpu(prog: Program, cpu: int = -1) -> None:
    """
    Only for UEK6 and above.
    Print the address of the per-cpu ``qnodes`` variable, one row per cpu.

    :param prog: drgn program
    :param cpu: cpu id; when greater than -1 only that cpu is printed, and
      only if it is online. Otherwise every online cpu is printed.
    """
    print("%-20s %-20s" % ("cpu", "qnode"))

    online = list(for_each_online_cpu(prog))
    if cpu > -1:
        # A specific cpu was requested: print it only if it is online.
        selected = [cpu] if cpu in online else []
    else:
        selected = online

    for cpu_id in selected:
        qnode_addr = per_cpu(prog["qnodes"], cpu_id).address_of_().value_()
        print("%-20s %-20lx" % (cpu_id, qnode_addr))
132+
133+
134+
def scan_bt_for_spinners(prog: Program) -> Tuple[Dict, Dict]:
    """
    Scan the stack of the task running on each online cpu for spinlock spinners.

    A task counts as a spinner when one of its frames is a queued spinlock
    slowpath function.

    :param prog: drgn program
    :returns: ``(spinners, sp_ids)``. ``spinners`` maps a cpu to the list
      ``[sp_addr, is_locked, task_addr, pid, run_time, cmd]``. ``sp_ids`` maps
      the value returned by get_spinlock_container_type_and_field_name() for
      the spinner's caller to the list of cpus spinning on it.
    """
    # Frame names that mark a task as waiting in the spinlock slowpath.
    wait_on_spin_lock_key_words = {
        "__pv_queued_spin_lock_slowpath",
        "native_queued_spin_lock_slowpath",
        "queued_spin_lock_slowpath",
    }

    spinners = {}
    sp_ids = defaultdict(list)
    for cpu in for_each_online_cpu(prog):
        task = cpu_curr(prog, cpu)
        try:
            trace = prog.stack_trace(task)
        except ValueError:
            continue  # cannot unwind stack of running task
        f_names = []
        # store the index where the keyword appears
        spin_lock_key_word_idx = -1

        for idx, frame in enumerate(trace):
            # keep only the part of the frame name before the first space
            f_name = frame_name(prog, frame).split(" ")[0]
            f_names.append(f_name)
            if f_name in wait_on_spin_lock_key_words:
                spin_lock_key_word_idx = idx
                run_time = timestamp_str(get_current_run_time(prog, cpu))
                pid = task.pid.value_()
                cmd = get_command(task)
                task_addr = task.value_()
                # The spinner is only recorded when the slowpath frame has a
                # usable "lock" local.
                if "lock" in frame.locals():
                    sp = frame["lock"]
                    if not sp.absent_:
                        try:
                            # presumably touches the lock memory so that an
                            # unreadable address raises FaultError here
                            sp.val.read_()
                            sp_addr = sp.value_()
                            is_locked = qspinlock_is_locked(sp)
                        except FaultError:
                            sp_addr = "Unknown"
                            is_locked = "Unknown"
                            pass

                        spinners[cpu] = [
                            sp_addr,
                            is_locked,
                            task_addr,
                            pid,
                            run_time,
                            cmd,
                        ]

        # the caller function should be the first function after the frame containing keyword
        # that does not contain _spin_lock substring (might exist corner cases where the caller indeed contains such substring?)
        # NOTE(review): a cpu can be appended to sp_ids here even when no
        # entry was stored in spinners for it above ("lock" missing or absent).
        if spin_lock_key_word_idx > -1:
            for i, f_name in enumerate(f_names[spin_lock_key_word_idx + 1 :]):
                if "_spin_lock" not in f_name:
                    sp_id = get_spinlock_container_type_and_field_name(f_name)
                    if sp_id:
                        sp_ids[sp_id].append(cpu)
                    break
    return spinners, sp_ids
199+
200+
201+
def get_spinlock_name(funcname: str):
    """
    Try to find the spinlock expression a function passes to a spin_lock call.

    :param funcname: name of the function to look up with cscope
    :returns: the first argument of the first matching spin_lock call with
      leading ``&`` removed, an empty string if nothing could be extracted,
      or None if ``funcname`` is on the skip list
    """
    if funcname in ("raw_spin_rq_lock_nested",):
        return None

    # cscope query 2, filtered down to the lines that invoke spin_lock(),
    # spin_lock_irqsave(), ...
    hits = query_cscope(
        [2],
        funcname,
        keyword=r"spin_lock\(|spin_lock_irq|spin_lock_irqsave|spin_lock_bh",
    )
    if not hits:
        return ""

    # Text following the first whitespace-delimited number in the output.
    line_match = re.search(r"\s{1}(\d+)\s{1}(.*)", hits)
    if line_match is None:
        return ""

    # First parenthesised group of that text, i.e. the call's argument list.
    args_match = re.search(r"\((.*?)\)", line_match.group(2))
    if args_match is None:
        return ""

    first_arg = args_match.group(1).split(",")[0]
    return first_arg.lstrip("&")
230+
231+
232+
def get_spinlock_container_type_and_field_name(funcname: str):
    """
    Resolve the spinlock taken in a function to its container struct and field.

    :param funcname: name of the function to inspect
    :returns: tuple ``(struct type name, spinlock field name)``, or None when
      no spinlock name is found, the name has no ``->`` or ``.`` member
      access, or no ``struct <type> <name>`` line is found for the instance
    """
    lock_expr = get_spinlock_name(funcname)
    if not lock_expr:
        return None

    # Split "<instance>-><field>" or "<instance>.<field>"; "->" is tried
    # first. Only the first two pieces of the split are used.
    for accessor in ("->", "."):
        if accessor in lock_expr:
            instance_name, field_name = lock_expr.split(accessor)[:2]
            break
    else:
        return None

    # There can be several cscope matches for the instance; take the first
    # one that looks like "struct A a" and report A.
    for raw_line in query_cscope([0, 1], instance_name).split("\n"):
        cleaned = raw_line.strip("{;").strip()
        code_match = re.search(r"\s{1}(\d+)\s{1}(.*)", cleaned)
        if not code_match:
            continue
        struct_match = re.search(r"struct\s{1}(.*)", code_match.group(2))
        if not struct_match:
            continue
        words = struct_match.group(1).split(" ")
        if len(words) > 1:
            return words[0], field_name

    return None
273+
274+
275+
def scan_bt_for_owners(prog: Program):
    """
    Find spinlock owners among the tasks running on online cpus and print them.

    Spinners are collected first with scan_bt_for_spinners(). Then each frame
    of each on-cpu task is mapped to a spinlock id with
    get_spinlock_container_type_and_field_name(). A frame whose id matches one
    that another cpu is spinning on is printed as an owner, followed by a
    table of the cpus spinning on that id.

    :param prog: drgn program
    """
    spinners, sp_ids = scan_bt_for_spinners(prog)

    # number of spinlocks: distinct lock addresses recorded for the spinners
    nr_locks = len({v[0] for v in spinners.values()})
    print(f"There are {nr_locks} spinlock(s) detected.")
    nr_lock_owners_found = 0
    for cpu in for_each_online_cpu(prog):
        if nr_lock_owners_found == nr_locks:
            break

        task = cpu_curr(prog, cpu)
        try:
            trace = prog.stack_trace(task)
        except ValueError:
            # Same handling as scan_bt_for_spinners(): cannot unwind stack of
            # running task, so skip this cpu instead of aborting the scan.
            continue

        for frame in trace:
            f_name = frame_name(prog, frame).split(" ")[0]
            sp_id = get_spinlock_container_type_and_field_name(f_name)
            if not sp_id or sp_id not in sp_ids:
                continue
            # it is very unlikely for a cpu to hold and spin on the same lock
            if cpu in sp_ids[sp_id]:
                continue

            nr_lock_owners_found += 1
            print(f"{nr_lock_owners_found}/{nr_locks} of lock owner(s) found!")
            print(f"{frame}({f_name}) is a spinlock owner: ")

            tbl = FixedTable(
                [
                    "CPU:>",
                    "TASK:>x",
                    "PID:>",
                    "CURRENT HOLDTIME:>",
                    "COMMAND:>",
                ]
            )

            hold_time = timestamp_str(get_current_run_time(prog, cpu))
            pid = task.pid.value_()
            cmd = get_command(task)
            task_addr = task.value_()
            tbl.row(cpu, task_addr, pid, hold_time, cmd)
            tbl.write()

            print("It has below spinners: ")
            tbl = FixedTable(
                [
                    "CPU:>",
                    "SPINLOCK:>x",
                    "TASK:>x",
                    "PID:>",
                    "CURRENT SPINTIME:>",
                    "COMMAND:>",
                ]
            )
            # NOTE(review): the recorded lock address can be the string
            # "Unknown"; confirm the "x" column accepts a non-integer.
            cpus_without_details = []
            for sp_cpu in sp_ids[sp_id]:
                details = spinners.get(sp_cpu)
                if details is None:
                    # The cpu was identified as a spinner but no details were
                    # recorded for it; report it after the table rather than
                    # failing with KeyError.
                    cpus_without_details.append(sp_cpu)
                    continue
                sp_addr, _is_locked, sp_task, sp_pid, sp_time, sp_cmd = details
                tbl.row(sp_cpu, sp_addr, sp_task, sp_pid, sp_time, sp_cmd)
            tbl.write()
            for sp_cpu in cpus_without_details:
                print(
                    f"CPU {sp_cpu} is also spinning, "
                    "but its lock details are unavailable."
                )
347+
348+
349+
class Spinlock(CorelensModule):
    """
    Print out spinlock owners and spinners.
    """

    name = "spinlock"

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        """
        Check the prerequisites, then scan for spinlock owners and spinners.

        Prints a message and returns without scanning when ``UEK_CODE_DIR`` is
        unset or not a directory, or when ``cscope`` is not found in PATH.

        :param prog: drgn program
        :param args: parsed command line arguments (not used here)
        """
        source_available = bool(UEK_CODE_DIR) and os.path.isdir(UEK_CODE_DIR)
        if not source_available:
            print(
                "UEK source code not found. Please set UEK_CODE_DIR correctly."
            )
            return

        if shutil.which("cscope") is None:
            print("cscope not installed or not in PATH.")
            return

        scan_bt_for_owners(prog)

tests/test_spinlock.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
from drgn_tools import spinlock
4+
5+
6+
# test for qspinlock
def test_scan_bt_for_spinlocks(prog):
    """
    Smoke test: run spinlock.scan_bt_for_owners() on ``prog``.

    Nothing is asserted; the test passes as long as the call does not raise.
    NOTE(review): ``prog`` is presumably a pytest fixture providing a drgn
    program -- confirm against the test suite's conftest.
    """
    spinlock.scan_bt_for_owners(prog)

0 commit comments

Comments
 (0)