Skip to content

Commit

Permalink
lock: Add additional lock auditing code
Browse files Browse the repository at this point in the history
Keep track of lock owner name and replace lock_depth counter
with a per-cpu list of locks held by the cpu.

This allows us to print the actual locks held in case we hit
the (in)famous message about opal_pollers being run with a
lock held.

It also allows us to warn (and drop them) if locks are still
held when returning to the OS or completing a scheduled job.

Signed-off-by: Benjamin Herrenschmidt <[email protected]>
Reviewed-by: Nicholas Piggin <[email protected]>
[stewart: fix unit tests]
Signed-off-by: Stewart Smith <[email protected]>
  • Loading branch information
ozbenh authored and stewartsmith committed Dec 21, 2017
1 parent ca612b8 commit 76d9bcd
Show file tree
Hide file tree
Showing 21 changed files with 106 additions and 31 deletions.
6 changes: 6 additions & 0 deletions core/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,11 @@ void cpu_process_jobs(void)
if (no_return)
free(job);
func(data);
if (!list_empty(&cpu->locks_held)) {
prlog(PR_ERR, "OPAL job %s returning with locks held\n",
job->name);
drop_my_locks(true);
}
lock(&cpu->job_lock);
if (!no_return) {
cpu->job_count--;
Expand Down Expand Up @@ -822,6 +827,7 @@ static void init_cpu_thread(struct cpu_thread *t,
init_lock(&t->dctl_lock);
init_lock(&t->job_lock);
list_head_init(&t->job_queue);
list_head_init(&t->locks_held);
t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
t->state = state;
t->pir = pir;
Expand Down
41 changes: 32 additions & 9 deletions core/lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ static void unlock_check(struct lock *l)
if (l->in_con_path && this_cpu()->con_suspend == 0)
lock_error(l, "Unlock con lock with console not suspended", 3);

if (this_cpu()->lock_depth == 0)
lock_error(l, "Releasing lock with 0 depth", 4);
if (list_empty(&this_cpu()->locks_held))
lock_error(l, "Releasing lock we don't hold depth", 4);
}

#else
Expand Down Expand Up @@ -89,30 +89,31 @@ static inline bool __try_lock(struct cpu_thread *cpu, struct lock *l)
return false;
}

bool try_lock(struct lock *l)
bool try_lock_caller(struct lock *l, const char *owner)
{
struct cpu_thread *cpu = this_cpu();

if (bust_locks)
return true;

if (__try_lock(cpu, l)) {
l->owner = owner;
if (l->in_con_path)
cpu->con_suspend++;
cpu->lock_depth++;
list_add(&cpu->locks_held, &l->list);
return true;
}
return false;
}

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *owner)
{
if (bust_locks)
return;

lock_check(l);
for (;;) {
if (try_lock(l))
if (try_lock_caller(l, owner))
break;
smt_lowest();
while (l->lock_val)
Expand All @@ -130,8 +131,9 @@ void unlock(struct lock *l)

unlock_check(l);

l->owner = NULL;
list_del(&l->list);
lwsync();
this_cpu()->lock_depth--;
l->lock_val = 0;

/* WARNING: On fast reboot, we can be reset right at that
Expand All @@ -144,19 +146,40 @@ void unlock(struct lock *l)
}
}

bool lock_recursive(struct lock *l)
bool lock_recursive_caller(struct lock *l, const char *caller)
{
if (bust_locks)
return false;

if (lock_held_by_me(l))
return false;

lock(l);
lock_caller(l, caller);
return true;
}

void init_locks(void)
{
bust_locks = false;
}

void dump_locks_list(void)
{
struct lock *l;

prlog(PR_ERR, "Locks held:\n");
list_for_each(&this_cpu()->locks_held, l, list)
prlog(PR_ERR, " %s\n", l->owner);
}

void drop_my_locks(bool warn)
{
struct lock *l;

while((l = list_pop(&this_cpu()->locks_held, struct lock, list)) != NULL) {
if (warn)
prlog(PR_ERR, " %s\n", l->owner);
unlock(l);
}
}

8 changes: 7 additions & 1 deletion core/opal.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
} else {
if (!list_empty(&cpu->locks_held)) {
prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
token, retval);
drop_my_locks(true);
}
sync(); /* release barrier vs quiescing */
cpu->in_opal_call--;
}
Expand Down Expand Up @@ -557,14 +562,15 @@ void opal_run_pollers(void)
}
this_cpu()->in_poller = true;

if (this_cpu()->lock_depth && pollers_with_lock_warnings < 64) {
if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) {
/**
* @fwts-label OPALPollerWithLock
* @fwts-advice opal_run_pollers() was called with a lock
* held, which could lead to deadlock if not excessively
* lucky/careful.
*/
prlog(PR_ERR, "Running pollers with lock held !\n");
dump_locks_list();
backtrace();
pollers_with_lock_warnings++;
if (pollers_with_lock_warnings == 64) {
Expand Down
1 change: 1 addition & 0 deletions core/stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stack.h>
#include <mem_region.h>
#include <unistd.h>
#include <lock.h>

#define STACK_BUF_ENTRIES 60
static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-malloc-speed.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ static inline void real_free(void *p)
char __rodata_start[1], __rodata_end[1];
struct dt_node *dt_root;

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val = 1;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ static inline void real_free(void *p)

struct dt_node *dt_root;

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val = 1;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_range_is_reserved.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ static void real_free(void *p)
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val++;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ static inline void real_free(void *p)

struct dt_node *dt_root;

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val++;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ static inline char *skiboot_strdup(const char *str)
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val = 1;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region_next.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ static void real_free(void *p)
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val++;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region_release_unused.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
l->lock_val++;
}

Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region_release_unused_noalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ static inline void __free(void *p, const char *location __attribute__((unused)))
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
l->lock_val++;
}

Expand Down
3 changes: 2 additions & 1 deletion core/test/run-mem_region_reservations.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ static void real_free(void *p)
#include <assert.h>
#include <stdio.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val++;
}
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ static void *zalloc(size_t size)
#include "../opal-msg.c"
#include <skiboot.h>

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val = 1;
}
Expand Down
6 changes: 5 additions & 1 deletion core/test/run-timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@

static uint64_t stamp, last;
struct lock;
static inline void lock(struct lock *l) { (void)l; }
static inline void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
(void)l;
}
static inline void unlock(struct lock *l) { (void)l; }

unsigned long tb_hz = 512000000;
Expand Down
3 changes: 2 additions & 1 deletion core/test/run-trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ struct debug_descriptor debug_descriptor = {
.trace_mask = -1
};

void lock(struct lock *l)
void lock_caller(struct lock *l, const char *caller)
{
(void)caller;
assert(!l->lock_val);
l->lock_val = 1;
}
Expand Down
2 changes: 1 addition & 1 deletion core/timebase.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void time_wait(unsigned long duration)
{
struct cpu_thread *c = this_cpu();

if (this_cpu()->lock_depth) {
if (!list_empty(&this_cpu()->locks_held)) {
time_wait_nopoll(duration);
return;
}
Expand Down
3 changes: 2 additions & 1 deletion coverity-model.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ void mem_free(struct mem_region *region, void *mem, const char *location) {
__coverity_free__(mem);
}

void lock(struct lock *l) {
void lock_caller(struct lock *l, const char *caller)
{
__coverity_exclusive_lock_acquire__(l);
}

Expand Down
2 changes: 1 addition & 1 deletion hdata/test/stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ static bool false_stub(void) { return false; }
#define NOOP_STUB FALSE_STUB

TRUE_STUB(lock_held_by_me);
NOOP_STUB(lock);
NOOP_STUB(lock_caller);
NOOP_STUB(unlock);
NOOP_STUB(early_uart_init);
NOOP_STUB(mem_reserve_fw);
Expand Down
2 changes: 1 addition & 1 deletion include/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ struct cpu_thread {
uint64_t save_r1;
void *icp_regs;
uint32_t in_opal_call;
uint32_t lock_depth;
uint32_t con_suspend;
struct list_head locks_held;
bool con_need_flush;
bool quiesce_opal_call;
bool in_mcount;
Expand Down
33 changes: 28 additions & 5 deletions include/lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <stdbool.h>
#include <processor.h>
#include <cmpxchg.h>
#include <ccan/list/list.h>
#include <ccan/str/str.h>

struct lock {
/* Lock value has bit 63 as lock bit and the PIR of the owner
Expand All @@ -32,10 +34,19 @@ struct lock {
* in which case taking it will suspend console flushing
*/
bool in_con_path;

/* file/line of lock owner */
const char *owner;

/* linkage in per-cpu list of owned locks */
struct list_node list;
};

/* Initializer */
#define LOCK_UNLOCKED { .lock_val = 0, .in_con_path = 0 }
/* Initializer... not ideal but works for now. If we need different
* values for the fields and/or start getting warnings we'll have to
* play macro tricks
*/
#define LOCK_UNLOCKED { 0 }

/* Note vs. libc and locking:
*
Expand Down Expand Up @@ -65,8 +76,14 @@ static inline void init_lock(struct lock *l)
*l = (struct lock)LOCK_UNLOCKED;
}

extern bool try_lock(struct lock *l);
extern void lock(struct lock *l);
#define LOCK_CALLER __FILE__ ":" stringify(__LINE__)

#define try_lock(l) try_lock_caller(l, LOCK_CALLER)
#define lock(l) lock_caller(l, LOCK_CALLER)
#define lock_recursive(l) lock_recursive_caller(l, LOCK_CALLER)

extern bool try_lock_caller(struct lock *l, const char *caller);
extern void lock_caller(struct lock *l, const char *caller);
extern void unlock(struct lock *l);

extern bool lock_held_by_me(struct lock *l);
Expand All @@ -77,9 +94,15 @@ extern bool lock_held_by_me(struct lock *l);
* returns false if the lock was already held by this cpu. If it returns
* true, then the caller shall release it when done.
*/
extern bool lock_recursive(struct lock *l);
extern bool lock_recursive_caller(struct lock *l, const char *caller);

/* Called after per-cpu data structures are available */
extern void init_locks(void);

/* Dump the list of locks held by this CPU */
extern void dump_locks_list(void);

/* Clean all locks held by CPU (and warn if any) */
extern void drop_my_locks(bool warn);

#endif /* __LOCK_H */

0 comments on commit 76d9bcd

Please sign in to comment.