Commit 0c89056
gh-108724: Add PyMutex and _PyParkingLot APIs (gh-109344)
PyMutex is a one-byte lock with fast, inlineable lock and unlock functions for the common uncontended case. The design is based on WebKit's WTF::Lock.

PyMutex is built using the _PyParkingLot APIs, which provide a cross-platform futex-like API (based on WebKit's WTF::ParkingLot). This internal API will be used for building other synchronization primitives needed to implement PEP 703, such as one-time initialization and events.

This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock.

Uncontended acquisition + release:
* Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns
* macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns
* Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns

PR Overview:

The primary purpose of this PR is to implement PyMutex, but there are a number of supporting pieces (described below).

* PyMutex: A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock. The API is internal only for now.
* _PyParking_Lot: A futex-like API based on the API of the same name in WebKit. Used to implement PyMutex.
* _PyRawMutex: A word-sized lock used to implement _PyParking_Lot.
* PyEvent: A one-time event. This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR.
* pycore_llist.h: Defines common operations on doubly-linked lists. Not strictly necessary (the list operations could be done manually), but they come up frequently in the "nogil" fork. (Similar to https://man.freebsd.org/cgi/man.cgi?queue)

---------

Co-authored-by: Eric Snow <[email protected]>
1 parent 0a31ff0 commit 0c89056

29 files changed: +1665 -21 lines
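To make the benchmark numbers above concrete, here is a minimal timing sketch of an uncontended acquire+release loop. This is not the actual Tools/lockbench/lockbench.py harness; the iteration count, timing code, and output are made up for illustration, and only the PyMutex API comes from this commit (it requires a Py_BUILD_CORE build and a POSIX clock_gettime).

// Illustrative sketch only: rough uncontended acquire+release timing.
#include <stdio.h>
#include <time.h>
#include "pycore_lock.h"    // PyMutex, PyMutex_Lock, PyMutex_Unlock (internal API)

int
main(void)
{
    PyMutex m = (PyMutex){0};           // zero-initialized, no allocation needed
    const long iterations = 10 * 1000 * 1000;

    struct timespec start, end;
    clock_gettime(CLOCK_MONOTONIC, &start);
    for (long i = 0; i < iterations; i++) {
        PyMutex_Lock(&m);               // uncontended: a single compare-exchange
        PyMutex_Unlock(&m);             // uncontended: a single compare-exchange
    }
    clock_gettime(CLOCK_MONOTONIC, &end);

    double ns = (end.tv_sec - start.tv_sec) * 1e9 + (end.tv_nsec - start.tv_nsec);
    printf("uncontended acquire+release: %.1f ns\n", ns / iterations);
    return 0;
}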

Diff for: Include/Python.h (+1)

@@ -48,6 +48,7 @@
 #include "pytypedefs.h"
 #include "pybuffer.h"
 #include "pystats.h"
+#include "pyatomic.h"
 #include "object.h"
 #include "objimpl.h"
 #include "typeslots.h"

Diff for: Include/cpython/pyatomic.h (+3 -6)

@@ -83,9 +83,9 @@
 // # release
 // ...

-#ifndef Py_ATOMIC_H
-#define Py_ATOMIC_H
-
+#ifndef Py_CPYTHON_ATOMIC_H
+#  error "this header file must not be included directly"
+#endif

 // --- _Py_atomic_add --------------------------------------------------------
 // Atomically adds `value` to `obj` and returns the previous value
@@ -501,6 +501,3 @@ static inline void _Py_atomic_fence_release(void);
 #else
 # error "no available pyatomic implementation for this platform/compiler"
 #endif
-
-#endif /* Py_ATOMIC_H */
-
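The include guard removed here implies that cpython/pyatomic.h is now reached only through a public wrapper header (note the "#include "pyatomic.h"" added to Include/Python.h above). That wrapper is not shown in this excerpt; the sketch below is only a guess at CPython's usual wrapper convention, not the actual Include/pyatomic.h from this commit, and the Py_LIMITED_API handling in particular is an assumption.

/* Hypothetical sketch of the wrapper-header pattern (not the real file). */
#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H

#ifndef Py_LIMITED_API
#  define Py_CPYTHON_ATOMIC_H      /* allow the cpython/ header through its guard */
#  include "cpython/pyatomic.h"
#  undef Py_CPYTHON_ATOMIC_H
#endif

#endif /* !Py_ATOMIC_H */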

Diff for: Include/cpython/pyatomic_msc.h (+1 -1)

@@ -906,7 +906,7 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
 #if defined(_M_X64) || defined(_M_IX86)
     *(void * volatile *)obj = value;
 #elif defined(_M_ARM64)
-    __stlr64(obj, (uintptr_t)value);
+    __stlr64((unsigned __int64 volatile *)obj, (uintptr_t)value);
 #else
 # error "no implementation of _Py_atomic_store_ptr_release"
 #endif
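The added cast makes the pointer argument match the parameter type of MSVC's ARM64 store-release intrinsic. For reference, the intrinsic's prototype is roughly as follows (paraphrased from the MSVC ARM64 intrinsics; see <intrin.h> for the authoritative declaration):

/* Approximate prototype, paraphrased; not copied from this commit. */
void __stlr64(unsigned __int64 volatile *Target, unsigned __int64 Value);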

Diff for: Include/internal/pycore_llist.h (new file, +107)

// A doubly-linked list that can be embedded in a struct.
//
// Usage:
//   struct llist_node head = LLIST_INIT(head);
//   typedef struct {
//       ...
//       struct llist_node node;
//       ...
//   } MyObj;
//
//   llist_insert_tail(&head, &obj->node);
//   llist_remove(&obj->node);
//
//   struct llist_node *node;
//   llist_for_each(node, &head) {
//       MyObj *obj = llist_data(node, MyObj, node);
//       ...
//   }
//

#ifndef Py_INTERNAL_LLIST_H
#define Py_INTERNAL_LLIST_H

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "Py_BUILD_CORE must be defined to include this header"
#endif

struct llist_node {
    struct llist_node *next;
    struct llist_node *prev;
};

// Get the struct containing a node.
#define llist_data(node, type, member) \
    (type*)((char*)node - offsetof(type, member))

// Iterate over a list.
#define llist_for_each(node, head) \
    for (node = (head)->next; node != (head); node = node->next)

// Iterate over a list, but allow removal of the current node.
#define llist_for_each_safe(node, head) \
    for (struct llist_node *_next = (node = (head)->next, node->next); \
         node != (head); node = _next, _next = node->next)

#define LLIST_INIT(head) { &head, &head }

static inline void
llist_init(struct llist_node *head)
{
    head->next = head;
    head->prev = head;
}

// Returns 1 if the list is empty, 0 otherwise.
static inline int
llist_empty(struct llist_node *head)
{
    return head->next == head;
}

// Appends to the tail of the list.
static inline void
llist_insert_tail(struct llist_node *head, struct llist_node *node)
{
    node->prev = head->prev;
    node->next = head;
    head->prev->next = node;
    head->prev = node;
}

// Remove a node from the list.
static inline void
llist_remove(struct llist_node *node)
{
    struct llist_node *prev = node->prev;
    struct llist_node *next = node->next;
    prev->next = next;
    next->prev = prev;
    node->prev = NULL;
    node->next = NULL;
}

// Append all nodes from head2 onto head1. head2 is left empty.
static inline void
llist_concat(struct llist_node *head1, struct llist_node *head2)
{
    if (!llist_empty(head2)) {
        head1->prev->next = head2->next;
        head2->next->prev = head1->prev;

        head1->prev = head2->prev;
        head2->prev->next = head1;
        llist_init(head2);
    }
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LLIST_H */
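As a quick illustration of the intrusive-list pattern documented in the header's Usage comment, here is a small, self-contained sketch. The Waiter struct and its values are hypothetical; only the llist_* names come from pycore_llist.h (and a Py_BUILD_CORE build is assumed).

#include <stdio.h>
#include "pycore_llist.h"

// Hypothetical struct embedding a list node (illustration only).
typedef struct {
    int thread_id;
    struct llist_node node;
} Waiter;

int
main(void)
{
    struct llist_node head = LLIST_INIT(head);
    Waiter w1 = {.thread_id = 1};
    Waiter w2 = {.thread_id = 2};

    // Append both waiters to the tail of the list.
    llist_insert_tail(&head, &w1.node);
    llist_insert_tail(&head, &w2.node);

    // Walk the list, recovering the containing struct from each node.
    struct llist_node *node;
    llist_for_each(node, &head) {
        Waiter *w = llist_data(node, Waiter, node);
        printf("waiter %d\n", w->thread_id);
    }

    // Unlink the first waiter; its node pointers are reset to NULL.
    llist_remove(&w1.node);
    return 0;
}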

Diff for: Include/internal/pycore_lock.h (new file, +158)

// Lightweight locks and other synchronization mechanisms.
//
// These implementations are based on WebKit's WTF::Lock. See
// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
// design.
#ifndef Py_INTERNAL_LOCK_H
#define Py_INTERNAL_LOCK_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h"   // _PyTime_t


// A mutex that occupies one byte. The lock can be zero initialized.
//
// Only the two least significant bits are used. The remaining bits should be
// zero:
//   0b00: unlocked
//   0b01: locked
//   0b10: unlocked and has parked threads
//   0b11: locked and has parked threads
//
// Typical initialization:
//   PyMutex m = (PyMutex){0};
//
// Typical usage:
//   PyMutex_Lock(&m);
//   ...
//   PyMutex_Unlock(&m);
typedef struct _PyMutex {
    uint8_t v;
} PyMutex;

#define _Py_UNLOCKED    0
#define _Py_LOCKED      1
#define _Py_HAS_PARKED  2

// (private) slow path for locking the mutex
PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);

// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);

// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
// the mutex is unlocked. If the current thread holds the GIL, then the GIL
// will be released while the thread is parked.
static inline void
PyMutex_Lock(PyMutex *m)
{
    uint8_t expected = _Py_UNLOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
        _PyMutex_LockSlow(m);
    }
}

// Unlocks the mutex.
static inline void
PyMutex_Unlock(PyMutex *m)
{
    uint8_t expected = _Py_LOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
        _PyMutex_UnlockSlow(m);
    }
}

// Checks if the mutex is currently locked.
static inline int
PyMutex_IsLocked(PyMutex *m)
{
    return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
}

typedef enum _PyLockFlags {
    // Do not detach/release the GIL when waiting on the lock.
    _Py_LOCK_DONT_DETACH = 0,

    // Detach/release the GIL while waiting on the lock.
    _PY_LOCK_DETACH = 1,

    // Handle signals if interrupted while waiting on the lock.
    _PY_LOCK_HANDLE_SIGNALS = 2,
} _PyLockFlags;

// Lock a mutex with an optional timeout and additional options. See
// _PyLockFlags for details.
extern PyLockStatus
_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags);

// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
// error messages).
extern int _PyMutex_TryUnlock(PyMutex *m);


// PyEvent is a one-time event notification
typedef struct {
    uint8_t v;
} PyEvent;

// Set the event and notify any waiting threads.
// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);

// Wait for the event to be set. If the event is already set, then this returns
// immediately.
PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);

// Wait for the event to be set, or until the timeout expires. If the event is
// already set, then this returns immediately. Returns 1 if the event was set,
// and 0 if the timeout expired or the thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns);


// _PyRawMutex implements a word-sized mutex that does not depend on the
// parking lot API, and therefore can be used in the parking lot
// implementation.
//
// The mutex uses a packed representation: the least significant bit is used to
// indicate whether the mutex is locked or not. The remaining bits are either
// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
typedef struct {
    uintptr_t v;
} _PyRawMutex;

// Slow paths for lock/unlock
extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);

static inline void
_PyRawMutex_Lock(_PyRawMutex *m)
{
    uintptr_t unlocked = _Py_UNLOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
        return;
    }
    _PyRawMutex_LockSlow(m);
}

static inline void
_PyRawMutex_Unlock(_PyRawMutex *m)
{
    uintptr_t locked = _Py_LOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
        return;
    }
    _PyRawMutex_UnlockSlow(m);
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LOCK_H */
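To make the intended usage of this header concrete, here is a minimal single-threaded sketch that guards shared state with PyMutex and signals one-time completion with PyEvent. The SharedCounter type and the surrounding program are hypothetical; only the PyMutex/PyEvent API comes from pycore_lock.h, and a Py_BUILD_CORE build is assumed.

#include <stdio.h>
#include "pycore_lock.h"

// Hypothetical shared state protected by the new 1-byte mutex.
typedef struct {
    PyMutex mutex;          // zero-initialized: no allocation, no destruction
    int count;
} SharedCounter;

// Hypothetical one-time "ready" event (zero-initialized).
static PyEvent ready;

int
main(void)
{
    SharedCounter c = { .mutex = (PyMutex){0}, .count = 0 };

    PyMutex_Lock(&c.mutex);         // uncontended: a single compare-exchange
    c.count++;
    PyMutex_Unlock(&c.mutex);       // uncontended: a single compare-exchange

    // Setting the event wakes any threads blocked in PyEvent_Wait(); because
    // the event stays set, later waiters return immediately.
    _PyEvent_Notify(&ready);
    PyEvent_Wait(&ready);           // returns immediately: already set

    printf("count = %d, locked = %d\n", c.count, PyMutex_IsLocked(&c.mutex));
    return 0;
}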
