Skip to content

Commit 99d0bb0

Browse files
mpageebonnal
authored andcommitted
pythongh-115999: Enable specialization of CALL instructions in free-threaded builds (python#127123)
The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below. A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe: Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref. Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__. Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes. Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure. A few other miscellaneous changes were also needed: Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it. Add missing co_tlbc for _Py_InitCleanup. Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
1 parent a0cb339 commit 99d0bb0

11 files changed

+220
-92
lines changed

Include/internal/pycore_object.h

+14
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,20 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj,
835835
PyObject *name, PyObject *value);
836836
extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
837837
PyObject **attr);
838+
extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
839+
unsigned int *);
840+
841+
// Cache the provided init method in the specialization cache of type if the
842+
// provided type version matches the current version of the type.
843+
//
844+
// The cached value is borrowed and is only valid if guarded by a type
845+
// version check. In free-threaded builds the init method must also use
846+
// deferred reference counting.
847+
//
848+
// Returns 1 if the value was cached or 0 otherwise.
849+
extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type,
850+
PyObject *init,
851+
unsigned int tp_version);
838852

839853
#ifdef Py_GIL_DISABLED
840854
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)

Lib/test/test_monitoring.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import unittest
1212

1313
import test.support
14-
from test.support import requires_specialization, script_helper
14+
from test.support import requires_specialization_ft, script_helper
1515
from test.support.import_helper import import_module
1616

1717
_testcapi = test.support.import_helper.import_module("_testcapi")
@@ -850,6 +850,13 @@ def __init__(self, events):
850850
def __call__(self, code, offset, val):
851851
self.events.append(("return", code.co_name, val))
852852

853+
# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
854+
# are deferred. We only defer functions defined at the top-level.
855+
class ValueErrorRaiser:
856+
def __init__(self):
857+
raise ValueError()
858+
859+
853860
class ExceptionMonitoringTest(CheckEvents):
854861

855862
exception_recorders = (
@@ -1045,16 +1052,12 @@ def func():
10451052
)
10461053
self.assertEqual(events[0], ("throw", IndexError))
10471054

1048-
@requires_specialization
1055+
@requires_specialization_ft
10491056
def test_no_unwind_for_shim_frame(self):
10501057

1051-
class B:
1052-
def __init__(self):
1053-
raise ValueError()
1054-
10551058
def f():
10561059
try:
1057-
return B()
1060+
return ValueErrorRaiser()
10581061
except ValueError:
10591062
pass
10601063

Lib/test/test_opcache.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,18 @@ def f():
493493
self.assertFalse(f())
494494

495495

496+
# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
497+
# are deferred. We only defer functions defined at the top-level.
498+
class MyClass:
499+
def __init__(self):
500+
pass
501+
502+
503+
class InitTakesArg:
504+
def __init__(self, arg):
505+
self.arg = arg
506+
507+
496508
class TestCallCache(TestBase):
497509
def test_too_many_defaults_0(self):
498510
def f():
@@ -522,12 +534,8 @@ def f(x, y):
522534
f()
523535

524536
@disabling_optimizer
525-
@requires_specialization
537+
@requires_specialization_ft
526538
def test_assign_init_code(self):
527-
class MyClass:
528-
def __init__(self):
529-
pass
530-
531539
def instantiate():
532540
return MyClass()
533541

@@ -544,6 +552,20 @@ def count_args(self, *args):
544552
MyClass.__init__.__code__ = count_args.__code__
545553
instantiate()
546554

555+
@disabling_optimizer
556+
@requires_specialization_ft
557+
def test_push_init_frame_fails(self):
558+
def instantiate():
559+
return InitTakesArg()
560+
561+
for _ in range(2):
562+
with self.assertRaises(TypeError):
563+
instantiate()
564+
self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT")
565+
566+
with self.assertRaises(TypeError):
567+
instantiate()
568+
547569

548570
@threading_helper.requires_working_threading()
549571
class TestRacesDoNotCrash(TestBase):

Lib/test/test_type_cache.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import unittest
33
import dis
44
from test import support
5-
from test.support import import_helper, requires_specialization
5+
from test.support import import_helper, requires_specialization, requires_specialization_ft
66
try:
77
from sys import _clear_type_cache
88
except ImportError:
@@ -110,7 +110,6 @@ class HolderSub(Holder):
110110
HolderSub.value
111111

112112
@support.cpython_only
113-
@requires_specialization
114113
class TypeCacheWithSpecializationTests(unittest.TestCase):
115114
def tearDown(self):
116115
_clear_type_cache()
@@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize):
140139
else:
141140
self.assertIn(opname, self._all_opnames(func))
142141

142+
@requires_specialization
143143
def test_class_load_attr_specialization_user_type(self):
144144
class A:
145145
def foo(self):
@@ -160,6 +160,7 @@ def load_foo_2(type_):
160160

161161
self._check_specialization(load_foo_2, A, "LOAD_ATTR", should_specialize=False)
162162

163+
@requires_specialization
163164
def test_class_load_attr_specialization_static_type(self):
164165
self.assertNotEqual(type_get_version(str), 0)
165166
self.assertNotEqual(type_get_version(bytes), 0)
@@ -171,6 +172,7 @@ def get_capitalize_1(type_):
171172
self.assertEqual(get_capitalize_1(str)('hello'), 'Hello')
172173
self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello')
173174

175+
@requires_specialization
174176
def test_property_load_attr_specialization_user_type(self):
175177
class G:
176178
@property
@@ -192,6 +194,7 @@ def load_x_2(instance):
192194

193195
self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False)
194196

197+
@requires_specialization
195198
def test_store_attr_specialization_user_type(self):
196199
class B:
197200
__slots__ = ("bar",)
@@ -211,6 +214,7 @@ def store_bar_2(type_):
211214

212215
self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False)
213216

217+
@requires_specialization_ft
214218
def test_class_call_specialization_user_type(self):
215219
class F:
216220
def __init__(self):
@@ -231,6 +235,7 @@ def call_class_2(type_):
231235

232236
self._check_specialization(call_class_2, F, "CALL", should_specialize=False)
233237

238+
@requires_specialization
234239
def test_to_bool_specialization_user_type(self):
235240
class H:
236241
pass

Objects/typeobject.c

+55-7
Original file line numberDiff line numberDiff line change
@@ -5528,9 +5528,12 @@ _PyTypes_AfterFork(void)
55285528
}
55295529

55305530
/* Internal API to look for a name through the MRO.
5531-
This returns a borrowed reference, and doesn't set an exception! */
5531+
This returns a strong reference, and doesn't set an exception!
5532+
If nonzero, version is set to the value of type->tp_version at the time of
5533+
the lookup.
5534+
*/
55325535
PyObject *
5533-
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
5536+
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55345537
{
55355538
PyObject *res;
55365539
int error;
@@ -5553,6 +5556,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
55535556
// If the sequence is still valid then we're done
55545557
if (value == NULL || _Py_TryIncref(value)) {
55555558
if (_PySeqLock_EndRead(&entry->sequence, sequence)) {
5559+
if (version != NULL) {
5560+
*version = entry_version;
5561+
}
55565562
return value;
55575563
}
55585564
Py_XDECREF(value);
@@ -5574,6 +5580,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
55745580
OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
55755581
OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
55765582
Py_XINCREF(entry->value);
5583+
if (version != NULL) {
5584+
*version = entry->version;
5585+
}
55775586
return entry->value;
55785587
}
55795588
#endif
@@ -5587,12 +5596,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
55875596
// anyone else can modify our mro or mutate the type.
55885597

55895598
int has_version = 0;
5590-
int version = 0;
5599+
unsigned int assigned_version = 0;
55915600
BEGIN_TYPE_LOCK();
55925601
res = find_name_in_mro(type, name, &error);
55935602
if (MCACHE_CACHEABLE_NAME(name)) {
55945603
has_version = assign_version_tag(interp, type);
5595-
version = type->tp_version_tag;
5604+
assigned_version = type->tp_version_tag;
55965605
}
55975606
END_TYPE_LOCK();
55985607

@@ -5609,28 +5618,67 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
56095618
if (error == -1) {
56105619
PyErr_Clear();
56115620
}
5621+
if (version != NULL) {
5622+
// 0 is not a valid version
5623+
*version = 0;
5624+
}
56125625
return NULL;
56135626
}
56145627

56155628
if (has_version) {
56165629
#if Py_GIL_DISABLED
5617-
update_cache_gil_disabled(entry, name, version, res);
5630+
update_cache_gil_disabled(entry, name, assigned_version, res);
56185631
#else
5619-
PyObject *old_value = update_cache(entry, name, version, res);
5632+
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56205633
Py_DECREF(old_value);
56215634
#endif
56225635
}
5636+
if (version != NULL) {
5637+
// 0 is not a valid version
5638+
*version = has_version ? assigned_version : 0;
5639+
}
56235640
return res;
56245641
}
56255642

5643+
/* Internal API to look for a name through the MRO.
5644+
This returns a strong reference, and doesn't set an exception!
5645+
*/
5646+
PyObject *
5647+
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
5648+
{
5649+
return _PyType_LookupRefAndVersion(type, name, NULL);
5650+
}
5651+
5652+
/* Internal API to look for a name through the MRO.
5653+
This returns a borrowed reference, and doesn't set an exception! */
56265654
PyObject *
56275655
_PyType_Lookup(PyTypeObject *type, PyObject *name)
56285656
{
5629-
PyObject *res = _PyType_LookupRef(type, name);
5657+
PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL);
56305658
Py_XDECREF(res);
56315659
return res;
56325660
}
56335661

5662+
int
5663+
_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init,
5664+
unsigned int tp_version)
5665+
{
5666+
if (!init || !tp_version) {
5667+
return 0;
5668+
}
5669+
int can_cache;
5670+
BEGIN_TYPE_LOCK();
5671+
can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version;
5672+
#ifdef Py_GIL_DISABLED
5673+
can_cache = can_cache && _PyObject_HasDeferredRefcount(init);
5674+
#endif
5675+
if (can_cache) {
5676+
FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init);
5677+
}
5678+
END_TYPE_LOCK();
5679+
return can_cache;
5680+
}
5681+
56345682
static void
56355683
set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
56365684
{

Python/bytecodes.c

+10-6
Original file line numberDiff line numberDiff line change
@@ -3329,15 +3329,15 @@ dummy_func(
33293329
};
33303330

33313331
specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
3332-
#if ENABLE_SPECIALIZATION
3332+
#if ENABLE_SPECIALIZATION_FT
33333333
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
33343334
next_instr = this_instr;
33353335
_Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
33363336
DISPATCH_SAME_OPARG();
33373337
}
33383338
OPCODE_DEFERRED_INC(CALL);
33393339
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
3340-
#endif /* ENABLE_SPECIALIZATION */
3340+
#endif /* ENABLE_SPECIALIZATION_FT */
33413341
}
33423342

33433343
op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
@@ -3722,10 +3722,10 @@ dummy_func(
37223722
DEOPT_IF(!PyStackRef_IsNull(null[0]));
37233723
DEOPT_IF(!PyType_Check(callable_o));
37243724
PyTypeObject *tp = (PyTypeObject *)callable_o;
3725-
DEOPT_IF(tp->tp_version_tag != type_version);
3725+
DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
37263726
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
37273727
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
3728-
PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
3728+
PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
37293729
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
37303730
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
37313731
STAT_INC(CALL, hit);
@@ -3743,17 +3743,19 @@ dummy_func(
37433743
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
37443744
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
37453745
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
3746+
assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
37463747
/* Push self onto stack of shim */
37473748
shim->localsplus[0] = PyStackRef_DUP(self[0]);
37483749
DEAD(init);
37493750
DEAD(self);
3750-
init_frame = _PyEvalFramePushAndInit(
3751+
_PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
37513752
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
37523753
SYNC_SP();
3753-
if (init_frame == NULL) {
3754+
if (temp == NULL) {
37543755
_PyEval_FrameClearAndPop(tstate, shim);
37553756
ERROR_NO_POP();
37563757
}
3758+
init_frame = temp;
37573759
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
37583760
/* Account for pushing the extra frame.
37593761
* We don't check recursion depth here,
@@ -4000,8 +4002,10 @@ dummy_func(
40004002
DEOPT_IF(callable_o != interp->callable_cache.list_append);
40014003
assert(self_o != NULL);
40024004
DEOPT_IF(!PyList_Check(self_o));
4005+
DEOPT_IF(!LOCK_OBJECT(self_o));
40034006
STAT_INC(CALL, hit);
40044007
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
4008+
UNLOCK_OBJECT(self_o);
40054009
PyStackRef_CLOSE(self);
40064010
PyStackRef_CLOSE(callable);
40074011
ERROR_IF(err, error);

0 commit comments

Comments
 (0)