Skip to content

Commit db67888

Browse files
authored
[mypyc] Use native integers for some sequence indexing operations (#19426)
For example, when iterating over a list, now we use a native integer for the index (which is not exposed to the user). Previously we used tagged integers, but in these use cases they provide no real benefit. This simplifies the IR and should slightly improve performance, as fewer tagged int to native int conversions are needed. Multiple ops have to be migrated in one go, as these interact with each other, and by only changing a subset of them would actually generate more verbose IR, as a bunch of extra coercions would be needed. List of impacted statements: * For loop over sequence * Assignment like `x, y = a` for tuple/list rvalue * Dict iteration * List comprehension For example, consider this example: ``` def foo(a: list[int]) -> None: for x in a: pass ``` Old generated IR was like this: ``` def foo(a): a :: list r0 :: short_int r1 :: ptr r2 :: native_int r3 :: short_int r4 :: bit r5 :: native_int r6, r7 :: ptr r8 :: native_int r9 :: ptr r10 :: object r11 :: int r12 :: short_int r13 :: None L0: r0 = 0 L1: r1 = get_element_ptr a ob_size :: PyVarObject r2 = load_mem r1 :: native_int* r3 = r2 << 1 r4 = r0 < r3 :: signed if r4 goto L2 else goto L5 :: bool L2: r5 = r0 >> 1 r6 = get_element_ptr a ob_item :: PyListObject r7 = load_mem r6 :: ptr* r8 = r5 * 8 r9 = r7 + r8 r10 = load_mem r9 :: builtins.object* inc_ref r10 r11 = unbox(int, r10) dec_ref r10 if is_error(r11) goto L6 (error at foo:2) else goto L3 L3: dec_ref r11 :: int L4: r12 = r0 + 2 r0 = r12 goto L1 L5: return 1 L6: r13 = <error> :: None return r13 ``` Now the generated IR is simpler: ``` def foo(a): a :: list r0 :: native_int r1 :: ptr r2 :: native_int r3 :: bit r4, r5 :: ptr r6 :: native_int r7 :: ptr r8 :: object r9 :: int r10 :: native_int r11 :: None L0: r0 = 0 L1: r1 = get_element_ptr a ob_size :: PyVarObject r2 = load_mem r1 :: native_int* r3 = r0 < r2 :: signed if r3 goto L2 else goto L5 :: bool L2: r4 = get_element_ptr a ob_item :: PyListObject r5 = load_mem r4 :: ptr* r6 = r0 * 8 r7 = r5 + r6 r8 = load_mem r7 :: builtins.object* inc_ref r8 r9 = unbox(int, r8) dec_ref r8 if is_error(r9) goto L6 (error at foo:2) else goto L3 L3: dec_ref r9 :: int L4: r10 = r0 + 1 r0 = r10 goto L1 L5: return 1 L6: r11 = <error> :: None return r11 ```
1 parent 2f7ba4e commit db67888

17 files changed

+1128
-1217
lines changed

mypyc/irbuild/builder.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
from mypyc.primitives.list_ops import list_get_item_unsafe_op, list_pop_last, to_list
130130
from mypyc.primitives.misc_ops import check_unpack_count_op, get_module_dict_op, import_op
131131
from mypyc.primitives.registry import CFunctionDescription, function_ops
132+
from mypyc.primitives.tuple_ops import tuple_get_item_unsafe_op
132133

133134
# These int binary operations can borrow their operands safely, since the
134135
# primitives take this into consideration.
@@ -772,10 +773,15 @@ def process_sequence_assignment(
772773
values = []
773774
for i in range(len(target.items)):
774775
item = target.items[i]
775-
index = self.builder.load_int(i)
776+
index: Value
776777
if is_list_rprimitive(rvalue.type):
778+
index = Integer(i, c_pyssize_t_rprimitive)
777779
item_value = self.primitive_op(list_get_item_unsafe_op, [rvalue, index], line)
780+
elif is_tuple_rprimitive(rvalue.type):
781+
index = Integer(i, c_pyssize_t_rprimitive)
782+
item_value = self.call_c(tuple_get_item_unsafe_op, [rvalue, index], line)
778783
else:
784+
index = self.builder.load_int(i)
779785
item_value = self.builder.gen_method_call(
780786
rvalue, "__getitem__", [index], item.type, line
781787
)

mypyc/irbuild/for_helpers.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
RTuple,
4545
RType,
4646
bool_rprimitive,
47+
c_pyssize_t_rprimitive,
4748
int_rprimitive,
4849
is_dict_rprimitive,
4950
is_fixed_width_rtype,
@@ -75,6 +76,7 @@
7576
from mypyc.primitives.misc_ops import stop_async_iteration_op
7677
from mypyc.primitives.registry import CFunctionDescription
7778
from mypyc.primitives.set_ops import set_add_op
79+
from mypyc.primitives.tuple_ops import tuple_get_item_unsafe_op
7880

7981
GenFunc = Callable[[], None]
8082

@@ -586,7 +588,9 @@ def gen_cleanup(self) -> None:
586588

587589
def load_len(self, expr: Value | AssignmentTarget) -> Value:
588590
"""A helper to get collection length, used by several subclasses."""
589-
return self.builder.builder.builtin_len(self.builder.read(expr, self.line), self.line)
591+
return self.builder.builder.builtin_len(
592+
self.builder.read(expr, self.line), self.line, use_pyssize_t=True
593+
)
590594

591595

592596
class ForIterable(ForGenerator):
@@ -766,6 +770,8 @@ def unsafe_index(builder: IRBuilder, target: Value, index: Value, line: int) ->
766770
# so we just check manually.
767771
if is_list_rprimitive(target.type):
768772
return builder.primitive_op(list_get_item_unsafe_op, [target, index], line)
773+
elif is_tuple_rprimitive(target.type):
774+
return builder.call_c(tuple_get_item_unsafe_op, [target, index], line)
769775
else:
770776
return builder.gen_method_call(target, "__getitem__", [index], None, line)
771777

@@ -784,11 +790,9 @@ def init(self, expr_reg: Value, target_type: RType, reverse: bool) -> None:
784790
# environment class.
785791
self.expr_target = builder.maybe_spill(expr_reg)
786792
if not reverse:
787-
index_reg: Value = Integer(0)
793+
index_reg: Value = Integer(0, c_pyssize_t_rprimitive)
788794
else:
789-
index_reg = builder.binary_op(
790-
self.load_len(self.expr_target), Integer(1), "-", self.line
791-
)
795+
index_reg = builder.builder.int_sub(self.load_len(self.expr_target), 1)
792796
self.index_target = builder.maybe_spill_assignable(index_reg)
793797
self.target_type = target_type
794798

@@ -838,13 +842,7 @@ def gen_step(self) -> None:
838842
builder = self.builder
839843
line = self.line
840844
step = 1 if not self.reverse else -1
841-
add = builder.int_op(
842-
short_int_rprimitive,
843-
builder.read(self.index_target, line),
844-
Integer(step),
845-
IntOp.ADD,
846-
line,
847-
)
845+
add = builder.builder.int_add(builder.read(self.index_target, line), step)
848846
builder.assign(self.index_target, add, line)
849847

850848

mypyc/lib-rt/CPy.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ PyObject *CPyList_GetItemShortBorrow(PyObject *list, CPyTagged index);
652652
PyObject *CPyList_GetItemInt64(PyObject *list, int64_t index);
653653
PyObject *CPyList_GetItemInt64Borrow(PyObject *list, int64_t index);
654654
bool CPyList_SetItem(PyObject *list, CPyTagged index, PyObject *value);
655-
bool CPyList_SetItemUnsafe(PyObject *list, CPyTagged index, PyObject *value);
655+
void CPyList_SetItemUnsafe(PyObject *list, Py_ssize_t index, PyObject *value);
656656
bool CPyList_SetItemInt64(PyObject *list, int64_t index, PyObject *value);
657657
PyObject *CPyList_PopLast(PyObject *obj);
658658
PyObject *CPyList_Pop(PyObject *obj, CPyTagged index);
@@ -703,14 +703,13 @@ tuple_T4CIOO CPyDict_NextItem(PyObject *dict_or_iter, CPyTagged offset);
703703
int CPyMapping_Check(PyObject *obj);
704704

705705
// Check that dictionary didn't change size during iteration.
706-
static inline char CPyDict_CheckSize(PyObject *dict, CPyTagged size) {
706+
static inline char CPyDict_CheckSize(PyObject *dict, Py_ssize_t size) {
707707
if (!PyDict_CheckExact(dict)) {
708708
// Dict subclasses will be checked by Python runtime.
709709
return 1;
710710
}
711-
Py_ssize_t py_size = CPyTagged_AsSsize_t(size);
712711
Py_ssize_t dict_size = PyDict_Size(dict);
713-
if (py_size != dict_size) {
712+
if (size != dict_size) {
714713
PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration");
715714
return 0;
716715
}
@@ -783,7 +782,8 @@ bool CPySet_Remove(PyObject *set, PyObject *key);
783782

784783
PyObject *CPySequenceTuple_GetItem(PyObject *tuple, CPyTagged index);
785784
PyObject *CPySequenceTuple_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
786-
bool CPySequenceTuple_SetItemUnsafe(PyObject *tuple, CPyTagged index, PyObject *value);
785+
PyObject *CPySequenceTuple_GetItemUnsafe(PyObject *tuple, Py_ssize_t index);
786+
void CPySequenceTuple_SetItemUnsafe(PyObject *tuple, Py_ssize_t index, PyObject *value);
787787

788788

789789
// Exception operations

mypyc/lib-rt/list_ops.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,15 +231,8 @@ bool CPyList_SetItemInt64(PyObject *list, int64_t index, PyObject *value) {
231231
}
232232

233233
// This function should only be used to fill in brand new lists.
234-
bool CPyList_SetItemUnsafe(PyObject *list, CPyTagged index, PyObject *value) {
235-
if (CPyTagged_CheckShort(index)) {
236-
Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
237-
PyList_SET_ITEM(list, n, value);
238-
return true;
239-
} else {
240-
PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
241-
return false;
242-
}
234+
void CPyList_SetItemUnsafe(PyObject *list, Py_ssize_t index, PyObject *value) {
235+
PyList_SET_ITEM(list, index, value);
243236
}
244237

245238
PyObject *CPyList_PopLast(PyObject *obj)

mypyc/lib-rt/tuple_ops.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,17 @@ PyObject *CPySequenceTuple_GetSlice(PyObject *obj, CPyTagged start, CPyTagged en
4646
return CPyObject_GetSlice(obj, start, end);
4747
}
4848

49+
// No error checking
50+
PyObject *CPySequenceTuple_GetItemUnsafe(PyObject *tuple, Py_ssize_t index)
51+
{
52+
PyObject *result = PyTuple_GET_ITEM(tuple, index);
53+
Py_INCREF(result);
54+
return result;
55+
}
56+
4957
// PyTuple_SET_ITEM does no error checking,
5058
// and should only be used to fill in brand new tuples.
51-
bool CPySequenceTuple_SetItemUnsafe(PyObject *tuple, CPyTagged index, PyObject *value)
59+
void CPySequenceTuple_SetItemUnsafe(PyObject *tuple, Py_ssize_t index, PyObject *value)
5260
{
53-
if (CPyTagged_CheckShort(index)) {
54-
Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
55-
PyTuple_SET_ITEM(tuple, n, value);
56-
return true;
57-
} else {
58-
PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
59-
return false;
60-
}
61+
PyTuple_SET_ITEM(tuple, index, value);
6162
}

mypyc/primitives/dict_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@
289289

290290
# check that len(dict) == const during iteration
291291
dict_check_size_op = custom_op(
292-
arg_types=[dict_rprimitive, int_rprimitive],
292+
arg_types=[dict_rprimitive, c_pyssize_t_rprimitive],
293293
return_type=bit_rprimitive,
294294
c_function_name="CPyDict_CheckSize",
295295
error_kind=ERR_FALSE,

mypyc/primitives/list_ops.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
object_rprimitive,
1414
pointer_rprimitive,
1515
short_int_rprimitive,
16+
void_rtype,
1617
)
1718
from mypyc.primitives.registry import (
1819
ERR_NEG_INT,
@@ -154,7 +155,7 @@
154155
# that is in-bounds for the list.
155156
list_get_item_unsafe_op = custom_primitive_op(
156157
name="list_get_item_unsafe",
157-
arg_types=[list_rprimitive, short_int_rprimitive],
158+
arg_types=[list_rprimitive, c_pyssize_t_rprimitive],
158159
return_type=object_rprimitive,
159160
error_kind=ERR_NEVER,
160161
)
@@ -183,10 +184,10 @@
183184
# PyList_SET_ITEM does no error checking,
184185
# and should only be used to fill in brand new lists.
185186
new_list_set_item_op = custom_op(
186-
arg_types=[list_rprimitive, int_rprimitive, object_rprimitive],
187-
return_type=bit_rprimitive,
187+
arg_types=[list_rprimitive, c_pyssize_t_rprimitive, object_rprimitive],
188+
return_type=void_rtype,
188189
c_function_name="CPyList_SetItemUnsafe",
189-
error_kind=ERR_FALSE,
190+
error_kind=ERR_NEVER,
190191
steals=[False, False, True],
191192
)
192193

mypyc/primitives/tuple_ops.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66

77
from __future__ import annotations
88

9-
from mypyc.ir.ops import ERR_FALSE, ERR_MAGIC
9+
from mypyc.ir.ops import ERR_MAGIC, ERR_NEVER
1010
from mypyc.ir.rtypes import (
11-
bit_rprimitive,
1211
c_pyssize_t_rprimitive,
1312
int_rprimitive,
1413
list_rprimitive,
1514
object_rprimitive,
1615
tuple_rprimitive,
16+
void_rtype,
1717
)
1818
from mypyc.primitives.registry import binary_op, custom_op, function_op, load_address_op, method_op
1919

@@ -29,6 +29,15 @@
2929
error_kind=ERR_MAGIC,
3030
)
3131

32+
# This is unsafe because it assumes that the index is a non-negative integer
33+
# that is in-bounds for the tuple.
34+
tuple_get_item_unsafe_op = custom_op(
35+
arg_types=[tuple_rprimitive, c_pyssize_t_rprimitive],
36+
return_type=object_rprimitive,
37+
c_function_name="CPySequenceTuple_GetItemUnsafe",
38+
error_kind=ERR_NEVER,
39+
)
40+
3241
# Construct a boxed tuple from items: (item1, item2, ...)
3342
new_tuple_op = custom_op(
3443
arg_types=[c_pyssize_t_rprimitive],
@@ -48,10 +57,10 @@
4857
# PyTuple_SET_ITEM does no error checking,
4958
# and should only be used to fill in brand new tuples.
5059
new_tuple_set_item_op = custom_op(
51-
arg_types=[tuple_rprimitive, int_rprimitive, object_rprimitive],
52-
return_type=bit_rprimitive,
60+
arg_types=[tuple_rprimitive, c_pyssize_t_rprimitive, object_rprimitive],
61+
return_type=void_rtype,
5362
c_function_name="CPySequenceTuple_SetItemUnsafe",
54-
error_kind=ERR_FALSE,
63+
error_kind=ERR_NEVER,
5564
steals=[False, False, True],
5665
)
5766

0 commit comments

Comments
 (0)