Skip to content

Commit 32f57e4

Browse files
authored
[mypyc] Add SetElement op for initializing struct values (#19437)
Also add Undef value type that can currently only be used as the operand for SetElement to signify that we are creating a new value instead of modifying an existing value. A new struct value can be created by starting with Undef and setting each element sequentially. Each operation produces a new struct value, but the temporaries will be optimized away in the later passes (currently by the C compiler, but we could do something more clever here in the future). This is needed to support packed arrays, which are represented as structs. I extracted this from my packed array branch, and it's currently unused outside tests.
1 parent c66417d commit 32f57e4

File tree

9 files changed

+137
-4
lines changed

9 files changed

+137
-4
lines changed

mypyc/analysis/dataflow.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,14 @@
4545
RegisterOp,
4646
Return,
4747
SetAttr,
48+
SetElement,
4849
SetMem,
4950
Truncate,
5051
TupleGet,
5152
TupleSet,
5253
Unborrow,
5354
Unbox,
55+
Undef,
5456
Unreachable,
5557
Value,
5658
)
@@ -272,6 +274,9 @@ def visit_load_mem(self, op: LoadMem) -> GenAndKill[T]:
272274
def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill[T]:
273275
return self.visit_register_op(op)
274276

277+
def visit_set_element(self, op: SetElement) -> GenAndKill[T]:
278+
return self.visit_register_op(op)
279+
275280
def visit_load_address(self, op: LoadAddress) -> GenAndKill[T]:
276281
return self.visit_register_op(op)
277282

@@ -444,7 +449,7 @@ def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
444449
def non_trivial_sources(op: Op) -> set[Value]:
445450
result = set()
446451
for source in op.sources():
447-
if not isinstance(source, (Integer, Float)):
452+
if not isinstance(source, (Integer, Float, Undef)):
448453
result.add(source)
449454
return result
450455

mypyc/analysis/ircheck.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
ControlOp,
1818
DecRef,
1919
Extend,
20+
Float,
2021
FloatComparisonOp,
2122
FloatNeg,
2223
FloatOp,
@@ -42,12 +43,14 @@
4243
Register,
4344
Return,
4445
SetAttr,
46+
SetElement,
4547
SetMem,
4648
Truncate,
4749
TupleGet,
4850
TupleSet,
4951
Unborrow,
5052
Unbox,
53+
Undef,
5154
Unreachable,
5255
Value,
5356
)
@@ -148,7 +151,7 @@ def check_op_sources_valid(fn: FuncIR) -> list[FnError]:
148151
for block in fn.blocks:
149152
for op in block.ops:
150153
for source in op.sources():
151-
if isinstance(source, Integer):
154+
if isinstance(source, (Integer, Float, Undef)):
152155
pass
153156
elif isinstance(source, Op):
154157
if source not in valid_ops:
@@ -423,6 +426,9 @@ def visit_set_mem(self, op: SetMem) -> None:
423426
def visit_get_element_ptr(self, op: GetElementPtr) -> None:
424427
pass
425428

429+
def visit_set_element(self, op: SetElement) -> None:
430+
pass
431+
426432
def visit_load_address(self, op: LoadAddress) -> None:
427433
pass
428434

mypyc/analysis/selfleaks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
RegisterOp,
3636
Return,
3737
SetAttr,
38+
SetElement,
3839
SetMem,
3940
Truncate,
4041
TupleGet,
@@ -181,6 +182,9 @@ def visit_load_mem(self, op: LoadMem) -> GenAndKill:
181182
def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill:
182183
return CLEAN
183184

185+
def visit_set_element(self, op: SetElement) -> GenAndKill:
186+
return CLEAN
187+
184188
def visit_load_address(self, op: LoadAddress) -> GenAndKill:
185189
return CLEAN
186190

mypyc/codegen/emitfunc.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,14 @@
7070
Register,
7171
Return,
7272
SetAttr,
73+
SetElement,
7374
SetMem,
7475
Truncate,
7576
TupleGet,
7677
TupleSet,
7778
Unborrow,
7879
Unbox,
80+
Undef,
7981
Unreachable,
8082
Value,
8183
)
@@ -813,6 +815,31 @@ def visit_get_element_ptr(self, op: GetElementPtr) -> None:
813815
)
814816
)
815817

818+
def visit_set_element(self, op: SetElement) -> None:
    """Emit C for a SetElement op.

    The result register receives a struct that equals op.src except
    that the field named op.field holds op.item.
    """
    target = self.reg(op)
    new_value = self.reg(op.item)
    field_name = op.field
    if isinstance(op.src, Undef):
        # There is nothing to copy from an undefined struct, so the first
        # assignment is a plain field store.
        self.emit_line(f"{target}.{field_name} = {new_value};")
        return

    # General case: build a copy of the source struct in which a single
    # field is replaced, using a C compound literal.
    #
    # TODO: Can we do better if only a subset of fields are initialized?
    # TODO: Make this less verbose in the common case
    # TODO: Support tuples (or use RStruct for tuples)?
    base = self.reg(op.src)
    struct_type = op.src.type
    assert isinstance(struct_type, RStruct), struct_type
    # One initializer per declared field, in declaration order.
    initializers = [
        new_value if name == field_name else f"{base}.{name}" for name in struct_type.names
    ]
    self.emit_line(f"{target} = ({self.ctype(struct_type)}) {{ {', '.join(initializers)} }};")
842+
816843
def visit_load_address(self, op: LoadAddress) -> None:
817844
typ = op.type
818845
dest = self.reg(op)

mypyc/ir/ops.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class to enable the new behavior. Sometimes adding a new abstract
3434
from mypyc.ir.rtypes import (
3535
RArray,
3636
RInstance,
37+
RStruct,
3738
RTuple,
3839
RType,
3940
RVoid,
@@ -244,6 +245,26 @@ def __init__(self, value: bytes, line: int = -1) -> None:
244245
self.line = line
245246

246247

248+
@final
class Undef(Value):
    """A value whose contents have not been initialized.

    Undef() serves as the starting point when building a struct: pass
    it as the source of the first SetElement op and chain further
    SetElement ops on the results. Pseudocode example:

      r0 = set_element undef MyStruct, "field1", f1
      r1 = set_element r0, "field2", f2
      # r1 is a new struct value with both fields set

    Warning: An undefined value must be initialized before it is used,
    since its contents are garbage until then. In particular, don't
    assume that undefined values are zeroed.
    """

    def __init__(self, rtype: RType) -> None:
        # The type of the value that will eventually be constructed.
        self.type = rtype
266+
267+
247268
class Op(Value):
248269
"""Abstract base class for all IR operations.
249270
@@ -1636,6 +1657,39 @@ def accept(self, visitor: OpVisitor[T]) -> T:
16361657
return visitor.visit_get_element_ptr(self)
16371658

16381659

1660+
@final
class SetElement(RegisterOp):
    """Set the value of a struct element.

    This evaluates to a new struct with the changed value.

    Use together with Undef to initialize a fresh struct value
    (see Undef for more details).

    Attributes:
        src: the struct value the result is derived from (its type must
            be an RStruct); may be Undef when creating a fresh struct
        field: name of the struct field that is replaced
        item: the value stored into the field
    """

    error_kind = ERR_NEVER

    def __init__(self, src: Value, field: str, item: Value, line: int = -1) -> None:
        super().__init__(line)
        assert isinstance(src.type, RStruct), src.type
        # The result has the same struct type as the source.
        self.type = src.type
        self.src = src
        self.item = item
        self.field = field

    def sources(self) -> list[Value]:
        # Both operands are read by this op. Reporting only 'src' would hide
        # 'item' from every pass that works through sources().
        return [self.src, self.item]

    def set_sources(self, new: list[Value]) -> None:
        # Must mirror the order used by sources().
        self.src, self.item = new

    def stolen(self) -> list[Value]:
        # Only the source struct is consumed; 'item' is deliberately not
        # listed here (unchanged from the original behavior).
        return [self.src]

    def accept(self, visitor: OpVisitor[T]) -> T:
        return visitor.visit_set_element(self)
1691+
1692+
16391693
@final
16401694
class LoadAddress(RegisterOp):
16411695
"""Get the address of a value: result = (type)&src
@@ -1908,6 +1962,10 @@ def visit_set_mem(self, op: SetMem) -> T:
19081962
def visit_get_element_ptr(self, op: GetElementPtr) -> T:
19091963
raise NotImplementedError
19101964

1965+
@abstractmethod
1966+
def visit_set_element(self, op: SetElement) -> T:
1967+
raise NotImplementedError
1968+
19111969
@abstractmethod
19121970
def visit_load_address(self, op: LoadAddress) -> T:
19131971
raise NotImplementedError

mypyc/ir/pprint.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,14 @@
5050
Register,
5151
Return,
5252
SetAttr,
53+
SetElement,
5354
SetMem,
5455
Truncate,
5556
TupleGet,
5657
TupleSet,
5758
Unborrow,
5859
Unbox,
60+
Undef,
5961
Unreachable,
6062
Value,
6163
)
@@ -273,6 +275,9 @@ def visit_set_mem(self, op: SetMem) -> str:
273275
def visit_get_element_ptr(self, op: GetElementPtr) -> str:
274276
return self.format("%r = get_element_ptr %r %s :: %t", op, op.src, op.field, op.src_type)
275277

278+
def visit_set_element(self, op: SetElement) -> str:
279+
return self.format("%r = set_element %r, %s, %r", op, op.src, op.field, op.item)
280+
276281
def visit_load_address(self, op: LoadAddress) -> str:
277282
if isinstance(op.src, Register):
278283
return self.format("%r = load_address %r", op, op.src)
@@ -330,6 +335,8 @@ def format(self, fmt: str, *args: Any) -> str:
330335
result.append(repr(arg.value))
331336
elif isinstance(arg, CString):
332337
result.append(f"CString({arg.value!r})")
338+
elif isinstance(arg, Undef):
339+
result.append(f"undef {arg.type.name}")
333340
else:
334341
result.append(self.names[arg])
335342
elif typespec == "d":
@@ -486,7 +493,7 @@ def generate_names_for_ir(args: list[Register], blocks: list[BasicBlock]) -> dic
486493
continue
487494
if isinstance(value, Register) and value.name:
488495
name = value.name
489-
elif isinstance(value, (Integer, Float)):
496+
elif isinstance(value, (Integer, Float, Undef)):
490497
continue
491498
else:
492499
name = "r%d" % temp_index

mypyc/test/test_emitfunc.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@
3535
Register,
3636
Return,
3737
SetAttr,
38+
SetElement,
3839
SetMem,
3940
TupleGet,
4041
Unbox,
42+
Undef,
4143
Unreachable,
4244
Value,
4345
)
@@ -121,6 +123,11 @@ def add_local(name: str, rtype: RType) -> Register:
121123
self.r = add_local("r", RInstance(ir))
122124
self.none = add_local("none", none_rprimitive)
123125

126+
self.struct_type = RStruct(
127+
"Foo", ["b", "x", "y"], [bool_rprimitive, int32_rprimitive, int64_rprimitive]
128+
)
129+
self.st = add_local("st", self.struct_type)
130+
124131
self.context = EmitterContext(NameGenerator([["mod"]]))
125132

126133
def test_goto(self) -> None:
@@ -674,6 +681,17 @@ def test_get_element_ptr(self) -> None:
674681
GetElementPtr(self.o, r, "i64"), """cpy_r_r0 = (CPyPtr)&((Foo *)cpy_r_o)->i64;"""
675682
)
676683

684+
def test_set_element(self) -> None:
    """Check the C emitted for SetElement with and without an Undef source."""
    # Setting the first element of an undefined value uses the compact
    # field-assignment form.
    init_op = SetElement(Undef(self.struct_type), "b", self.b)
    self.assert_emit(init_op, """cpy_r_r0.b = cpy_r_b;""")

    # With a defined source struct, the unchanged fields are copied over
    # into the new value.
    update_op = SetElement(self.st, "x", self.i32)
    self.assert_emit(
        update_op,
        """cpy_r_r0 = (Foo) { cpy_r_st.b, cpy_r_i32, cpy_r_st.y };""",
    )
694+
677695
def test_load_address(self) -> None:
678696
self.assert_emit(
679697
LoadAddress(object_rprimitive, "PyDict_Type"),

mypyc/transform/ir_transform.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
RaiseStandardError,
4040
Return,
4141
SetAttr,
42+
SetElement,
4243
SetMem,
4344
Truncate,
4445
TupleGet,
@@ -214,6 +215,9 @@ def visit_set_mem(self, op: SetMem) -> Value | None:
214215
def visit_get_element_ptr(self, op: GetElementPtr) -> Value | None:
215216
return self.add(op)
216217

218+
def visit_set_element(self, op: SetElement) -> Value | None:
219+
return self.add(op)
220+
217221
def visit_load_address(self, op: LoadAddress) -> Value | None:
218222
return self.add(op)
219223

@@ -354,6 +358,9 @@ def visit_set_mem(self, op: SetMem) -> None:
354358
def visit_get_element_ptr(self, op: GetElementPtr) -> None:
355359
op.src = self.fix_op(op.src)
356360

361+
def visit_set_element(self, op: SetElement) -> None:
    """Patch both value operands of a SetElement op.

    SetElement reads 'src' (the struct being copied) and 'item' (the
    value stored into the field), so both must go through fix_op;
    otherwise 'item' could keep referring to a replaced op.
    """
    op.src = self.fix_op(op.src)
    op.item = self.fix_op(op.item)
363+
357364
def visit_load_address(self, op: LoadAddress) -> None:
358365
if isinstance(op.src, LoadStatic):
359366
new = self.fix_op(op.src)

mypyc/transform/refcount.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
Op,
4444
Register,
4545
RegisterOp,
46+
Undef,
4647
Value,
4748
)
4849

@@ -94,7 +95,7 @@ def is_maybe_undefined(post_must_defined: set[Value], src: Value) -> bool:
9495
def maybe_append_dec_ref(
9596
ops: list[Op], dest: Value, defined: AnalysisDict[Value], key: tuple[BasicBlock, int]
9697
) -> None:
97-
if dest.type.is_refcounted and not isinstance(dest, Integer):
98+
if dest.type.is_refcounted and not isinstance(dest, (Integer, Undef)):
9899
ops.append(DecRef(dest, is_xdec=is_maybe_undefined(defined[key], dest)))
99100

100101

0 commit comments

Comments
 (0)