From 8fc9699f3ec487d345b5b5ab9418b2c1c6ea214b Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sat, 10 Jun 2023 15:18:02 +0100 Subject: [PATCH 1/3] [mypyc] Add SetElement op for initializing struct values Also add Undef value type that can currently only used as the operand for SetElement to signify that we are creating a new value instead of modifying an existing value. --- mypyc/analysis/dataflow.py | 7 ++++- mypyc/analysis/ircheck.py | 8 ++++- mypyc/analysis/selfleaks.py | 4 +++ mypyc/codegen/emitfunc.py | 22 +++++++++++++ mypyc/ir/ops.py | 56 +++++++++++++++++++++++++++++++++ mypyc/ir/pprint.py | 9 +++++- mypyc/test/test_emitfunc.py | 18 +++++++++++ mypyc/transform/ir_transform.py | 7 +++++ mypyc/transform/refcount.py | 3 +- 9 files changed, 130 insertions(+), 4 deletions(-) diff --git a/mypyc/analysis/dataflow.py b/mypyc/analysis/dataflow.py index db62ef1700fa..827c70a0eb4d 100644 --- a/mypyc/analysis/dataflow.py +++ b/mypyc/analysis/dataflow.py @@ -45,12 +45,14 @@ RegisterOp, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, TupleSet, Unborrow, Unbox, + Undef, Unreachable, Value, ) @@ -272,6 +274,9 @@ def visit_load_mem(self, op: LoadMem) -> GenAndKill[T]: def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill[T]: return self.visit_register_op(op) + def visit_set_element(self, op: SetElement) -> GenAndKill[T]: + return self.visit_register_op(op) + def visit_load_address(self, op: LoadAddress) -> GenAndKill[T]: return self.visit_register_op(op) @@ -444,7 +449,7 @@ def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]: def non_trivial_sources(op: Op) -> set[Value]: result = set() for source in op.sources(): - if not isinstance(source, (Integer, Float)): + if not isinstance(source, (Integer, Float, Undef)): result.add(source) return result diff --git a/mypyc/analysis/ircheck.py b/mypyc/analysis/ircheck.py index 88737ac208de..4ad2a52c1036 100644 --- a/mypyc/analysis/ircheck.py +++ b/mypyc/analysis/ircheck.py @@ -17,6 +17,7 @@ ControlOp, DecRef, Extend, + Float, FloatComparisonOp, FloatNeg, FloatOp, @@ -42,12 +43,14 @@ Register, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, TupleSet, Unborrow, Unbox, + Undef, Unreachable, Value, ) @@ -148,7 +151,7 @@ def check_op_sources_valid(fn: FuncIR) -> list[FnError]: for block in fn.blocks: for op in block.ops: for source in op.sources(): - if isinstance(source, Integer): + if isinstance(source, (Integer, Float, Undef)): pass elif isinstance(source, Op): if source not in valid_ops: @@ -423,6 +426,9 @@ def visit_set_mem(self, op: SetMem) -> None: def visit_get_element_ptr(self, op: GetElementPtr) -> None: pass + def visit_set_element(self, op: SetElement) -> None: + pass + def visit_load_address(self, op: LoadAddress) -> None: pass diff --git a/mypyc/analysis/selfleaks.py b/mypyc/analysis/selfleaks.py index 9f7e00db78d2..8f46cbe3312b 100644 --- a/mypyc/analysis/selfleaks.py +++ b/mypyc/analysis/selfleaks.py @@ -35,6 +35,7 @@ RegisterOp, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, @@ -181,6 +182,9 @@ def visit_load_mem(self, op: LoadMem) -> GenAndKill: def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill: return CLEAN + def visit_set_element(self, op: SetElement) -> GenAndKill: + return CLEAN + def visit_load_address(self, op: LoadAddress) -> GenAndKill: return CLEAN diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py index 3fdd08037d1a..98972b830c18 100644 --- a/mypyc/codegen/emitfunc.py +++ b/mypyc/codegen/emitfunc.py @@ -70,12 +70,14 @@ Register, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, TupleSet, Unborrow, Unbox, + Undef, Unreachable, Value, ) @@ -813,6 +815,26 @@ def visit_get_element_ptr(self, op: GetElementPtr) -> None: ) ) + def visit_set_element(self, op: SetElement) -> None: + # TODO: do properly + dest = self.reg(op) + item = self.reg(op.item) + field = op.field + if isinstance(op.src, Undef): + self.emit_line(f"{dest}.{field} = {item};") + else: + src = self.reg(op.src) + # TODO: Support tuples (or use RStruct for tuples) + src_type = op.src.type + assert isinstance(src_type, RStruct), src_type + init_items = [] + for n in src_type.names: + if n != field: + init_items.append(f"{src}.{n}") + else: + init_items.append(item) + self.emit_line(f"{dest} = ({self.ctype(src_type)}) {{ {', '.join(init_items)} }};") + def visit_load_address(self, op: LoadAddress) -> None: typ = op.type dest = self.reg(op) diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py index f362b0cca197..fa2b16a01429 100644 --- a/mypyc/ir/ops.py +++ b/mypyc/ir/ops.py @@ -34,6 +34,7 @@ class to enable the new behavior. Sometimes adding a new abstract from mypyc.ir.rtypes import ( RArray, RInstance, + RStruct, RTuple, RType, RVoid, @@ -244,6 +245,24 @@ def __init__(self, value: bytes, line: int = -1) -> None: self.line = line +class Undef(Value): + """An undefined value. + + Use Undef() as the initial value followed by one or more SetElement + ops to initialize a struct. Pseudocode example: + + r0 = set_element undef MyStruct, "field1", f1 + r1 = set_element r0, "field2", f2 + # r1 now has new struct value with two fields set + + Warning: The generated code can use an arbitrary runtime values for + this (likely not explicitly initialized). + """ + + def __init__(self, rtype: RType) -> None: + self.type = rtype + + class Op(Value): """Abstract base class for all IR operations. @@ -1636,6 +1655,39 @@ def accept(self, visitor: OpVisitor[T]) -> T: return visitor.visit_get_element_ptr(self) +@final +class SetElement(RegisterOp): + """Set the value of a struct element. + + This evaluates to a new struct with the changed value. + + Use together with Undef to initialize a fresh struct value + (see Undef for more details). + """ + + error_kind = ERR_NEVER + + def __init__(self, src: Value, field: str, item: Value, line: int = -1) -> None: + super().__init__(line) + assert isinstance(src.type, RStruct), src.type + self.type = src.type + self.src = src + self.item = item + self.field = field + + def sources(self) -> list[Value]: + return [self.src] + + def set_sources(self, new: list[Value]) -> None: + (self.src,) = new + + def stolen(self) -> list[Value]: + return [self.src] + + def accept(self, visitor: OpVisitor[T]) -> T: + return visitor.visit_set_element(self) + + @final class LoadAddress(RegisterOp): """Get the address of a value: result = (type)&src @@ -1908,6 +1960,10 @@ def visit_set_mem(self, op: SetMem) -> T: def visit_get_element_ptr(self, op: GetElementPtr) -> T: raise NotImplementedError + @abstractmethod + def visit_set_element(self, op: SetElement) -> T: + raise NotImplementedError + @abstractmethod def visit_load_address(self, op: LoadAddress) -> T: raise NotImplementedError diff --git a/mypyc/ir/pprint.py b/mypyc/ir/pprint.py index 5bb11cc231cc..2a239a0b4d9d 100644 --- a/mypyc/ir/pprint.py +++ b/mypyc/ir/pprint.py @@ -50,12 +50,14 @@ Register, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, TupleSet, Unborrow, Unbox, + Undef, Unreachable, Value, ) @@ -273,6 +275,9 @@ def visit_set_mem(self, op: SetMem) -> str: def visit_get_element_ptr(self, op: GetElementPtr) -> str: return self.format("%r = get_element_ptr %r %s :: %t", op, op.src, op.field, op.src_type) + def visit_set_element(self, op: SetElement) -> str: + return self.format("%r = set_element %r, %s, %r", op, op.src, op.field, op.item) + def visit_load_address(self, op: LoadAddress) -> str: if isinstance(op.src, Register): return self.format("%r = load_address %r", op, op.src) @@ -330,6 +335,8 @@ def format(self, fmt: str, *args: Any) -> str: result.append(repr(arg.value)) elif isinstance(arg, CString): result.append(f"CString({arg.value!r})") + elif isinstance(arg, Undef): + result.append(f"undef {arg.type.name}") else: result.append(self.names[arg]) elif typespec == "d": @@ -486,7 +493,7 @@ def generate_names_for_ir(args: list[Register], blocks: list[BasicBlock]) -> dic continue if isinstance(value, Register) and value.name: name = value.name - elif isinstance(value, (Integer, Float)): + elif isinstance(value, (Integer, Float, Undef)): continue else: name = "r%d" % temp_index diff --git a/mypyc/test/test_emitfunc.py b/mypyc/test/test_emitfunc.py index 6be4875dafa1..6382271cfe94 100644 --- a/mypyc/test/test_emitfunc.py +++ b/mypyc/test/test_emitfunc.py @@ -35,9 +35,11 @@ Register, Return, SetAttr, + SetElement, SetMem, TupleGet, Unbox, + Undef, Unreachable, Value, ) @@ -121,6 +123,11 @@ def add_local(name: str, rtype: RType) -> Register: self.r = add_local("r", RInstance(ir)) self.none = add_local("none", none_rprimitive) + self.struct_type = RStruct( + "Foo", ["b", "x", "y"], [bool_rprimitive, int32_rprimitive, int64_rprimitive] + ) + self.st = add_local("st", self.struct_type) + self.context = EmitterContext(NameGenerator([["mod"]])) def test_goto(self) -> None: @@ -674,6 +681,17 @@ def test_get_element_ptr(self) -> None: GetElementPtr(self.o, r, "i64"), """cpy_r_r0 = (CPyPtr)&((Foo *)cpy_r_o)->i64;""" ) + def test_set_element(self) -> None: + # Use compact syntax when setting the initial element of an undefined value + self.assert_emit( + SetElement(Undef(self.struct_type), "b", self.b), """cpy_r_r0.b = cpy_r_b;""" + ) + # We propagate the unchanged values in subsequent assignments + self.assert_emit( + SetElement(self.st, "x", self.i32), + """cpy_r_r0 = (Foo) { cpy_r_st.b, cpy_r_i32, cpy_r_st.y };""", + ) + def test_load_address(self) -> None: self.assert_emit( LoadAddress(object_rprimitive, "PyDict_Type"), diff --git a/mypyc/transform/ir_transform.py b/mypyc/transform/ir_transform.py index 7834fed39465..bcb6db9b0daf 100644 --- a/mypyc/transform/ir_transform.py +++ b/mypyc/transform/ir_transform.py @@ -39,6 +39,7 @@ RaiseStandardError, Return, SetAttr, + SetElement, SetMem, Truncate, TupleGet, @@ -214,6 +215,9 @@ def visit_set_mem(self, op: SetMem) -> Value | None: def visit_get_element_ptr(self, op: GetElementPtr) -> Value | None: return self.add(op) + def visit_set_element(self, op: SetElement) -> Value | None: + return self.add(op) + def visit_load_address(self, op: LoadAddress) -> Value | None: return self.add(op) @@ -354,6 +358,9 @@ def visit_set_mem(self, op: SetMem) -> None: def visit_get_element_ptr(self, op: GetElementPtr) -> None: op.src = self.fix_op(op.src) + def visit_set_element(self, op: SetElement) -> None: + op.src = self.fix_op(op.src) + def visit_load_address(self, op: LoadAddress) -> None: if isinstance(op.src, LoadStatic): new = self.fix_op(op.src) diff --git a/mypyc/transform/refcount.py b/mypyc/transform/refcount.py index c589918986f0..60daebc415fd 100644 --- a/mypyc/transform/refcount.py +++ b/mypyc/transform/refcount.py @@ -43,6 +43,7 @@ Op, Register, RegisterOp, + Undef, Value, ) @@ -94,7 +95,7 @@ def is_maybe_undefined(post_must_defined: set[Value], src: Value) -> bool: def maybe_append_dec_ref( ops: list[Op], dest: Value, defined: AnalysisDict[Value], key: tuple[BasicBlock, int] ) -> None: - if dest.type.is_refcounted and not isinstance(dest, Integer): + if dest.type.is_refcounted and not isinstance(dest, (Integer, Undef)): ops.append(DecRef(dest, is_xdec=is_maybe_undefined(defined[key], dest))) From 93a99e825226dda4827476a99fa26e61d267619a Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 14 Jul 2025 16:41:11 +0100 Subject: [PATCH 2/3] Tweaks --- mypyc/codegen/emitfunc.py | 9 +++++++-- mypyc/ir/ops.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py index 98972b830c18..9012f072f96b 100644 --- a/mypyc/codegen/emitfunc.py +++ b/mypyc/codegen/emitfunc.py @@ -816,15 +816,20 @@ def visit_get_element_ptr(self, op: GetElementPtr) -> None: ) def visit_set_element(self, op: SetElement) -> None: - # TODO: do properly dest = self.reg(op) item = self.reg(op.item) field = op.field if isinstance(op.src, Undef): + # First assignment to an undefined struct is trivial. self.emit_line(f"{dest}.{field} = {item};") else: + # In the general case create a copy of the struct with a single + # item modified. + # + # TODO: Can we do better if only a subset of fields are initialized? + # TODO: Make this less verbose in the common case + # TODO: Support tuples (or use RStruct for tuples)? src = self.reg(op.src) - # TODO: Support tuples (or use RStruct for tuples) src_type = op.src.type assert isinstance(src_type, RStruct), src_type init_items = [] diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py index fa2b16a01429..6e83c9b98be7 100644 --- a/mypyc/ir/ops.py +++ b/mypyc/ir/ops.py @@ -245,6 +245,7 @@ def __init__(self, value: bytes, line: int = -1) -> None: self.line = line +@final class Undef(Value): """An undefined value. From 7243fa536e4465c6887a54de1baf3cd2b3ec2cd7 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 15 Jul 2025 11:59:26 +0100 Subject: [PATCH 3/3] Clarify docstring --- mypyc/ir/ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py index 6e83c9b98be7..4829dd6a903d 100644 --- a/mypyc/ir/ops.py +++ b/mypyc/ir/ops.py @@ -256,8 +256,9 @@ class Undef(Value): r1 = set_element r0, "field2", f2 # r1 now has new struct value with two fields set - Warning: The generated code can use an arbitrary runtime values for - this (likely not explicitly initialized). + Warning: Always initialize undefined values before using them, + as otherwise the values are garbage. You shouldn't expect that + undefined values are zeroed, in particular. """ def __init__(self, rtype: RType) -> None: