Skip to content

Commit d4d53e9

Browse files
committed
py/emitnative: Access qstr values using indirection table qstr_table.
This changes the native emitter to access qstr values using the qstr indirection table qstr_table, but only when generating native code that will be saved to a .mpy file. This makes the resulting native code fully static, i.e. it does not require any fix-ups or rewriting when it is imported.

The performance of native code is more or less unchanged. Benchmark results on PYBv1.0 (using --via-mpy and --emit native) are:

N=100 M=100 baseline -> this-commit diff diff% (error%)
bm_chaos.py 407.16 -> 411.85 : +4.69 = +1.152% (+/-0.01%)
bm_fannkuch.py 100.89 -> 101.20 : +0.31 = +0.307% (+/-0.01%)
bm_fft.py 3521.17 -> 3441.72 : -79.45 = -2.256% (+/-0.00%)
bm_float.py 6707.29 -> 6644.83 : -62.46 = -0.931% (+/-0.00%)
bm_hexiom.py 55.91 -> 55.41 : -0.50 = -0.894% (+/-0.00%)
bm_nqueens.py 5343.54 -> 5326.17 : -17.37 = -0.325% (+/-0.00%)
bm_pidigits.py 603.89 -> 632.79 : +28.90 = +4.786% (+/-0.33%)
core_qstr.py 64.18 -> 64.09 : -0.09 = -0.140% (+/-0.01%)
core_yield_from.py 313.61 -> 311.11 : -2.50 = -0.797% (+/-0.03%)
misc_aes.py 654.29 -> 659.75 : +5.46 = +0.834% (+/-0.02%)
misc_mandel.py 4205.10 -> 4272.08 : +66.98 = +1.593% (+/-0.01%)
misc_pystone.py 3077.79 -> 3128.39 : +50.60 = +1.644% (+/-0.01%)
misc_raytrace.py 388.45 -> 393.71 : +5.26 = +1.354% (+/-0.01%)
viper_call0.py 576.83 -> 566.76 : -10.07 = -1.746% (+/-0.05%)
viper_call1a.py 550.39 -> 540.12 : -10.27 = -1.866% (+/-0.11%)
viper_call1b.py 438.32 -> 432.09 : -6.23 = -1.421% (+/-0.11%)
viper_call1c.py 442.96 -> 436.11 : -6.85 = -1.546% (+/-0.08%)
viper_call2a.py 536.31 -> 527.37 : -8.94 = -1.667% (+/-0.04%)
viper_call2b.py 378.99 -> 377.50 : -1.49 = -0.393% (+/-0.08%)

Signed-off-by: Damien George <[email protected]>
1 parent 94955e8 commit d4d53e9

12 files changed

+75
-186
lines changed

py/compile.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -3323,7 +3323,7 @@ STATIC void compile_scope_inline_asm(compiler_t *comp, scope_t *scope, pass_kind
33233323
NULL,
33243324
#if MICROPY_PERSISTENT_CODE_SAVE
33253325
0,
3326-
0, 0, NULL,
3326+
0,
33273327
#endif
33283328
0, comp->scope_cur->num_pos_args, type_sig);
33293329
}

py/emitglue.c

-3
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,6 @@ void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void
101101
#if MICROPY_PERSISTENT_CODE_SAVE
102102
size_t n_children,
103103
uint16_t prelude_offset,
104-
uint16_t n_qstr, mp_qstr_link_entry_t *qstr_link,
105104
#endif
106105
mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t type_sig) {
107106

@@ -144,8 +143,6 @@ void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void
144143
#if MICROPY_PERSISTENT_CODE_SAVE
145144
rc->n_children = n_children;
146145
rc->prelude_offset = prelude_offset;
147-
rc->n_qstr = n_qstr;
148-
rc->qstr_link = qstr_link;
149146
#endif
150147

151148
// These two entries are only needed for MP_CODE_NATIVE_ASM.

py/emitglue.h

-8
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,6 @@ typedef enum {
4949
MP_CODE_NATIVE_ASM,
5050
} mp_raw_code_kind_t;
5151

52-
typedef struct _mp_qstr_link_entry_t {
53-
uint16_t off;
54-
uint16_t qst;
55-
} mp_qstr_link_entry_t;
56-
5752
// compiled bytecode: instance in RAM, referenced by outer scope, usually freed after first (and only) use
5853
// mpy file: instance in RAM, created when .mpy file is loaded (same comments as above)
5954
// frozen: instance in ROM
@@ -78,8 +73,6 @@ typedef struct _mp_raw_code_t {
7873
#endif
7974
#if MICROPY_EMIT_MACHINE_CODE
8075
uint16_t prelude_offset;
81-
uint16_t n_qstr;
82-
mp_qstr_link_entry_t *qstr_link;
8376
#endif
8477
#endif
8578
#if MICROPY_EMIT_MACHINE_CODE
@@ -104,7 +97,6 @@ void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void
10497
#if MICROPY_PERSISTENT_CODE_SAVE
10598
size_t n_children,
10699
uint16_t prelude_offset,
107-
uint16_t n_qstr, mp_qstr_link_entry_t *qstr_link,
108100
#endif
109101
mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t type_sig);
110102

py/emitnarm.c

-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010

1111
// Word indices of REG_LOCAL_x in nlr_buf_t
1212
#define NLR_BUF_IDX_LOCAL_1 (3) // r4
13-
#define NLR_BUF_IDX_LOCAL_2 (4) // r5
14-
#define NLR_BUF_IDX_LOCAL_3 (5) // r6
1513

1614
#define N_ARM (1)
1715
#define EXPORT_FUN(name) emit_native_arm_##name

py/emitnative.c

+64-61
Large diffs are not rendered by default.

py/emitnthumb.c

-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010

1111
// Word indices of REG_LOCAL_x in nlr_buf_t
1212
#define NLR_BUF_IDX_LOCAL_1 (3) // r4
13-
#define NLR_BUF_IDX_LOCAL_2 (4) // r5
14-
#define NLR_BUF_IDX_LOCAL_3 (5) // r6
1513

1614
#define N_THUMB (1)
1715
#define EXPORT_FUN(name) emit_native_thumb_##name

py/emitnx64.c

-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010

1111
// Word indices of REG_LOCAL_x in nlr_buf_t
1212
#define NLR_BUF_IDX_LOCAL_1 (5) // rbx
13-
#define NLR_BUF_IDX_LOCAL_2 (6) // r12
14-
#define NLR_BUF_IDX_LOCAL_3 (7) // r13
1513

1614
#define N_X64 (1)
1715
#define EXPORT_FUN(name) emit_native_x64_##name

py/emitnx86.c

-2
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@
1111

1212
// Word indices of REG_LOCAL_x in nlr_buf_t
1313
#define NLR_BUF_IDX_LOCAL_1 (5) // ebx
14-
#define NLR_BUF_IDX_LOCAL_2 (7) // esi
15-
#define NLR_BUF_IDX_LOCAL_3 (6) // edi
1614

1715
// x86 needs a table to know how many args a given function has
1816
STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {

py/emitnxtensa.c

-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010

1111
// Word indices of REG_LOCAL_x in nlr_buf_t
1212
#define NLR_BUF_IDX_LOCAL_1 (8) // a12
13-
#define NLR_BUF_IDX_LOCAL_2 (9) // a13
14-
#define NLR_BUF_IDX_LOCAL_3 (10) // a14
1513

1614
#define N_XTENSA (1)
1715
#define EXPORT_FUN(name) emit_native_xtensa_##name

py/emitnxtensawin.c

-2
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@
1111

1212
// Word indices of REG_LOCAL_x in nlr_buf_t
1313
#define NLR_BUF_IDX_LOCAL_1 (2 + 4) // a4
14-
#define NLR_BUF_IDX_LOCAL_2 (2 + 5) // a5
15-
#define NLR_BUF_IDX_LOCAL_3 (2 + 6) // a6
1614

1715
#define N_NLR_SETJMP (1)
1816
#define N_XTENSAWIN (1)

py/persistentcode.c

+1-41
Original file line number | Diff line number | Diff line change
@@ -75,38 +75,6 @@ typedef struct _reloc_info_t {
7575
uint8_t *bss;
7676
} reloc_info_t;
7777

78-
#if MICROPY_EMIT_THUMB
79-
STATIC void asm_thumb_rewrite_mov(uint8_t *pc, uint16_t val) {
80-
// high part
81-
*(uint16_t *)pc = (*(uint16_t *)pc & 0xfbf0) | (val >> 1 & 0x0400) | (val >> 12);
82-
// low part
83-
*(uint16_t *)(pc + 2) = (*(uint16_t *)(pc + 2) & 0x0f00) | (val << 4 & 0x7000) | (val & 0x00ff);
84-
85-
}
86-
#endif
87-
88-
STATIC void arch_link_qstr(uint8_t *pc, bool is_obj, qstr qst) {
89-
mp_uint_t val = qst;
90-
if (is_obj) {
91-
val = (mp_uint_t)MP_OBJ_NEW_QSTR(qst);
92-
}
93-
#if MICROPY_EMIT_X86 || MICROPY_EMIT_X64 || MICROPY_EMIT_ARM || MICROPY_EMIT_XTENSA || MICROPY_EMIT_XTENSAWIN
94-
pc[0] = val & 0xff;
95-
pc[1] = (val >> 8) & 0xff;
96-
pc[2] = (val >> 16) & 0xff;
97-
pc[3] = (val >> 24) & 0xff;
98-
#elif MICROPY_EMIT_THUMB
99-
if (is_obj) {
100-
// qstr object, movw and movt
101-
asm_thumb_rewrite_mov(pc, val); // movw
102-
asm_thumb_rewrite_mov(pc + 4, val >> 16); // movt
103-
} else {
104-
// qstr number, movw instruction
105-
asm_thumb_rewrite_mov(pc, val); // movw
106-
}
107-
#endif
108-
}
109-
11078
void mp_native_relocate(void *ri_in, uint8_t *text, uintptr_t reloc_text) {
11179
// Relocate native code
11280
reloc_info_t *ri = ri_in;
@@ -285,9 +253,6 @@ STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) {
285253
} else if ((off & 3) == 3) {
286254
// Generic, aligned qstr-object link
287255
*(mp_obj_t *)dest = MP_OBJ_NEW_QSTR(qst);
288-
} else {
289-
// Architecture-specific link
290-
arch_link_qstr(dest, (off & 3) == 2, qst);
291256
}
292257
}
293258
}
@@ -424,7 +389,6 @@ STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) {
424389
#if MICROPY_PERSISTENT_CODE_SAVE
425390
n_children,
426391
prelude_offset,
427-
0, NULL,
428392
#endif
429393
native_scope_flags, native_n_pos_args, native_type_sig
430394
);
@@ -605,11 +569,7 @@ STATIC void save_raw_code(mp_print_t *print, const mp_raw_code_t *rc) {
605569
#if MICROPY_EMIT_MACHINE_CODE
606570
if (rc->kind == MP_CODE_NATIVE_PY || rc->kind == MP_CODE_NATIVE_VIPER) {
607571
// Save qstr link table for native code
608-
mp_print_uint(print, rc->n_qstr);
609-
for (size_t i = 0; i < rc->n_qstr; ++i) {
610-
mp_print_uint(print, rc->qstr_link[i].off);
611-
save_qstr(print, rc->qstr_link[i].qst);
612-
}
572+
mp_print_uint(print, 0);
613573
}
614574

615575
if (rc->kind == MP_CODE_NATIVE_PY) {

tools/mpy-tool.py

+9-60
Original file line number | Diff line number | Diff line change
@@ -839,7 +839,7 @@ def freeze_children(self, prelude_ptr=None):
839839
print("};")
840840
print()
841841

842-
def freeze_raw_code(self, prelude_ptr=None, qstr_links=(), type_sig=0):
842+
def freeze_raw_code(self, prelude_ptr=None, type_sig=0):
843843
# Generate mp_raw_code_t.
844844
print("static const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
845845
print(" .kind = %s," % RawCode.code_kind_str[self.code_kind])
@@ -879,8 +879,6 @@ def freeze_raw_code(self, prelude_ptr=None, qstr_links=(), type_sig=0):
879879
print(" #endif")
880880
print(" #if MICROPY_EMIT_MACHINE_CODE")
881881
print(" .prelude_offset = %u," % self.prelude_offset)
882-
print(" .n_qstr = %u," % len(qstr_links))
883-
print(" .qstr_link = NULL,") # TODO
884882
print(" #endif")
885883
print(" #endif")
886884
print(" #if MICROPY_EMIT_MACHINE_CODE")
@@ -1038,47 +1036,6 @@ def disassemble(self):
10381036
ip += sz
10391037
self.disassemble_children()
10401038

1041-
def _asm_thumb_rewrite_mov(self, pc, val):
1042-
print(" (%u & 0xf0) | (%s >> 12)," % (self.fun_data[pc], val), end="")
1043-
print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.fun_data[pc + 1], val), end="")
1044-
print(" (%s & 0xff)," % (val,), end="")
1045-
print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.fun_data[pc + 3], val))
1046-
1047-
def _link_qstr(self, pc, kind, qst):
1048-
if kind == 0:
1049-
# Generic 16-bit link
1050-
print(" %s & 0xff, %s >> 8," % (qst, qst))
1051-
return 2
1052-
else:
1053-
# Architecture-specific link
1054-
is_obj = kind == 2
1055-
if is_obj:
1056-
qst = "((uintptr_t)MP_OBJ_NEW_QSTR(%s))" % qst
1057-
if config.native_arch in (
1058-
MP_NATIVE_ARCH_X86,
1059-
MP_NATIVE_ARCH_X64,
1060-
MP_NATIVE_ARCH_ARMV6,
1061-
MP_NATIVE_ARCH_XTENSA,
1062-
MP_NATIVE_ARCH_XTENSAWIN,
1063-
):
1064-
print(
1065-
" %s & 0xff, (%s >> 8) & 0xff, (%s >> 16) & 0xff, %s >> 24,"
1066-
% (qst, qst, qst, qst)
1067-
)
1068-
return 4
1069-
elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
1070-
if is_obj:
1071-
# qstr object, movw and movt
1072-
self._asm_thumb_rewrite_mov(pc, qst)
1073-
self._asm_thumb_rewrite_mov(pc + 4, "(%s >> 16)" % qst)
1074-
return 8
1075-
else:
1076-
# qstr number, movw instruction
1077-
self._asm_thumb_rewrite_mov(pc, qst)
1078-
return 4
1079-
else:
1080-
assert 0
1081-
10821039
def freeze(self):
10831040
if self.scope_flags & ~0x0F:
10841041
raise FreezeError("unable to freeze code with relocations")
@@ -1098,21 +1055,13 @@ def freeze(self):
10981055
i = 0
10991056
qi = 0
11001057
while i < i_top:
1101-
if qi < len(self.qstr_links) and i == self.qstr_links[qi][0]:
1102-
# link qstr
1103-
qi_off, qi_kind, qi_val = self.qstr_links[qi]
1104-
i += self._link_qstr(i, qi_kind, qi_val.qstr_id)
1105-
qi += 1
1106-
else:
1107-
# copy machine code (max 16 bytes)
1108-
i16 = min(i + 16, i_top)
1109-
if qi < len(self.qstr_links):
1110-
i16 = min(i16, self.qstr_links[qi][0])
1111-
print(" ", end="")
1112-
for ii in range(i, i16):
1113-
print(" 0x%02x," % self.fun_data[ii], end="")
1114-
print()
1115-
i = i16
1058+
# copy machine code (max 16 bytes)
1059+
i16 = min(i + 16, i_top)
1060+
print(" ", end="")
1061+
for ii in range(i, i16):
1062+
print(" 0x%02x," % self.fun_data[ii], end="")
1063+
print()
1064+
i = i16
11161065

11171066
print("};")
11181067

@@ -1134,7 +1083,7 @@ def freeze(self):
11341083
print("#endif")
11351084

11361085
self.freeze_children(prelude_ptr)
1137-
self.freeze_raw_code(prelude_ptr, self.qstr_links, self.type_sig)
1086+
self.freeze_raw_code(prelude_ptr, self.type_sig)
11381087

11391088

11401089
class MPYSegment:

0 commit comments

Comments
 (0)