From e3c70a7d813ec7e3226510acedd64fc96021d4b0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 00:05:08 +0100 Subject: [PATCH 01/73] macOS: Fix support for Apple hardened runtime. Reported by Christian Clason. #1334 --- src/lj_mcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 43694226a6..d8fa165e1d 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -138,7 +138,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) static int mcode_setprot(void *p, size_t sz, int prot) { #if MCMAP_CREATE - pthread_jit_write_protect_np((prot & PROC_EXEC)); + pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else return mprotect(p, sz, prot); From 4f2bb199fe7138247e0b075c886c9e9197cf0271 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 02:53:20 +0100 Subject: [PATCH 02/73] macOS: Fix Apple hardened runtime support and put behind build option. Reported by vanc. #1334 --- src/lj_ccallback.c | 2 +- src/lj_mcode.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index d93dbc6457..7f08f0a848 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -263,7 +263,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) #endif /* Check for macOS hardened runtime. */ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include <pthread.h> #define CCMAP_CREATE MAP_JIT #else diff --git a/src/lj_mcode.c b/src/lj_mcode.c index d8fa165e1d..2b8ac2df58 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -99,7 +99,7 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #endif /* Check for macOS hardened runtime. 
*/ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include <pthread.h> #define MCMAP_CREATE MAP_JIT #else @@ -111,6 +111,8 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) #ifdef PROT_MPROTECT #define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) +#elif MCMAP_CREATE +#define MCPROT_CREATE PROT_EXEC #else #define MCPROT_CREATE 0 #endif From 84cb21ffaf648b472ff3884556e2c413e8abe179 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 02:56:07 +0100 Subject: [PATCH 03/73] REVERT: Change handling of nil value markers in template tables. --- src/lj_bcread.c | 10 ++++------ src/lj_bcwrite.c | 8 +++----- src/lj_opt_fold.c | 6 ++---- src/lj_opt_mem.c | 4 +--- src/lj_parse.c | 20 +++++++++++++++----- src/lj_tab.c | 1 - 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 37e909b391..ee7d7c1870 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt) } /* Read a single constant key/value of a template table. */ -static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) +static void bcread_ktabk(LexState *ls, TValue *o) { MSize tp = bcread_uleb128(ls); if (tp >= BCDUMP_KTAB_STR) { @@ -191,8 +191,6 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) } else if (tp == BCDUMP_KTAB_NUM) { o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); - } else if (tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. 
*/ - settabV(ls->L, o, t); } else { lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); @@ -209,15 +207,15 @@ static GCtab *bcread_ktab(LexState *ls) MSize i; TValue *o = tvref(t->array); for (i = 0; i < narray; i++, o++) - bcread_ktabk(ls, o, t); + bcread_ktabk(ls, o); } if (nhash) { /* Read hash entries. */ MSize i; for (i = 0; i < nhash; i++) { TValue key; - bcread_ktabk(ls, &key, t); + bcread_ktabk(ls, &key); lj_assertLS(!tvisnil(&key), "nil key"); - bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); + bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); } } return t; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index ec6f13c8d5..de200ef4ad 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -71,8 +71,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) *p++ = BCDUMP_KTAB_NUM; p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); - } else if (tvistab(o)) { /* Write the nil value marker as a nil. */ - *p++ = BCDUMP_KTAB_NIL; } else { lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); @@ -135,7 +133,7 @@ static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) TValue **heap = ctx->heap; MSize i = nhash; for (;; node--) { /* Build heap. */ - if (!tvisnil(&node->val)) { + if (!tvisnil(&node->key)) { bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); if (i == 0) break; } @@ -165,7 +163,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].val); + nhash += !tvisnil(&node[i].key); } /* Write number of array slots and hash slots. 
*/ p = lj_strfmt_wuleb128(p, narray); @@ -186,7 +184,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) } else { MSize i = nhash; for (;; node--) - if (!tvisnil(&node->val)) { + if (!tvisnil(&node->key)) { bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->val, 1); if (--i == 0) break; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 6fdf45663f..36aacebb03 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2217,11 +2217,9 @@ LJFOLD(HREF TDUP KNUM) LJFOLDF(fwd_href_tdup) { TValue keyv; - cTValue *val; lj_ir_kvalue(J->L, &keyv, fright); - val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv); - /* Check for either nil or the nil value marker in the template table. */ - if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J)) + if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) && + lj_opt_fwd_href_nokey(J)) return lj_ir_kkptr(J, niltvg(J2G(J))); return NEXTFOLD; } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 6f956b37e9..8cacfcfef9 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -233,9 +233,7 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) return lj_ir_knum_u64(J, tv->u64); else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else if (tvistab(tv)) /* Template table nil value marker. */ - return TREF_NIL; - else if (tvisstr(tv)) + else if (tvisgcv(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index f41163804a..7009759808 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1725,7 +1725,7 @@ static void expr_table(LexState *ls, ExpDesc *e) FuncState *fs = ls->fs; BCLine line = ls->linenumber; GCtab *t = NULL; - int vcall = 0, needarr = 0; + int vcall = 0, needarr = 0, fixt = 0; uint32_t narr = 1; /* First array index. */ uint32_t nhash = 0; /* Number of hash entries. 
*/ BCReg freg = fs->freereg; @@ -1769,10 +1769,9 @@ static void expr_table(LexState *ls, ExpDesc *e) lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ expr_kvalue(fs, v, &val); - /* Mark nil value with table value itself to preserve the key. */ - if (key.k == VKSTR && tvisnil(v)) settabV(fs->L, v, t); - } else { /* Preserve the key for the following non-const store. */ - settabV(fs->L, v, t); + } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ + settabV(fs->L, v, t); /* Preserve key with table itself as value. */ + fixt = 1; /* Fix this later, after all resizes. */ goto nonconst; } } else { @@ -1814,6 +1813,17 @@ static void expr_table(LexState *ls, ExpDesc *e) } else { if (needarr && t->asize < narr) lj_tab_reasize(fs->L, t, narr-1); + if (fixt) { /* Fix value for dummy keys in template table. */ + Node *node = noderef(t->node); + uint32_t i, hmask = t->hmask; + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (tvistab(&n->val)) { + lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); + setnilV(&n->val); /* Turn value into nil. */ + } + } + } lj_gc_check(fs->L); } } diff --git a/src/lj_tab.c b/src/lj_tab.c index 62e336111a..2d08055206 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -194,7 +194,6 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) Node *next = nextnode(kn); /* Don't use copyTV here, since it asserts on a copy of a dead key. */ n->val = kn->val; n->key = kn->key; - if (tvistab(&n->val)) setnilV(&n->val); /* Replace nil value marker. */ setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); } } From 538a82133ad6fddfd0ca64de167c4aca3bc1a2da Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 11 Mar 2025 23:04:30 +0100 Subject: [PATCH 04/73] Change handling of nil value markers in template tables. Reported by Bernhard M. Wiedemann. #1348 #1155 Fixes from Peter Cawley, Christian Clason, Lewis Russell. 
--- src/lj_bcread.c | 10 ++++++---- src/lj_bcwrite.c | 8 +++++--- src/lj_opt_fold.c | 6 ++++-- src/lj_opt_mem.c | 4 +++- src/lj_parse.c | 20 +++++--------------- src/lj_tab.c | 1 + 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/lj_bcread.c b/src/lj_bcread.c index ee7d7c1870..5570952208 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt) } /* Read a single constant key/value of a template table. */ -static void bcread_ktabk(LexState *ls, TValue *o) +static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) { MSize tp = bcread_uleb128(ls); if (tp >= BCDUMP_KTAB_STR) { @@ -191,6 +191,8 @@ static void bcread_ktabk(LexState *ls, TValue *o) } else if (tp == BCDUMP_KTAB_NUM) { o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); + } else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */ + settabV(ls->L, o, t); } else { lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); @@ -207,15 +209,15 @@ static GCtab *bcread_ktab(LexState *ls) MSize i; TValue *o = tvref(t->array); for (i = 0; i < narray; i++, o++) - bcread_ktabk(ls, o); + bcread_ktabk(ls, o, NULL); } if (nhash) { /* Read hash entries. */ MSize i; for (i = 0; i < nhash; i++) { TValue key; - bcread_ktabk(ls, &key); + bcread_ktabk(ls, &key, NULL); lj_assertLS(!tvisnil(&key), "nil key"); - bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); + bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); } } return t; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index de200ef4ad..ec6f13c8d5 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -71,6 +71,8 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) *p++ = BCDUMP_KTAB_NUM; p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); + } else if (tvistab(o)) { /* Write the nil value marker as a nil. 
*/ + *p++ = BCDUMP_KTAB_NIL; } else { lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); @@ -133,7 +135,7 @@ static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) TValue **heap = ctx->heap; MSize i = nhash; for (;; node--) { /* Build heap. */ - if (!tvisnil(&node->key)) { + if (!tvisnil(&node->val)) { bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); if (i == 0) break; } @@ -163,7 +165,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].key); + nhash += !tvisnil(&node[i].val); } /* Write number of array slots and hash slots. */ p = lj_strfmt_wuleb128(p, narray); @@ -184,7 +186,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) } else { MSize i = nhash; for (;; node--) - if (!tvisnil(&node->key)) { + if (!tvisnil(&node->val)) { bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->val, 1); if (--i == 0) break; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 36aacebb03..6fdf45663f 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2217,9 +2217,11 @@ LJFOLD(HREF TDUP KNUM) LJFOLDF(fwd_href_tdup) { TValue keyv; + cTValue *val; lj_ir_kvalue(J->L, &keyv, fright); - if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) && - lj_opt_fwd_href_nokey(J)) + val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv); + /* Check for either nil or the nil value marker in the template table. 
*/ + if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J)) return lj_ir_kkptr(J, niltvg(J2G(J))); return NEXTFOLD; } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 8cacfcfef9..6f956b37e9 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -233,7 +233,9 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) return lj_ir_knum_u64(J, tv->u64); else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else if (tvisgcv(tv)) + else if (tvistab(tv)) /* Template table nil value marker. */ + return TREF_NIL; + else if (tvisstr(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index 7009759808..f41163804a 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1725,7 +1725,7 @@ static void expr_table(LexState *ls, ExpDesc *e) FuncState *fs = ls->fs; BCLine line = ls->linenumber; GCtab *t = NULL; - int vcall = 0, needarr = 0, fixt = 0; + int vcall = 0, needarr = 0; uint32_t narr = 1; /* First array index. */ uint32_t nhash = 0; /* Number of hash entries. */ BCReg freg = fs->freereg; @@ -1769,9 +1769,10 @@ static void expr_table(LexState *ls, ExpDesc *e) lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ expr_kvalue(fs, v, &val); - } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ - settabV(fs->L, v, t); /* Preserve key with table itself as value. */ - fixt = 1; /* Fix this later, after all resizes. */ + /* Mark nil value with table value itself to preserve the key. */ + if (key.k == VKSTR && tvisnil(v)) settabV(fs->L, v, t); + } else { /* Preserve the key for the following non-const store. */ + settabV(fs->L, v, t); goto nonconst; } } else { @@ -1813,17 +1814,6 @@ static void expr_table(LexState *ls, ExpDesc *e) } else { if (needarr && t->asize < narr) lj_tab_reasize(fs->L, t, narr-1); - if (fixt) { /* Fix value for dummy keys in template table. 
*/ - Node *node = noderef(t->node); - uint32_t i, hmask = t->hmask; - for (i = 0; i <= hmask; i++) { - Node *n = &node[i]; - if (tvistab(&n->val)) { - lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); - setnilV(&n->val); /* Turn value into nil. */ - } - } - } lj_gc_check(fs->L); } } diff --git a/src/lj_tab.c b/src/lj_tab.c index 2d08055206..62e336111a 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -194,6 +194,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) Node *next = nextnode(kn); /* Don't use copyTV here, since it asserts on a copy of a dead key. */ n->val = kn->val; n->key = kn->key; + if (tvistab(&n->val)) setnilV(&n->val); /* Replace nil value marker. */ setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); } } From e9e4b6d302b5e7e4a04a3c7f78cb561a2c156a37 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 7 Apr 2025 09:22:07 +0200 Subject: [PATCH 05/73] Initialize unused value when specializing to cdata metatable. Reported by jakitliang. #1354 --- src/lj_record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index 20a7ea36e1..d336f642e4 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -854,7 +854,10 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) return 0; /* No metamethod. */ } /* The cdata metatable is treated as immutable. */ - if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; + if (LJ_HASFFI && tref_iscdata(ix->tab)) { + mix.tab = TREF_NIL; + goto immutable_mt; + } ix->mt = mix.tab = lj_ir_ktab(J, mt); goto nocheck; } From e76bb50d44702f601ec5dd167b03b475ed53860c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 7 Apr 2025 10:27:40 +0200 Subject: [PATCH 06/73] Fix error generation in load*. Reported by Sergey Kaplun. 
#1353 --- src/lj_load.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_load.c b/src/lj_load.c index 90a61027ad..6c8ae9f154 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -108,8 +108,9 @@ LUALIB_API int luaL_loadfilex(lua_State *L, const char *filename, copyTV(L, L->top-1, L->top); } if (err) { + const char *fname = filename ? filename : "stdin"; L->top--; - lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(err)); + lua_pushfstring(L, "cannot read %s: %s", fname, strerror(err)); return LUA_ERRFILE; } return status; From c262976486e1e007b56380b6a36bfbea5f51d470 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:06:47 +0200 Subject: [PATCH 07/73] ARM64: Fix pass-by-value struct calling conventions. Reported by AnthonyK213. #1357 --- src/lj_ccall.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index ae69cd28d1..f003d75674 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -781,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) { CTSize sz = ct->size; unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - while (ct->sib) { + while (ct->sib && n <= 4) { + unsigned int m = 1; CType *sct; ct = ctype_get(cts, ct->sib); if (ctype_isfield(ct->info)) { sct = ctype_rawchild(cts, ct); + if (ctype_isarray(sct->info)) { + CType *cct = ctype_rawchild(cts, sct); + if (!cct->size) continue; + m = sct->size / cct->size; + sct = cct; + } if (ctype_isfp(sct->info)) { r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; + if (!isu) n += m; else if (n < m) n = m; } else if (ctype_iscomplex(sct->info)) { r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; + if (!isu) n += 2*m; else if (n < 2*m) n = 2*m; } else if (ctype_isstruct(sct->info)) { goto substruct; } else { @@ -803,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) sct = ctype_rawchild(cts, ct); substruct: if 
(sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct); + unsigned int s = ccall_classify_struct(cts, sct), sn; if (s <= 1) goto noth; r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + sn = (s >> 8) * m; + if (!isu) n += sn; else if (n < sn) n = sn; } } } From 51d4c26ec7805d77bfc3470fdf99b73c4ef2faec Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:45:38 +0200 Subject: [PATCH 08/73] ARM: Fix soft-float math.min()/math.max(). Reported by Dong Jianqiang. #1356 --- src/lj_asm_arm.h | 2 +- src/vm_arm.dasc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index de435057e1..24deaeae27 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1927,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index ca08fc117e..86bef0cfbc 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1717,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, pl - | math_minmax math_max, lt, le + | math_minmax math_min, gt, hs + | math_minmax math_max, lt, ls | |//-- String library ----------------------------------------------------- | From eec7a8016c3381b949b5d84583800d05897fa960 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:53:50 +0200 Subject: [PATCH 09/73] Prevent Clang UB 'optimization' which breaks integerness checks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Kacper Michajłow. 
#1351 #1355 --- src/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Makefile b/src/Makefile index 4a56d1e8e5..c83abfa0b6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -302,6 +302,9 @@ endif ifneq (,$(INSTALL_LJLIBD)) TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" endif +ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-strict-float-cast-overflow +endif ############################################################################## # Target system detection. From 9c8eb7cfe10ef5939d9b358a0bd805a610818ba5 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 20:36:24 +0200 Subject: [PATCH 10/73] FFI: Fix dangling CType references. Reported by Sergey Kaplun. Collateral of #1360 --- src/lj_ccall.c | 19 ++++++++++++------- src/lj_crecord.c | 21 +++++++++++++-------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 9c99bec7fa..5d6bb03d50 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -623,7 +623,9 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) /* -- Common C call handling ---------------------------------------------- */ -/* Infer the destination CTypeID for a vararg argument. */ +/* Infer the destination CTypeID for a vararg argument. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) { if (tvisnumber(o)) { @@ -651,13 +653,16 @@ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) } } -/* Setup arguments for C call. */ +/* Setup arguments for C call. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCallState *cc) { int gcsteps = 0; TValue *o, *top = L->top; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. 
*/ CType *ctr; MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR @@ -676,7 +681,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #if LJ_TARGET_X86 /* x86 has several different calling conventions. */ cc->resx87 = 0; - switch (ctype_cconv(ct->info)) { + switch (ctype_cconv(info)) { case CTCC_FASTCALL: maxgpr = 2; break; case CTCC_THISCALL: maxgpr = 1; break; default: maxgpr = 0; break; @@ -693,7 +698,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else if (ctype_iscomplex(ctr->info) || ctype_isstruct(ctr->info)) { /* Preallocate cdata object and anchor it after arguments. */ CTSize sz = ctr->size; - GCcdata *cd = lj_cdata_new(cts, ctype_cid(ct->info), sz); + GCcdata *cd = lj_cdata_new(cts, ctype_cid(info), sz); void *dp = cdataptr(cd); setcdataV(L, L->top++, cd); if (ctype_isstruct(ctr->info)) { @@ -729,7 +734,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, lua_assert(ctype_isfield(ctf->info)); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. */ did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ isva = 1; @@ -869,11 +874,11 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) ct = ctype_rawchild(cts, ct); } if (ctype_isfunc(ct->info)) { + CTypeID id = ctype_typeid(cts, ct); CCallState cc; int gcsteps, ret; cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz); gcsteps = ccall_set_args(L, cts, ct, &cc); - ct = (CType *)((intptr_t)ct-(intptr_t)cts->tab); cts->cb.slot = ~0u; lj_vm_ffi_call(&cc); if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ @@ -881,7 +886,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) setlightudV(&tv, (void *)cc.func); setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); } - ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ + ct = ctype_get(cts, id); /* Table may have been reallocated. 
*/ gcsteps += ccall_get_results(L, cts, ct, &cc, &ret); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Automatically detect __stdcall and fix up C function declaration. */ diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 216144f32d..f686b35f21 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -988,12 +988,15 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) } } -/* Record argument conversions. */ +/* Record argument conversions. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static TRef crec_call_args(jit_State *J, RecordFFData *rd, CTState *cts, CType *ct) { TRef args[CCI_NARGS_MAX]; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. */ MSize i, n; TRef tr, *base; cTValue *o; @@ -1002,9 +1005,9 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, TRef *arg0 = NULL, *arg1 = NULL; #endif int ngpr = 0; - if (ctype_cconv(ct->info) == CTCC_THISCALL) + if (ctype_cconv(info) == CTCC_THISCALL) ngpr = 1; - else if (ctype_cconv(ct->info) == CTCC_FASTCALL) + else if (ctype_cconv(info) == CTCC_FASTCALL) ngpr = 2; #endif @@ -1029,7 +1032,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, lua_assert(ctype_isfield(ctf->info)); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ } @@ -1112,12 +1115,14 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) { CTState *cts = ctype_ctsG(J2G(J)); CType *ct = ctype_raw(cts, cd->ctypeid); + CTInfo info; IRType tp = IRT_PTR; if (ctype_isptr(ct->info)) { tp = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; ct = ctype_rawchild(cts, ct); } - if (ctype_isfunc(ct->info)) { + info = ct->info; /* crec_call_args may invalidate ct pointer. 
*/ + if (ctype_isfunc(info)) { TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); CType *ctr = ctype_rawchild(cts, ct); IRType t = crec_ct2irt(cts, ctr); @@ -1135,9 +1140,9 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) ctype_isenum(ctr->info)) || t == IRT_CDATA) { lj_trace_err(J, LJ_TRERR_NYICALL); } - if ((ct->info & CTF_VARARG) + if ((info & CTF_VARARG) #if LJ_TARGET_X86 - || ctype_cconv(ct->info) != CTCC_CDECL + || ctype_cconv(info) != CTCC_CDECL #endif ) func = emitir(IRT(IR_CARG, IRT_NIL), func, @@ -1160,7 +1165,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) } } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { - TRef trid = lj_ir_kint(J, ctype_cid(ct->info)); + TRef trid = lj_ir_kint(J, ctype_cid(info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J); } else if (t == IRT_FLOAT || t == IRT_U32) { From cd4af8ad80bb6430ad2e547f7af236268c9be7d9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 21:02:31 +0200 Subject: [PATCH 11/73] Avoid out-of-range PC for stack overflow error from snapshot restore. Reported by Sergey Kaplun. 
#1359 --- src/lj_bc.h | 5 +++++ src/lj_parse.c | 14 +------------- src/lj_snap.c | 6 ++++-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/lj_bc.h b/src/lj_bc.h index 3f0563e4b9..0c7249b39f 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -255,6 +255,11 @@ static LJ_AINLINE int bc_isret(BCOp op) return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1); } +static LJ_AINLINE int bc_isret_or_tail(BCOp op) +{ + return (op == BC_CALLMT || op == BC_CALLT || bc_isret(op)); +} + LJ_DATA const uint16_t lj_bc_mode[]; LJ_DATA const uint16_t lj_bc_ofs[]; diff --git a/src/lj_parse.c b/src/lj_parse.c index ffd11b3bd9..3370296f0f 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1529,23 +1529,11 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) #endif -/* Check if bytecode op returns. */ -static int bcopisret(BCOp op) -{ - switch (op) { - case BC_CALLMT: case BC_CALLT: - case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: - return 1; - default: - return 0; - } -} - /* Fixup return instruction for prototype. */ static void fs_fixup_ret(FuncState *fs) { BCPos lastpc = fs->pc; - if (lastpc <= fs->lasttarget || !bcopisret(bc_op(fs->bcbase[lastpc-1].ins))) { + if (lastpc <= fs->lasttarget || !bc_isret_or_tail(bc_op(fs->bcbase[lastpc-1].ins))) { if ((fs->bl->flags & FSCOPE_UPVAL)) bcemit_AJ(fs, BC_UCLO, 0, 0); bcemit_AD(fs, BC_RET0, 0, 1); /* Need final return. */ diff --git a/src/lj_snap.c b/src/lj_snap.c index 82ab6983d3..5426002119 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -872,8 +872,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; - /* Set interpreter PC to the next PC to get correct error messages. */ - setcframe_pc(L->cframe, pc+1); + /* Set interpreter PC to the next PC to get correct error messages. + ** But not for returns or tail calls, since pc+1 may be out-of-range. 
+ */ + setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1); setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); /* Make sure the stack is big enough for the slots from the snapshot. */ From 048972dbfdb6b441fe8a9bfe4d1f048966579ba8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 21:13:17 +0200 Subject: [PATCH 12/73] Fix JIT slot overflow during up-recursion. Reported by Sergey Kaplun. #1358 --- src/lj_record.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index d336f642e4..1d535a2299 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -749,7 +749,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_LLEAVE); } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */ - } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) { + } else if (1 + pt->framesize >= LJ_MAX_JSLOTS || + J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) { lj_trace_err(J, LJ_TRERR_STACKOV); } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); From c64020f3c6d124503213147f2fb47c20335a395b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:29:54 +0200 Subject: [PATCH 13/73] FFI: Fix dangling CType references (again). Reported by Sergey Kaplun. Collateral of #1360 --- src/lj_crecord.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/lj_crecord.c b/src/lj_crecord.c index f686b35f21..80e25ef8a1 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1125,6 +1125,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) if (ctype_isfunc(info)) { TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); CType *ctr = ctype_rawchild(cts, ct); + CTInfo ctr_info = ctr->info; /* crec_call_args may invalidate ctr. 
*/ IRType t = crec_ct2irt(cts, ctr); TRef tr; TValue tv; @@ -1133,11 +1134,11 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); - if (ctype_isvoid(ctr->info)) { + if (ctype_isvoid(ctr_info)) { t = IRT_NIL; rd->nres = 0; - } else if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) || - ctype_isenum(ctr->info)) || t == IRT_CDATA) { + } else if (!(ctype_isnum(ctr_info) || ctype_isptr(ctr_info) || + ctype_isenum(ctr_info)) || t == IRT_CDATA) { lj_trace_err(J, LJ_TRERR_NYICALL); } if ((info & CTF_VARARG) @@ -1148,7 +1149,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) func = emitir(IRT(IR_CARG, IRT_NIL), func, lj_ir_kint(J, ctype_typeid(cts, ct))); tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func); - if (ctype_isbool(ctr->info)) { + if (ctype_isbool(ctr_info)) { if (frame_islua(J->L->base-1) && bc_b(frame_pc(J->L->base-1)[-1]) == 1) { /* Don't check result if ignored. */ tr = TREF_NIL; @@ -1164,7 +1165,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tr = TREF_TRUE; } } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || - t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { + t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr_info)) { TRef trid = lj_ir_kint(J, ctype_cid(info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J); From e3fa3c48d8a4aadcf86429e9f7f6f1171914b15a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:35:56 +0200 Subject: [PATCH 14/73] Avoid out-of-range PC for stack overflow error from snapshot restore. Reported by Sergey Kaplun. 
#1369 --- src/lj_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_debug.c b/src/lj_debug.c index a639cddf8f..f340964917 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -101,6 +101,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; #if LJ_HASJIT + if (pos == NO_BCPOS) return 1; /* Pretend it's the first bytecode. */ if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ if (bc_isret(bc_op(ins[-1]))) { GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); From c92d0cb19263e7e302b4740ba6617a32c201c613 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:38:45 +0200 Subject: [PATCH 15/73] x86/x64: Don't use undefined MUL/IMUL zero flag. Reported by VrIgHtEr. #1376 --- src/lj_asm_x86.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 239066d4a5..8b6ce47983 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1841,7 +1841,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) RegSet allow = RSET_GPR; Reg dest, right; int32_t k = 0; - if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ + if (as->flagmcp == as->mcp && xa != XOg_X_IMUL) { + /* Drop test r,r instruction. */ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2); MCode *q = p[0] == 0x0f ? p+1 : p; if ((*q & 15) < 14) { From 871db2c84ecefd70a850e03a6c340214a81739f0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:45:24 +0200 Subject: [PATCH 16/73] Windows: Add lua52compat option to msvcbuild.bat. Thanks to Gil Reis. #1366 --- src/msvcbuild.bat | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 69c0c61a9f..d6aed17009 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -5,11 +5,12 @@ @rem Then cd to this directory and run this script. 
Use the following @rem options (in order), if needed. The default is a dynamic release build. @rem -@rem nogc64 disable LJ_GC64 mode for x64 -@rem debug emit debug symbols -@rem amalg amalgamated build -@rem static create static lib to statically link into your project -@rem mixed create static lib to build a DLL in your project +@rem nogc64 disable LJ_GC64 mode for x64 +@rem debug emit debug symbols +@rem lua52compat enable extra Lua 5.2 extensions +@rem amalg amalgamated build +@rem static create static lib to statically link into your project +@rem mixed create static lib to build a DLL in your project @if not defined INCLUDE goto :FAIL @@ -101,6 +102,10 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @set LJDYNBUILD=%LJDYNBUILD_DEBUG% @set LJLINKTYPE=%LJLINKTYPE_DEBUG% :NODEBUG +@if "%1" neq "lua52compat" goto :NOLUA52COMPAT +@shift +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT +:NOLUA52COMPAT @set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET% @set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET% @if "%1"=="amalg" goto :AMALGDLL From 54a162688ed25902122077149df9b456bc5a763e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:11:02 +0200 Subject: [PATCH 17/73] Fix reporting of an error during error handling. Reported by Sergey Kaplun. #1381 --- src/lj_err.c | 10 ++++++++++ src/lj_state.c | 1 + 2 files changed, 11 insertions(+) diff --git a/src/lj_err.c b/src/lj_err.c index 03b5030be6..e8e1875805 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -803,9 +803,17 @@ LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em) return lj_str_newz(L, err2msg(em)); } +LJ_NORET LJ_NOINLINE static void lj_err_err(lua_State *L) +{ + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRERR)); + lj_err_throw(L, LUA_ERRERR); +} + /* Out-of-memory error. */ LJ_NOINLINE void lj_err_mem(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. 
*/ lj_vm_unwind_c(L->cframe, LUA_ERRMEM); if (LJ_HASJIT) { @@ -902,6 +910,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) /* Stack overflow error. */ void LJ_FASTCALL lj_err_stkov(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL); lj_err_run(L); } diff --git a/src/lj_state.c b/src/lj_state.c index d8fc545a0d..3cad8cc184 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -195,6 +195,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ + fixstring(lj_err_str(L, LJ_ERR_ERRERR)); /* Preallocate err in err msg. */ g->gc.threshold = 4*g->gc.total; #if LJ_HASFFI lj_ctype_initfin(L); From a69aef43fe1838da26c193d188580229b2387583 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:13:51 +0200 Subject: [PATCH 18/73] Fix io.write() of newly created buffer. Reported by vfprintf. #1386 --- src/lj_strfmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 04aebaa472..bb649fc840 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -170,7 +170,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) } else if (tvisbuf(o)) { SBufExt *sbx = bufV(o); *lenp = sbufxlen(sbx); - return sbx->r; + return sbx->r ? sbx->r : ""; } else if (tvisint(o)) { sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { From a21ba1c9b5218ef83eb8bc6d374764da84f77ffd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:17:45 +0200 Subject: [PATCH 19/73] Add GNU/Hurd build support. Note: this is not an officially supported target. Contributed by Pino Toscano and Samuel Thibault. 
#1383 #1384 --- src/Makefile | 3 +++ src/lj_arch.h | 3 +++ src/lj_prng.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index c83abfa0b6..5dd98a31f6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -357,6 +357,9 @@ else ifeq (GNU/kFreeBSD,$(TARGET_SYS)) TARGET_XLIBS+= -ldl endif + ifeq (GNU,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif endif endif endif diff --git a/src/lj_arch.h b/src/lj_arch.h index a4eecf27e0..865bfa2322 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -96,6 +96,9 @@ #elif defined(__QNX__) #define LJ_TARGET_QNX 1 #define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__GNU__) +#define LJ_TARGET_HURD 1 +#define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER #endif diff --git a/src/lj_prng.c b/src/lj_prng.c index 02146b273a..1bbb7eaba3 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -125,7 +125,7 @@ static PRGR libfunc_rgr; #if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 #define LJ_TARGET_HAS_GETENTROPY 1 #endif -#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX +#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX || LJ_TARGET_HURD #define LJ_TARGET_HAS_GETENTROPY 1 #endif From 5c3254d68d2579bf8c5bd1e39e612582fb5a04f6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:23:51 +0200 Subject: [PATCH 20/73] Gracefully handle broken custom allocator. Reported by Alex Orlenko. 
#1393 --- src/lj_state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lj_state.c b/src/lj_state.c index 3cad8cc184..fb6d41a5f9 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -261,7 +261,11 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) } #endif GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); - if (GG == NULL || !checkptrGC(GG)) return NULL; + if (GG == NULL) return NULL; + if (!checkptrGC(GG)) { + allocf(allocd, GG, sizeof(GG_State), 0); + return NULL; + } memset(GG, 0, sizeof(GG_State)); L = &GG->L; g = &GG->g; From 25a61a182166fec06f1a1a025eb8fabbb6cf483e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 14:24:52 +0200 Subject: [PATCH 21/73] x64: Add support for CET IBT. Note: this is not enabled by default, look for CET in lj_arch.h Contributed by Yuichiro Naito. #1391 --- src/Makefile | 4 ++++ src/jit/dis_x86.lua | 20 +++++++++++++++- src/lj_arch.h | 11 +++++++++ src/lj_asm.c | 3 +++ src/lj_emit_x86.h | 7 ++++++ src/lj_target_x86.h | 3 +++ src/vm_x64.dasc | 57 ++++++++++++++++++++++++++++++++++++++------- 7 files changed, 95 insertions(+), 10 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5dd98a31f6..d23e0db255 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,6 +446,10 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif +ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D CET_BR + TARGET_ARCH+= -DLJ_CET_BR=1 +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index b1de0eeae1..6b04ee8495 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -122,7 +122,7 @@ local map_opc2 = { "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", "movhpsXmr||movhpdXmr", 
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", -"hintnopVm","hintnopVm","hintnopVm","hintnopVm", +"hintnopVm","hintnopVm","endbr*hintnopVm","hintnopVm", --2x "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, "movapsXrm||movapdXrm", @@ -804,6 +804,24 @@ map_act = { return dispatch(ctx, map_opcvm[ctx.mrm]) end, + -- Special NOP for endbr64/endbr32. + endbr = function(ctx, name, pat) + if ctx.rep then + local pos = ctx.pos + local b = byte(ctx.code, pos) + local text + if b == 0xfa then text = "endbr64" + elseif b == 0xfb then text = "endbr32" + end + if text then + ctx.pos = pos + 1 + ctx.rep = nil + return putop(ctx, text) + end + end + return dispatch(ctx, pat) + end, + -- Floating point opcode dispatch. fp = function(ctx, name, pat) local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end diff --git a/src/lj_arch.h b/src/lj_arch.h index 865bfa2322..42c65879bd 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -219,6 +219,17 @@ #error "macOS requires GC64 -- don't disable it" #endif +#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR) +/* +** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). +** This is not enabled by default because it causes a notable slowdown of +** the interpreter on all x64 CPUs, whether they have CET enabled or not. +** If your toolchain enables -fcf-protection=branch by default, you need +** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR +*/ +#define LJ_CET_BR 1 +#endif + #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM #define LJ_ARCH_NAME "arm" diff --git a/src/lj_asm.c b/src/lj_asm.c index fec4351251..e7f3ec1cd5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2586,6 +2586,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_head_side(as); else asm_head_root(as); +#if LJ_CET_BR + emit_endbr(as); +#endif asm_phi_fixup(as); if (J->curfinal->nins >= T->nins) { /* IR didn't grow? 
*/ diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index f477301162..848301bce1 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -70,6 +70,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, return p; } +#if LJ_CET_BR +static void emit_endbr(ASMState *as) +{ + emit_u32(as, XI_ENDBR64); +} +#endif + /* op + modrm */ #define emit_opm(xo, mode, rr, rb, p, delta) \ (p[(delta)-1] = MODRM((mode), (rr), (rb)), \ diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 6a528e8288..fa32a5d46f 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -242,6 +242,9 @@ typedef enum { XV_SHLX = XV_660f38(f7), XV_SHRX = XV_f20f38(f7), + /* Special NOP instructions. */ + XI_ENDBR64 = 0xfa1e0ff3, + /* Variable-length opcodes. XO_* prefix. */ XO_OR = XO_(0b), XO_MOV = XO_(8b), diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index f501495b11..52ef88af42 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -189,16 +189,24 @@ | |.endif | +|//-- Control-Flow Enforcement Technique (CET) --------------------------- +| +|.if CET_BR +|.macro endbr; endbr64; .endmacro +|.else +|.macro endbr; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Instruction headers. -|.macro ins_A; .endmacro -|.macro ins_AD; .endmacro -|.macro ins_AJ; .endmacro -|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro -|.macro ins_AB_; movzx RBd, RCH; .endmacro -|.macro ins_A_C; movzx RCd, RCL; .endmacro -|.macro ins_AND; not RD; .endmacro +|.macro ins_A; endbr; .endmacro +|.macro ins_AD; endbr; .endmacro +|.macro ins_AJ; endbr; .endmacro +|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro +|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro +|.macro ins_AND; endbr; not RD; .endmacro | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 
|.macro ins_NEXT @@ -479,20 +487,24 @@ static void build_subroutines(BuildCtx *ctx) | jmp <3 | |->vm_unwind_yield: + | endbr | mov al, LUA_YIELD | jmp ->vm_unwind_c_eh | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | endbr | // (void *cframe, int errcode) | mov eax, CARG2d // Error return status for vm_pcall. | mov rsp, CARG1 |->vm_unwind_c_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov GL:RB, L:RB->glref | mov dword GL:RB->vmstate, ~LJ_VMST_C | jmp ->vm_leave_unw | |->vm_unwind_rethrow: + | endbr |.if not X64WIN | mov CARG1, SAVE_L | mov CARG2d, eax @@ -501,10 +513,12 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | endbr | // (void *cframe) | and CARG1, CFRAME_RAWMASK | mov rsp, CARG1 |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov RDd, 1+1 // Really 1+2 results, incr. later. | mov BASE, L:RB->base @@ -675,6 +689,7 @@ static void build_subroutines(BuildCtx *ctx) |//-- Continuation dispatch ---------------------------------------------- | |->cont_dispatch: + | endbr | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | add RA, BASE | and PC, -8 @@ -706,6 +721,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // BASE = base, RC = result, RB = mbase + | endbr | movzx RAd, PC_RB | sub RB, 32 | lea RA, [BASE+RA*8] @@ -774,6 +790,7 @@ static void build_subroutines(BuildCtx *ctx) | test RC, RC | jz >3 |->cont_ra: // BASE = base, RC = result + | endbr | movzx RAd, PC_RA | mov RB, [RC] | mov [BASE+RA*8], RB @@ -851,6 +868,7 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, [BASE+RA*8] | mov [RC], RB |->cont_nop: // BASE = base, (RC = result) + | endbr | ins_next | |3: // Call __newindex metamethod. 
@@ -921,6 +939,7 @@ static void build_subroutines(BuildCtx *ctx) | ins_next | |->cont_condt: // BASE = base, RC = result + | endbr | add PC, 4 | mov ITYPE, [RC] | sar ITYPE, 47 @@ -929,6 +948,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp <6 | |->cont_condf: // BASE = base, RC = result + | endbr | mov ITYPE, [RC] | sar ITYPE, 47 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. @@ -1132,16 +1152,17 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | endbr |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: - | cmp NARGS:RDd, 1+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 1+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_2, name |->ff_ .. name: - | cmp NARGS:RDd, 2+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 2+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_n, name, op @@ -2207,6 +2228,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_VMEVENT // No recording while in vmevent. | jnz >5 @@ -2220,12 +2242,14 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 | jmp >1 | |->vm_inshook: // Dispatch target for instr/line hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 @@ -2253,6 +2277,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | |->cont_hook: // Continue from hook yield. + | endbr | add PC, 4 | mov RA, [RB-40] | mov MULTRES, RAd // Restore MULTRES for *M ins. @@ -2277,6 +2302,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. 
+ | endbr | mov SAVE_PC, PC |.if JIT | jmp >1 @@ -2312,6 +2338,7 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_stitch: // Trace stitching. |.if JIT + | endbr | // BASE = base, RC = result, RB = mbase | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE @@ -2364,6 +2391,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | endbr | mov L:RB, SAVE_L | mov L:RB->base, BASE | mov CARG2, PC // Caveat: CARG2 == BASE @@ -2383,6 +2411,7 @@ static void build_subroutines(BuildCtx *ctx) |// The 16 bit exit number is stored with two (sign-extended) push imm8. |->vm_exit_handler: |.if JIT + | endbr | push r13; push r12 | push r11; push r10; push r9; push r8 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp @@ -2431,6 +2460,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp >1 |.endif |->vm_exit_interp: + | endbr | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. |.if JIT | // Restore additional callee-save registers only used in compiled code. @@ -2524,6 +2554,7 @@ static void build_subroutines(BuildCtx *ctx) |.macro vm_round, name, mode, cond |->name: |->name .. _sse: + | endbr | sseconst_abs xmm2, RD | sseconst_2p52 xmm3, RD | movaps xmm1, xmm0 @@ -2634,6 +2665,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in edx. |->vm_next: |.if JIT + | endbr | mov NEXT_ASIZE, NEXT_TAB->asize |1: // Traverse array part. | cmp NEXT_IDX, NEXT_ASIZE; jae >5 @@ -4087,6 +4119,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: |.if JIT + | endbr | hotloop RBd |.endif |->vm_IITERN: @@ -4266,6 +4299,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jnz >7 // Not returning to a fixarg Lua func? switch (op) { case BC_RET: + | endbr |->BC_RET_Z: | mov KBASE, BASE // Use KBASE for result move. 
| sub RDd, 1 @@ -4284,10 +4318,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ja >6 break; case BC_RET1: + | endbr | mov RB, [BASE+RA] | mov [BASE-16], RB /* fallthrough */ case BC_RET0: + | endbr |5: | cmp PC_RB, RDL // More results expected? | ja >6 @@ -4334,6 +4370,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FORL: |.if JIT + | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. @@ -4485,6 +4522,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERL: |.if JIT + | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. @@ -4578,6 +4616,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCF: |.if JIT + | endbr | hotcall RBd |.endif case BC_FUNCV: /* NYI: compiled vararg functions. */ From e34a78acf6b8656874b1c25a12a7cd1813d73af9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 00:27:15 +0100 Subject: [PATCH 22/73] x64: Various fixes for CET IBT. Also add ELF notes. 
#1391 --- src/Makefile | 10 ++++-- src/lj_arch.h | 18 +++++++++-- src/lj_asm.c | 4 +-- src/lj_ccallback.c | 24 ++++++++++---- src/lj_emit_x86.h | 4 +-- src/vm_x64.dasc | 79 +++++++++++++++++++++++++++++++++------------- 6 files changed, 101 insertions(+), 38 deletions(-) diff --git a/src/Makefile b/src/Makefile index d23e0db255..e657af1343 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,9 +446,13 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif -ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D CET_BR - TARGET_ARCH+= -DLJ_CET_BR=1 +ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D BRANCH_TRACK + TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1 +endif +ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SHADOW_STACK + TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1 endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) diff --git a/src/lj_arch.h b/src/lj_arch.h index 42c65879bd..a775b51f4c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -219,15 +219,27 @@ #error "macOS requires GC64 -- don't disable it" #endif -#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR) +#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \ + LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR) /* ** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). ** This is not enabled by default because it causes a notable slowdown of ** the interpreter on all x64 CPUs, whether they have CET enabled or not. 
** If your toolchain enables -fcf-protection=branch by default, you need -** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR +** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR */ -#define LJ_CET_BR 1 +#define LJ_ABI_BRANCH_TRACK 1 +#endif + +#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2) +/* +** Control-Flow Enforcement Technique (CET) shadow stack (CET-SS). +** It has no code overhead and doesn't cause any slowdowns when unused. +** It can also be unconditionally enabled since all code already follows +** a strict CALL to RET correspondence for performance reasons (all modern +** CPUs use a (non-enforcing) shadow stack for return branch prediction). +*/ +#define LJ_ABI_SHADOW_STACK 1 #endif #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM diff --git a/src/lj_asm.c b/src/lj_asm.c index e7f3ec1cd5..8f558a0392 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2586,8 +2586,8 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_head_side(as); else asm_head_root(as); -#if LJ_CET_BR - emit_endbr(as); +#if LJ_ABI_BRANCH_TRACK + emit_branch_track(as); #endif asm_phi_fixup(as); diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 7f08f0a848..5594a731e4 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -34,22 +34,29 @@ #elif LJ_TARGET_X86ORX64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 8 +#else +#define CALLBACK_MCODE_SLOTSZ 4 +#endif +#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ) + #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 
6 : 5)) #define CALLBACK_SLOT2OFS(slot) \ - (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) + (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot)) static MSize CALLBACK_OFS2SLOT(MSize ofs) { MSize group; ofs -= CALLBACK_MCODE_HEAD; - group = ofs / (32*4 + CALLBACK_MCODE_GROUP); - return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32; + group = ofs / (128 + CALLBACK_MCODE_GROUP); + return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT; } #define CALLBACK_MAX_SLOT \ - (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) + (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT) #elif LJ_TARGET_ARM @@ -118,9 +125,13 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *(void **)p = target; p += 8; #endif for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *(uint32_t *)p = XI_ENDBR64; p += 4; +#endif /* mov al, slot; jmp group */ *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; - if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) { + if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) || + slot == CALLBACK_MAX_SLOT-1) { /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. 
*/ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); @@ -140,7 +151,8 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; #endif } else { - *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); + *p++ = XI_JMPs; + *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2); } } return p; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 848301bce1..5fd6cfa7eb 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -70,8 +70,8 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, return p; } -#if LJ_CET_BR -static void emit_endbr(ASMState *as) +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) { emit_u32(as, XI_ENDBR64); } diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 52ef88af42..2e9f05056d 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -191,7 +191,7 @@ | |//-- Control-Flow Enforcement Technique (CET) --------------------------- | -|.if CET_BR +|.if BRANCH_TRACK |.macro endbr; endbr64; .endmacro |.else |.macro endbr; .endmacro @@ -200,13 +200,13 @@ |//----------------------------------------------------------------------- | |// Instruction headers. -|.macro ins_A; endbr; .endmacro -|.macro ins_AD; endbr; .endmacro -|.macro ins_AJ; endbr; .endmacro -|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro -|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro -|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro -|.macro ins_AND; endbr; not RD; .endmacro +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; movzx RBd, RCH; .endmacro +|.macro ins_A_C; movzx RCd, RCL; .endmacro +|.macro ins_AND; not RD; .endmacro | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 
|.macro ins_NEXT @@ -487,13 +487,12 @@ static void build_subroutines(BuildCtx *ctx) | jmp <3 | |->vm_unwind_yield: - | endbr | mov al, LUA_YIELD | jmp ->vm_unwind_c_eh | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | endbr | // (void *cframe, int errcode) + | endbr | mov eax, CARG2d // Error return status for vm_pcall. | mov rsp, CARG1 |->vm_unwind_c_eh: // Landing pad for external unwinder. @@ -513,8 +512,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | endbr | // (void *cframe) + | endbr | and CARG1, CFRAME_RAWMASK | mov rsp, CARG1 |->vm_unwind_ff_eh: // Landing pad for external unwinder. @@ -689,7 +688,6 @@ static void build_subroutines(BuildCtx *ctx) |//-- Continuation dispatch ---------------------------------------------- | |->cont_dispatch: - | endbr | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | add RA, BASE | and PC, -8 @@ -1152,7 +1150,7 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: - | endbr + | endbr |.endmacro | |.macro .ffunc_1, name @@ -2338,8 +2336,8 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_stitch: // Trace stitching. |.if JIT - | endbr | // BASE = base, RC = result, RB = mbase + | endbr | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE | mov TMPRd, MULTRES @@ -2460,8 +2458,8 @@ static void build_subroutines(BuildCtx *ctx) | jmp >1 |.endif |->vm_exit_interp: - | endbr | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. + | endbr |.if JIT | // Restore additional callee-save registers only used in compiled code. 
|.if X64WIN @@ -2849,6 +2847,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant endbr instructions. */ + default: + | endbr + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -4119,7 +4137,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: |.if JIT - | endbr | hotloop RBd |.endif |->vm_IITERN: @@ -4299,7 +4316,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jnz >7 // Not returning to a fixarg Lua func? switch (op) { case BC_RET: - | endbr |->BC_RET_Z: | mov KBASE, BASE // Use KBASE for result move. | sub RDd, 1 @@ -4318,12 +4334,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ja >6 break; case BC_RET1: - | endbr | mov RB, [BASE+RA] | mov [BASE-16], RB /* fallthrough */ case BC_RET0: - | endbr |5: | cmp PC_RB, RDL // More results expected? | ja >6 @@ -4370,7 +4384,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FORL: |.if JIT - | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. @@ -4522,7 +4535,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERL: |.if JIT - | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. @@ -4616,7 +4628,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCF: |.if JIT - | endbr | hotcall RBd |.endif case BC_FUNCV: /* NYI: compiled vararg functions. 
*/ @@ -4886,6 +4897,30 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 8\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK) + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 8\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000002\n" + "\t.long 4\n" + "\t.long %d\n" + "\t.long 0\n", +#if LJ_ABI_BRANCH_TRACK + 1| +#else + 0| +#endif +#if LJ_ABI_SHADOW_STACK + 2 +#else + 0 +#endif + ); #endif break; #if !LJ_NO_UNWIND From 8651ef6df45189ad5ab734275568c9538038fcfa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 04:46:10 +0100 Subject: [PATCH 23/73] ARM64: Add support for ARM BTI. Note: this is not enabled by default, look for CET in lj_arch.h. Thanks to Yuichiro Naito. #1398 --- dynasm/dasm_arm64.lua | 22 ++++++++++++++ src/jit/dis_arm64.lua | 8 +++++- src/lj_arch.h | 5 ++++ src/lj_ccallback.c | 14 +++++++-- src/lj_emit_arm64.h | 7 +++++ src/lj_target_arm64.h | 4 +++ src/vm_arm64.dasc | 67 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 124 insertions(+), 3 deletions(-) diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index 8b27e9625c..db3adb4845 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -244,6 +244,10 @@ local map_cond = { hs = 2, lo = 3, } +local map_bti = { + c = 0x40, j = 0x80, jc = 0xc0, +} + ------------------------------------------------------------------------------ local parse_reg_type @@ -475,6 +479,12 @@ local function parse_cond(expr, inv) return shl(bit.bxor(c, inv), 12) end +local function parse_map(expr, map) + local x = map[expr] + if not x then werror("bad operand") end + return x +end + local function parse_load(params, nparams, n, op) if params[n+2] then werror("too many operands") end local scale = shr(op, 30) @@ -823,11 +833,21 @@ map_op = { tbz_3 = "36000000DTBw|36000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx", + -- Branch Target 
Identification. + bti_1 = "d503241ft", + -- ARM64e: Pointer authentication codes (PAC). blraaz_1 = "d63f081fNx", + blrabz_1 = "d63f0c1fNx", braa_2 = "d71f0800NDx", + brab_2 = "d71f0c00NDx", braaz_1 = "d61f081fNx", + brabz_1 = "d61f0c1fNx", + paciasp_0 = "d503233f", pacibsp_0 = "d503237f", + autiasp_0 = "d50323bf", + autibsp_0 = "d50323ff", + retaa_0 = "d65f0bff", retab_0 = "d65f0fff", -- Miscellaneous instructions. @@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos) op = op + parse_cond(q, 0); n = n + 1 elseif p == "c" then op = op + parse_cond(q, 1); n = n + 1 + elseif p == "t" then + op = op + parse_map(q, map_bti); n = n + 1 else assert(false) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 4457aac080..944f1a6ced 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions. }, { -- System instructions. shift = 0, mask = 0x3fffff, - [0x03201f] = "nop" + [0x03201f] = "nop", + [0x03245f] = "bti c", + [0x03249f] = "bti j", + [0x0324df] = "bti jc", }, { -- Unconditional branch, register. shift = 0, mask = 0xfffc1f, @@ -1171,6 +1174,9 @@ local function disass_ins(ctx) end end second0 = true + elseif p == " " then + operands[#operands+1] = pat:match(" (.*)") + break else assert(false) end diff --git a/src/lj_arch.h b/src/lj_arch.h index a775b51f4c..6d1a92714c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -288,6 +288,11 @@ #if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) #define LJ_ABI_PAUTH 1 #endif +#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \ + defined(LUAJIT_ENABLE_CET_BR) +/* See comments about LUAJIT_ENABLE_CET_BR above. 
*/ +#define LJ_ABI_BRANCH_TRACK 1 +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRAREG 30 diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 5594a731e4..c4b25cd7d1 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -64,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_ARM64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 12 +#endif + #define CALLBACK_MCODE_HEAD 32 #elif LJ_TARGET_PPC @@ -88,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #endif #ifndef CALLBACK_SLOT2OFS -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) +#ifndef CALLBACK_MCODE_SLOTSZ +#define CALLBACK_MCODE_SLOTSZ 8 +#endif +#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot)) +#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ) #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) #endif @@ -193,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *p++ = A64I_BTI_C; +#endif *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index ca1269b7c3..a8be741562 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target) } } +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) +{ + *--as->mcp = A64I_BTI_J; +} +#endif + /* -- Emit generic operations --------------------------------------------- */ /* Generic move between two regs. 
*/ diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 9274187117..30aff47882 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -265,6 +265,10 @@ typedef enum A64Ins { A64I_BRAAZ = 0xd61f081f, A64I_BLRAAZ = 0xd63f081f, + A64I_BTI_C = 0xd503245f, + A64I_BTI_J = 0xd503249f, + A64I_BTI_JC = 0xd50324df, + A64I_NOP = 0xd503201f, /* FP */ diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 58efe400e4..85d38de384 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -92,6 +92,17 @@ |.macro ret_auth; ret; .endmacro |.endif | +|// ARM64 branch target identification (BTI). +|.if BRANCH_TRACK +|.macro bti_jump; bti j; .endmacro +|.macro bti_call; bti c; .endmacro +|.macro bti_tailcall; bti jc; .endmacro +|.else +|.macro bti_jump; .endmacro +|.macro bti_call; .endmacro +|.macro bti_tailcall; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Stack layout while in interpreter. Must match with lj_frame.h. @@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | mov CRET1, CARG2 | ldr L, SAVE_L | ldr GL, L->glref |->vm_unwind_c_eh: // Landing pad for external unwinder. + | bti_tailcall | mv_vmstate TMP0w, C | st_vmstate TMP0w | b ->vm_leave_unw | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | ldr L, SAVE_L | init_constants | ldr GL, L->glref // Setup pointer to global state. |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | bti_tailcall | mov RC, #16 // 2 results: false + error message. 
| ldr BASE, L->base | mov_false TMP0 @@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // RA = resultptr, CARG4 = meta base + | bti_jump | ldr INSw, [PC, #-4] | sub CARG2, CARG4, #32 | ldr TMP0, [RA] @@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx) | sub RB, RB, #0x20000 | csel PC, PC, RB, lo |->cont_nop: + | bti_jump | ins_next | |->cont_ra: // RA = resultptr + | bti_jump | ldr INSw, [PC, #-4] | ldr TMP0, [RA] | decode_RA TMP1, INS @@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx) | b ->cont_nop | |->cont_condt: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_true TMP1 | cmp TMP1, TMP0 // Branch if result is true. | b <4 | |->cont_condf: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_false TMP1 | cmp TMP0, TMP1 // Branch if result is false. @@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | bti_jump |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: + | bti_jump | ldr CARG1, [BASE] | cmp NARGS8:RC, #8 | blo ->fff_fallback @@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_2, name |->ff_ .. name: + | bti_jump | ldp CARG1, CARG2, [BASE] | cmp NARGS8:RC, #16 | blo ->fff_fallback @@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | bti_jump | ldrb CARG1w, GL->hookmask | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | bne >5 @@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | bti_jump | ldrb TMP2w, GL->hookmask | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? |5: // Re-dispatch to static ins. @@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->vm_inshook: // Dispatch target for instr/line hooks. 
+ | bti_jump | ldrb TMP2w, GL->hookmask | ldr TMP3w, GL->hookcount | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? @@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->cont_hook: // Continue from hook yield. + | bti_jump | ldr CARG1, [CARG4, #-40] | add PC, PC, #4 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. @@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. + | bti_jump | mov CARG2, PC |.if JIT | b >1 @@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // RA = resultptr, CARG4 = meta base + | bti_jump | ldr RBw, SAVE_MULTRES | ldr INSw, [PC, #-4] | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. @@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | bti_jump | mov CARG1, L | str BASE, L->base | mov CARG2, PC @@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_exit_handler: |.if JIT + | bti_call | sub sp, sp, #(64*8) | savex_, 0, 1 | savex_, 2, 3 @@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_exit_interp: + | bti_jump | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |.if JIT | ldr L, SAVE_L @@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx) | | // int lj_vm_modi(int dividend, int divisor); |->vm_modi: + | bti_call | eor CARG4w, CARG1w, CARG2w | cmp CARG4w, #0 | eor CARG3w, CARG1w, CARG1w, asr #31 @@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in CRET2w. |->vm_next: |.if JIT + | bti_call | ldr NEXT_LIM, NEXT_TAB->asize | ldr NEXT_TMP1, NEXT_TAB->array |1: // Traverse array part. 
@@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant bti instructions. */ + default: + | bti_jump + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -4122,6 +4176,19 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 3\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 3\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000000\n" + "\t.long 4\n" + "\t.long 1\n" + "\t.long 0\n"); #endif break; #if !LJ_NO_UNWIND From 864e78d66cb21335823c7782fa21beae8e7914b0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 04:59:18 +0100 Subject: [PATCH 24/73] Windows: Fix lua52compat option for msvcbuild.bat. Thanks to Alex Orlenko. #1395 #1366 --- src/msvcbuild.bat | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index d6aed17009..3f32e1a0e5 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -6,8 +6,8 @@ @rem options (in order), if needed. The default is a dynamic release build. 
@rem @rem nogc64 disable LJ_GC64 mode for x64 -@rem debug emit debug symbols @rem lua52compat enable extra Lua 5.2 extensions +@rem debug emit debug symbols @rem amalg amalgamated build @rem static create static lib to statically link into your project @rem mixed create static lib to build a DLL in your project @@ -19,7 +19,7 @@ @set DEBUGCFLAGS= @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline @set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD -@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd +@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd @set LJCOMPILETARGET=/Zi @set LJLINKTYPE=/DEBUG /RELEASE @set LJLINKTYPE_DEBUG=/DEBUG @@ -65,6 +65,10 @@ if exist minilua.exe.manifest^ @set DASC=vm_x86.dasc @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 :DA +@if "%1" neq "lua52compat" goto :NOLUA52COMPAT +@shift +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT +:NOLUA52COMPAT minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD @@ -102,10 +106,6 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @set LJDYNBUILD=%LJDYNBUILD_DEBUG% @set LJLINKTYPE=%LJLINKTYPE_DEBUG% :NODEBUG -@if "%1" neq "lua52compat" goto :NOLUA52COMPAT -@shift -@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT -:NOLUA52COMPAT @set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET% @set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET% @if "%1"=="amalg" goto :AMALGDLL From 5b20d6e305b67765de357137105f5af007bac705 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 29 Oct 2025 09:38:31 +0100 Subject: [PATCH 25/73] ARM64: Fix ARM BTI. Reported by Yuichiro Naito. 
#1400 --- src/vm_arm64.dasc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 85d38de384..a437b65766 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -3497,6 +3497,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |->vm_IITERN: | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT + | bti_jump + |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] | ldrh TMP3w, [PC, # OFS_RD] From 8518c0b40b1734901de888a0a363450c0709d3f8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 29 Oct 2025 09:38:41 +0100 Subject: [PATCH 26/73] x64: Fix CET IBT. Reported by Yuichiro Naito. #1400 --- src/vm_x64.dasc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 2e9f05056d..4cfb7b6ad2 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -4140,6 +4140,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | hotloop RBd |.endif |->vm_IITERN: + |.if JIT + | endbr + |.endif | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB From 3c7b158b799405545775f7ec52e17019fcf6ace8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 6 Nov 2025 00:30:22 +0100 Subject: [PATCH 27/73] ARM64: Fix disassembly of >2GB branch targets. 
--- src/jit/dis_arm64.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 944f1a6ced..facc6e4a1e 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -923,7 +923,7 @@ local function disass_ins(ctx) elseif p == "B" then local addr = ctx.addr + pos + parse_immpc(op, name) ctx.rel = addr - x = "0x"..tohex(addr) + x = format("0x%08x", addr) elseif p == "T" then x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) elseif p == "V" then From 68354f444728ef99bb51bb4d86e8f1b40853a898 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 6 Nov 2025 00:42:02 +0100 Subject: [PATCH 28/73] Allow mcode allocations outside of the jump range to the support code. Thank you for your patience. #285 --- doc/running.html | 4 +- src/lib_jit.c | 15 ++- src/lj_arch.h | 3 +- src/lj_asm.c | 6 +- src/lj_asm_arm.h | 76 +++++++++---- src/lj_asm_arm64.h | 56 +++++++--- src/lj_asm_mips.h | 60 +++++++--- src/lj_asm_ppc.h | 68 +++++++---- src/lj_asm_x86.h | 88 +++++++++------ src/lj_emit_arm.h | 11 +- src/lj_emit_mips.h | 3 + src/lj_emit_ppc.h | 3 + src/lj_emit_x86.h | 13 ++- src/lj_jit.h | 23 ++-- src/lj_mcode.c | 255 ++++++++++++++++++++++++++---------------- src/lj_target_arm.h | 2 + src/lj_target_arm64.h | 1 + src/lj_target_ppc.h | 1 + src/lj_trace.c | 17 ++- 19 files changed, 477 insertions(+), 228 deletions(-) diff --git a/doc/running.html b/doc/running.html index f71eee42f6..56d4c7bfbe 100644 --- a/doc/running.html +++ b/doc/running.html @@ -299,9 +299,9 @@

-O[level]
recunroll2Min. unroll factor for true recursion -sizemcode32Size of each machine code area in KBytes (Windows: 64K) +sizemcode64Size of each machine code area in KBytes -maxmcode512Max. total size of all machine code areas in KBytes +maxmcode2048Max. total size of all machine code areas in KBytes
diff --git a/src/lib_jit.c b/src/lib_jit.c index fd8e585b83..1b74d957b5 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str) size_t len = *(const uint8_t *)lst; lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { - int32_t n = 0; + uint32_t n = 0; const char *p = &str[len+1]; while (*p >= '0' && *p <= '9') n = n*10 + (*p++ - '0'); - if (*p) return 0; /* Malformed number. */ - J->param[i] = n; + if (*p || (int32_t)n < 0) return 0; /* Malformed number. */ + if (i == JIT_P_sizemcode) { /* Adjust to required range here. */ +#if LJ_TARGET_JUMPRANGE + uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64); +#else + uint32_t maxkb = ((1 << (31 - 10)) - 64); +#endif + n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1); + if (n > maxkb) n = maxkb; + } + J->param[i] = (int32_t)n; if (i == JIT_P_hotloop) lj_dispatch_init_hotcount(J2G(J)); return 1; /* Ok. */ diff --git a/src/lj_arch.h b/src/lj_arch.h index 6d1a92714c..799f9c6cc3 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -301,6 +301,7 @@ #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 +#define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_VERSION 80 @@ -456,7 +457,7 @@ #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_EHRAREG 31 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ +#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ diff --git a/src/lj_asm.c b/src/lj_asm.c index 8f558a0392..0e888c294a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -93,6 +93,10 @@ typedef struct ASMState { MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. 
*/ MCode *realign; /* Realign loop if not NULL. */ + MCode *mctail; /* Tail of trace before stack adjust + jmp. */ +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 + MCode *mcexit; /* Pointer to exit stubs. */ +#endif #ifdef LUAJIT_RANDOM_RA /* Randomize register allocation. OK for fuzz testing, not for production. */ @@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) RA_DBGX((as, "===== STOP =====")); /* General trace setup. Emit tail of trace. */ - asm_tail_prep(as); + asm_tail_prep(as, T->link); as->mcloop = NULL; as->flagmcp = NULL; as->topslot = 0; diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 24deaeae27..406360d26a 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Generate an exit stub group at the bottom of the reserved MCode memory. */ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { + ExitNo i; + int ind = 0; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) + if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop) asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; + if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) { + /* str lr, [sp]; bl ->vm_exit_handler; + ** .long DISPATCH_address, group. 
+ */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + /* + ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 2; + } else { + /* .long vm_exit_handler; + ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = (MCode)target; + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16; + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 1; + } *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ *mxp++ = group*EXITSTUBS_PER_GROUP; for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); + *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu); lj_mcode_sync(as->mcbot, mxp); lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. 
*/ uint32_t k = emit_isk12(ARMI_ADD, spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) { + *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + } else { + *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0; + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + *mcp++ = (MCode)target; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); + while (as->mctop > mcp) *--as->mctop = ARMI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if ((((target - p - 2) + 0x00800000u) >> 24) || + (((target - p - 1) + 0x00800000u) >> 24)) p -= 2; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } *p = 0; /* Prevent load/store merging. 
*/ + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 4feaa3b0c2..085f935728 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + ind = !A64F_S_OK(target - (mxp - nexits - 2), 26); + /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; + ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i)); + as->mcexit = mxp; *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); - mxp--; - *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + if (ind) { + *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR)); + *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3)); + } else { + mxp--; + *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp)); + } *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); as->mctop = mxp; } @@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -1917,34 +1929,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. 
*/ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; + MCode *mcp = as->mctail; MCode *target; /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_LE(A64I_NOP); - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ uint32_t k = emit_isk12(spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | A64F_S26((target-p)+1); + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || A64F_S_OK(target - mcp, 26)) { + *mcp = A64I_B | A64F_S26(target - mcp); mcp++; + } else { + *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3); + *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR); + } + while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP); } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; *p = 0; /* Prevent load/store merging. 
*/ } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index af0e714f15..8dadabe4a0 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) /* Setup exit stub after the end of each trace. */ static void asm_exitstub_setup(ASMState *as) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, - "branch target out of range"); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; + *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno; + if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) { + /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ + *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */ + *--mxp = MIPSI_JR | MIPSF_S(RID_TMP); + *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); +#endif + } + *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0; as->mctop = mxp; } @@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)| + RID2RSET(RID_CFUNCADDR) #if LJ_TARGET_MIPSR6 |RID2RSET(RID_F21) #endif @@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r) { /* The modified regs must match with the *.dasc implementation. 
*/ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| - RID2RSET(RID_R1)|RID2RSET(RID_R12); + RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ @@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + if (((uintptr_t)mcp ^ target) >> 28 == 0) { + *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); +#endif + *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP); + } + *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; + as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. 
*/ + if (as->loopref) { + as->invmcp = as->mcp; + } else { + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--; + } + as->invmcp = NULL; + } + as->mctail = as->mcp; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index df1ac42f7a..d77c45ce9b 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ + ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0; + /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; + ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); + *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2); + as->mcexit = mxp; *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); + if (ind) { + *--mxp = PPCI_BCTRL; + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP); + *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); + } else { + mxp--; + *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu); + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + } as->mctop = mxp; } static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). 
*/ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; + *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); + *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); + /* Emit exit branch. */ + if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) { + *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++; + } else { + *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); + *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP); + *mcp++ = PPCI_BCTR; + } + while (as->mctop > mcp) *--as->mctop = PPCI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. 
*/ + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) || + (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2; + } + p -= 2; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 774e77b433..f3c2238a2f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -9,9 +9,12 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; MCode *mxpstart = mxp; - if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) + if (mxp + ((2+2)*EXITSTUBS_PER_GROUP + + (LJ_GC64 ? 0 : 8) + + (LJ_64 ? 6 : 5)) >= as->mctop) asm_mclimit(as); /* Push low byte of exitno for each exit stub. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; @@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; #endif /* Jump to exit handler which fills in the ExitState. */ - *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); + if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */ + *mxp++ = XI_JMP; mxp += 4; + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target); + } else { /* RIP-relative indirect jump. */ + *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4; + *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp); + } /* Commit the code for this group (even if assembly fails later on). 
*/ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ExitNo i; if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) lj_trace_err(as->J, LJ_TRERR_SNAPOV); +#if LJ_64 + if (as->J->exitstubgroup[0] == NULL) { + /* Store the two potentially out-of-range targets below group 0. */ + MCode *mxp = as->mcbot; + while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3; + *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8; + *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8; + as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */ + } +#endif for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) if (as->J->exitstubgroup[i] == NULL) as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); @@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; - *(uint64_t*)as->mcbot = *k; + *(uint64_t *)as->mcbot = *k; ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; @@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) p = (MCode *)(void *)ir_k64(irf)->u64; else p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i; - if (p - as->mcp == (int32_t)(p - as->mcp)) + if (jmprel_ok(p, as->mcp)) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } @@ -2806,6 +2824,8 @@ static void asm_gc_check(ASMState *as) emit_rr(as, XO_TEST, RID_RET, RID_RET); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ + /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. 
*/ + if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP; asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); #if LJ_GC64 @@ -2919,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) static void asm_tail_fixup(ASMState *as, TraceNo lnk) { /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ - MCode *p = as->mctop; - MCode *target, *q; + MCode *mcp = as->mctail; + MCode *target; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - p -= LJ_64 ? 7 : 6; - } else { - MCode *p1; - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ + if (LJ_64) *mcp++ = 0x48; if (checki8(spadj)) { - p -= 3; - p1 = p-6; - *p1 = (MCode)spadj; + *mcp++ = XI_ARITHi8; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *mcp++ = (MCode)spadj; } else { - p1 = p-9; - *(int32_t *)p1 = spadj; + *mcp++ = XI_ARITHi; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *(int32_t *)mcp = spadj; mcp += 4; } -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(as->J, p, target); - p[-5] = XI_JMP; + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */ + *mcp++ = XI_JMP; mcp += 4; + *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target); + } else { /* RIP-relative indirect jump. */ + *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4; + *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp); + } /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ - for (q = as->mctop-1; q >= p; q--) - *q = XI_NOP; - as->mctop = p; + while (as->mctop > mcp) *--as->mctop = XI_NOP; } /* Prepare tail of code. 
*/ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop; /* Realign and leave room for backwards loop branch or exit branch. */ @@ -2964,15 +2980,17 @@ static void asm_tail_prep(ASMState *as) as->mctop = p; p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ } else { - p -= 5; /* Space for exit branch (near jmp). */ + p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */ } if (as->loopref) { as->invmcp = as->mcp = p; } else { - /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (LJ_64 ? 7 : 6); + /* Leave room for ESP adjustment: add esp, imm */ + p -= LJ_64 ? 7 : 6; + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ @@ -3132,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if (*p == XI_CALL && (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { pgc = p+7; /* Do not patch GC check exit. */ + } else if (LJ_64 && *p == 0xff && + p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) && + p[2] == XI_NOP) { + pgc = p+5; /* Do not patch GC check exit. */ } } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index c60e7d7560..3e1eb64bfc 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) return 0; /* Failed. */ } +#define emit_movw_k(k) \ + (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4)) +#define emit_movt_k(k) \ + (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4)) + /* Load a 32 bit constant into a GPR. */ static void emit_loadi(ASMState *as, Reg rd, int32_t i) { @@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i) emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. 
*/ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movt_k(i), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index d8104959aa..d65b1c5777 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index b13f00fe5b..56928e4235 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 5fd6cfa7eb..858fe753be 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -478,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source) /* Return label pointing to current PC. */ #define emit_label(as) ((as)->mcp) +/* Check if two adresses are in relative jump range. 
*/ +static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b) +{ +#if LJ_64 + return a - b == (int32_t)(a - b); +#else + UNUSED(a); UNUSED(b); + return 1; +#endif +} + /* Compute relative 32 bit offset for jump and call instructions. */ static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { @@ -511,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; #if LJ_64 - if (target-p != (int32_t)(target-p)) { + if (!jmprel_ok(target, p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. */ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); diff --git a/src/lj_jit.h b/src/lj_jit.h index 102ba0b4b7..05a8e9bbe9 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -104,14 +104,6 @@ /* -- JIT engine parameters ----------------------------------------------- */ -#if LJ_TARGET_WINDOWS || LJ_64 -/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ -#define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif - /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ @@ -131,9 +123,9 @@ _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \ \ /* Size of each machine code area (in KBytes). */ \ - _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ + _(\011, sizemcode, 64) \ /* Max. total size of all machine code areas (in KBytes). */ \ - _(\010, maxmcode, 512) \ + _(\010, maxmcode, 2048) \ /* End of list. 
*/ enum { @@ -374,10 +366,14 @@ enum { LJ_K64_2P63, /* 2^63 */ LJ_K64_M2P64, /* -2^64 */ #endif +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + LJ_K64_VM_EXIT_HANDLER, + LJ_K64_VM_EXIT_INTERP, #endif LJ_K64__MAX, }; -#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -393,6 +389,10 @@ enum { #if LJ_TARGET_MIPS64 LJ_K32_2P63, /* 2^63 */ LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + LJ_K32_VM_EXIT_HANDLER, + LJ_K32_VM_EXIT_INTERP, #endif LJ_K32__MAX }; @@ -513,6 +513,7 @@ typedef struct jit_State { MCode *mcbot; /* Bottom of current mcode area. */ size_t szmcarea; /* Size of current mcode area. */ size_t szallmcarea; /* Total size of all allocated mcode areas. */ + uintptr_t mcmin, mcmax; /* Mcode allocation range. */ TValue errinfo; /* Additional info element for trace errors. */ diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 2b8ac2df58..c3032f4e2d 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end) #if LJ_HASJIT +#if LUAJIT_SECURITY_MCODE != 0 +/* Protection twiddling failed. Probably due to kernel security. 
*/ +static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) +{ + lua_CFunction panic = J2G(J)->panic; + if (panic) { + lua_State *L = J->L; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); + panic(L); + } + exit(EXIT_FAILURE); +} +#endif + #if LJ_TARGET_WINDOWS #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot) { - void *p = LJ_WIN_VALLOC((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; + return LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); UNUSED(sz); + UNUSED(sz); VirtualFree(p, 0, MEM_RELEASE); } -static int mcode_setprot(void *p, size_t sz, DWORD prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot) { +#if LUAJIT_SECURITY_MCODE != 0 DWORD oprot; - return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J); +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); +#endif } #elif LJ_TARGET_POSIX @@ -117,33 +132,33 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_CREATE 0 #endif -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot) { void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0); - if (p == MAP_FAILED) { - if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); - p = NULL; + if (p == MAP_FAILED) return NULL; #if MCMAP_CREATE - } else { - pthread_jit_write_protect_np(0); + pthread_jit_write_protect_np(0); #endif - } return p; } -static void mcode_free(jit_State *J, void *p, size_t sz) 
+static void mcode_free(void *p, size_t sz) { - UNUSED(J); munmap(p, sz); } -static int mcode_setprot(void *p, size_t sz, int prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot) { +#if LUAJIT_SECURITY_MCODE != 0 #if MCMAP_CREATE + UNUSED(J); UNUSED(p); UNUSED(sz); pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else - return mprotect(p, sz, prot); + if (mprotect(p, sz, prot)) mcode_protfail(J); +#endif +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); #endif } @@ -153,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot) #endif +#ifdef LUAJIT_MCODE_TEST +/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try: +** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua +** LUAJIT_MCODE_TEST=F luajit -jv main.lua +*/ +static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot) +{ + static int test_ofs = 0; + static const char *test_str; + if (!test_str) { + test_str = getenv("LUAJIT_MCODE_TEST"); + if (!test_str) test_str = ""; + } + switch (test_str[test_ofs]) { + case 'a': /* OK for one allocation. */ + test_ofs++; + /* fallthrough */ + case '\0': /* EOS: OK for any further allocations. */ + break; + case 'h': /* Ignore one hint. */ + test_ofs++; + /* fallthrough */ + case 'H': /* Ignore any further hints. */ + hint = 0u; + break; + case 'r': /* Randomize one hint. */ + test_ofs++; + /* fallthrough */ + case 'R': /* Randomize any further hints. */ + hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu; + hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1; + break; + case 'f': /* Fail one allocation. */ + test_ofs++; + /* fallthrough */ + default: /* 'F' or unknown: Fail any further allocations. 
*/ + return NULL; + } + return mcode_alloc_at(hint, sz, prot); +} +#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot) +#endif + /* -- MCode area protection ----------------------------------------------- */ #if LUAJIT_SECURITY_MCODE == 0 @@ -174,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot) static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); + UNUSED(J); UNUSED(prot); } #else @@ -190,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot) #define MCPROT_GEN MCPROT_RW #define MCPROT_RUN MCPROT_RX -/* Protection twiddling failed. Probably due to kernel security. */ -static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) -{ - lua_CFunction panic = J2G(J)->panic; - if (panic) { - lua_State *L = J->L; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); - panic(L); - } - exit(EXIT_FAILURE); -} - /* Change protection of MCode area. */ static void mcode_protect(jit_State *J, int prot) { if (J->mcprot != prot) { - if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot))) - mcode_protfail(J); + mcode_setprot(J, J->mcarea, J->szmcarea, prot); J->mcprot = prot; } } @@ -216,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 -#define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif - #ifdef LJ_TARGET_JUMPRANGE -/* Get memory within relative jump distance of our code in 64 bit mode. */ -static void *mcode_alloc(jit_State *J, size_t sz) +#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u) + +/* Set a memory range for mcode allocation with addr in the middle. */ +static void mcode_setrange(jit_State *J, uintptr_t addr) { - /* Target an address in the static assembler code (64K aligned). - ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. 
- ** Use half the jump range so every address in the range can reach any other. - */ #if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; + /* Use the whole 256MB-aligned region. */ + J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1); + J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE); #else - uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; + /* Every address in the 64KB-aligned range should be able to reach + ** any other, so MCODE_RANGE64 is only half the (signed) branch range. + */ + J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu; + J->mcmax = J->mcmin + MCODE_RANGE64; #endif - const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); - /* First try a contiguous area below the last one. */ - uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; - int i; - /* Limit probing iterations, depending on the available pool size. */ - for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { - if (mcode_validptr(hint)) { - void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); - - if (mcode_validptr(p) && - ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) - return p; - if (p) mcode_free(J, p, sz); /* Free badly placed area. */ - } - /* Next try probing 64K-aligned pseudo-random addresses. */ + /* Avoid wrap-around and the 64KB corners. */ + if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u; + if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu; +} + +/* Check if an address is in range of the mcode allocation range. */ +static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz) +{ + /* Take care of unsigned wrap-around of addr + sz, too. */ + return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax; +} + +/* Get memory within a specific jump range in 64 bit mode. 
*/ +static void *mcode_alloc(jit_State *J, size_t sz) +{ + uintptr_t hint; + int i = 0, j; + if (!J->mcmin) /* Place initial range near the interpreter code. */ + mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler); + else if (!J->mcmax) /* Switch to a new range (already flushed). */ + goto newrange; + /* First try a contiguous area below the last one (if in range). */ + hint = (uintptr_t)J->mcarea - sz; + if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */ + goto probe; + for (; i < 16; i++) { + void *p = mcode_alloc_at(hint, sz, MCPROT_GEN); + if (mcode_inrange(J, (uintptr_t)p, sz)) + return p; /* Success. */ + else if (p) + mcode_free(p, sz); /* Free badly placed area. */ + probe: + /* Next try probing 64KB-aligned pseudo-random addresses. */ + j = 0; do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); - } while (!(hint + sz < range+range)); - hint = target + hint - range; + hint = J->mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64); + if (++j > 15) goto fail; + } while (!mcode_inrange(J, hint, sz)); + } +fail: + if (!J->mcarea) { /* Switch to a new range now. */ + void *p; + newrange: + p = mcode_alloc_at(0, sz, MCPROT_GEN); + if (p) { + mcode_setrange(J, (uintptr_t)p + (sz >> 1)); + return p; /* Success. */ + } + } else { + J->mcmax = 0; /* Switch to a new range after the flush. */ } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; @@ -269,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz) { #if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. 
*/ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; + void *p = mcode_alloc_at(0, sz, MCPROT_RUN); + if (p) mcode_setprot(J, p, sz, MCPROT_GEN); #else - return mcode_alloc_at(J, 0, sz, MCPROT_GEN); + void *p = mcode_alloc_at(0, sz, MCPROT_GEN); #endif + if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; } #endif @@ -289,7 +359,6 @@ static void mcode_allocarea(jit_State *J) { MCode *oldarea = J->mcarea; size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; - sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); J->mcarea = (MCode *)mcode_alloc(J, sz); J->szmcarea = sz; J->mcprot = MCPROT_GEN; @@ -311,7 +380,7 @@ void lj_mcode_free(jit_State *J) MCode *next = ((MCLink *)mc)->next; size_t sz = ((MCLink *)mc)->size; lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); - mcode_free(J, mc, sz); + mcode_free(mc, sz); mc = next; } } @@ -347,32 +416,25 @@ void lj_mcode_abort(jit_State *J) MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { if (finish) { -#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); - else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) - mcode_protfail(J); -#endif + else + mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN); return NULL; } else { - MCode *mc = J->mcarea; + uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr; /* Try current area first to use the protection cache. */ - if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { -#if LUAJIT_SECURITY_MCODE + if (addr >= base && addr < base + J->szmcarea) { mcode_protect(J, MCPROT_GEN); -#endif - return mc; + return (MCode *)base; } /* Otherwise search through the list of MCode areas. 
*/ for (;;) { - mc = ((MCLink *)mc)->next; - lj_assertJ(mc != NULL, "broken MCode area chain"); - if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { -#if LUAJIT_SECURITY_MCODE - if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) - mcode_protfail(J); -#endif - return mc; + base = (uintptr_t)(((MCLink *)base)->next); + lj_assertJ(base != 0, "broken MCode area chain"); + if (addr >= base && addr < base + ((MCLink *)base)->size) { + mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN); + return (MCode *)base; } } } @@ -384,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need) size_t sizemcode, maxmcode; lj_mcode_abort(J); sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; - sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index d0bbc5a5fb..947545f821 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -190,6 +190,7 @@ typedef enum ARMIns { ARMI_LDRSB = 0xe01000d0, ARMI_LDRSH = 0xe01000f0, ARMI_LDRD = 0xe00000d0, + ARMI_LDRL = 0xe51f0000, ARMI_STR = 0xe4000000, ARMI_STRB = 0xe4400000, ARMI_STRH = 0xe00000b0, @@ -200,6 +201,7 @@ typedef enum ARMIns { ARMI_BL = 0xeb000000, ARMI_BLX = 0xfa000000, ARMI_BLXr = 0xe12fff30, + ARMI_BX = 0xe12fff10, /* ARMv6 */ ARMI_REV = 0xe6bf0f30, diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 30aff47882..3113d1410a 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -110,6 +110,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. 
*/ diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index 5a1b5a7cca..58f311884f 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -115,6 +115,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ + if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ diff --git a/src/lj_trace.c b/src/lj_trace.c index 0e948e8d08..3e2cd0b393 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g) J->k32[LJ_K32_M2P64] = 0xdf800000; #endif #endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; + J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0); + J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0); +#endif } /* Free everything associated with the JIT compiler state. */ @@ -637,10 +645,15 @@ static int trace_abort(jit_State *J) J->cur.traceno = 0; } L->top--; /* Remove error object */ - if (e == LJ_TRERR_DOWNREC) + if (e == LJ_TRERR_DOWNREC) { return trace_downrec(J); - else if (e == LJ_TRERR_MCODEAL) + } else if (e == LJ_TRERR_MCODEAL) { + if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */ + J->flags &= ~JIT_F_ON; + lj_dispatch_update(J2G(J)); + } lj_trace_flushall(L); + } return 0; } From 578c41ceb73bdf9d97f23c9e0342f8d027c08e77 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 8 Nov 2025 15:41:42 +0100 Subject: [PATCH 29/73] ARM64: Enable unaligned accesses if indicated by the toolchain. If you get a crash in jit_init() then you need to fix your toolchain. 
--- src/lib_jit.c | 9 +++++++++ src/lj_arch.h | 4 ++++ src/lj_asm_arm64.h | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/lib_jit.c b/src/lib_jit.c index 1b74d957b5..0f75c5ac64 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -723,7 +723,16 @@ static void jit_init(lua_State *L) jit_State *J = L2J(L); J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); +#if LJ_TARGET_UNALIGNED + G(L)->tmptv.u64 = U64x(0000504d,4d500000); +#endif lj_dispatch_update(G(L)); +#if LJ_TARGET_UNALIGNED + /* If you get a crash below then your toolchain indicates unaligned + ** accesses are OK, but your kernel disagrees. I.e. fix your toolchain. + */ + if (*(uint32_t *)((char *)&G(L)->tmptv + 2) != 0x504d4d50u) L->top = NULL; +#endif } #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index 799f9c6cc3..5f3880680b 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -304,6 +304,10 @@ #define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#if __ARM_FEATURE_UNALIGNED +#define LJ_TARGET_UNALIGNED 1 +#endif + #define LJ_ARCH_VERSION 80 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 085f935728..fdcff1db24 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1056,7 +1056,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } From fdf2379ccba1eb68ff07f8bc48541568f5bbdfbf Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 8 Nov 2025 16:54:00 +0100 Subject: [PATCH 30/73] macOS: Change Mach-O object file layout required by XCode 15.0. Reported by George Zhao. 
#1404 --- src/jit/bcsave.lua | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index a30a34b6be..e4ca19779d 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -465,9 +465,11 @@ typedef struct { mach_segment_command_64 seg; mach_section_64 sec; mach_symtab_command sym; +} mach_obj_64; +typedef struct { mach_nlist_64 sym_entry; uint8_t space[4096]; -} mach_obj_64; +} mach_obj_64_tail; ]] local symname = '_'..LJBC_PREFIX..ctx.modname local cputype, cpusubtype = 0x01000007, 3 @@ -479,7 +481,10 @@ typedef struct { -- Create Mach-O object and fill in header. local o = ffi.new("mach_obj_64") - local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) + local t = ffi.new("mach_obj_64_tail") + local ofs_bc = ffi.sizeof(o) + local sz_bc = aligned(#s, 8) + local ofs_sym = ofs_bc + sz_bc -- Fill in sections and symbols. o.hdr.magic = 0xfeedfacf @@ -491,7 +496,7 @@ typedef struct { o.seg.cmd = 0x19 o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) o.seg.vmsize = #s - o.seg.fileoff = mach_size + o.seg.fileoff = ofs_bc o.seg.filesize = #s o.seg.maxprot = 1 o.seg.initprot = 1 @@ -499,22 +504,23 @@ typedef struct { ffi.copy(o.sec.sectname, "__data") ffi.copy(o.sec.segname, "__DATA") o.sec.size = #s - o.sec.offset = mach_size + o.sec.offset = ofs_bc o.sym.cmd = 2 o.sym.cmdsize = ffi.sizeof(o.sym) - o.sym.symoff = ffi.offsetof(o, "sym_entry") + o.sym.symoff = ofs_sym o.sym.nsyms = 1 - o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) + o.sym.stroff = ofs_sym + ffi.offsetof(t, "space") o.sym.strsize = aligned(#symname+2, 8) - o.sym_entry.type = 0xf - o.sym_entry.sect = 1 - o.sym_entry.strx = 1 - ffi.copy(o.space+1, symname) + t.sym_entry.type = 0xf + t.sym_entry.sect = 1 + t.sym_entry.strx = 1 + ffi.copy(t.space+1, symname) -- Write Mach-O object file. 
local fp = savefile(output, "wb") - fp:write(ffi.string(o, mach_size)) - bcsave_tail(fp, output, s) + fp:write(ffi.string(o, ofs_bc)) + fp:write(s, ("\0"):rep(sz_bc - #s)) + bcsave_tail(fp, output, ffi.string(t, ffi.offsetof(t, "space") + o.sym.strsize)) end local function bcsave_obj(ctx, output, s) From 5c647754a687a910ef40a097fbf8f7415561c8aa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Nov 2025 18:11:26 +0100 Subject: [PATCH 31/73] Run VM events and finalizers in separate state. Reported by Sergey Kaplun. #1403 --- src/lj_gc.c | 18 +++++----- src/lj_obj.h | 2 ++ src/lj_parse.c | 4 +-- src/lj_state.c | 1 + src/lj_trace.c | 91 +++++++++++++++++++++++------------------------- src/lj_vmevent.c | 5 +++ src/lj_vmevent.h | 22 ++++++------ 7 files changed, 76 insertions(+), 67 deletions(-) diff --git a/src/lj_gc.c b/src/lj_gc.c index d9581d20d3..c779d583e9 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -106,6 +106,7 @@ static void gc_mark_start(global_State *g) setgcrefnull(g->gc.weak); gc_markobj(g, mainthread(g)); gc_markobj(g, tabref(mainthread(g)->env)); + gc_markobj(g, vmthread(g)); gc_marktv(g, &g->registrytv); gc_mark_gcroot(g); g->gc.state = GCSpropagate; @@ -507,24 +508,25 @@ static void gc_call_finalizer(global_State *g, lua_State *L, uint8_t oldh = hook_save(g); GCSize oldt = g->gc.threshold; int errcode; + lua_State *VL = vmthread(g); TValue *top; lj_trace_abort(g); hook_entergc(g); /* Disable hooks and new traces during __gc. */ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. 
*/ - top = L->top; - copyTV(L, top++, mo); + top = VL->top; + copyTV(VL, top++, mo); if (LJ_FR2) setnilV(top++); - setgcV(L, top, o, ~o->gch.gct); - L->top = top+1; - errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcV(VL, top, o, ~o->gch.gct); + VL->top = top+1; + errcode = lj_vm_pcall(VL, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcref(g->cur_L, obj2gco(L)); hook_restore(g, oldh); if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = oldt; /* Restore GC threshold. */ if (errcode) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ - lj_vmevent_send(L, ERRFIN, - copyTV(L, L->top++, restorestack(L, errobj)); + lj_vmevent_send(g, ERRFIN, + copyTV(V, V->top++, L->top-1); ); L->top--; } diff --git a/src/lj_obj.h b/src/lj_obj.h index 855727bfab..73b186e256 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -647,6 +647,7 @@ typedef struct global_State { TValue tmptv, tmptv2; /* Temporary TValues. */ Node nilnode; /* Fallback 1-element hash part (nil key and value). */ TValue registrytv; /* Anchor for registry. */ + GCRef vmthref; /* Link to VM thread. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. */ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. 
*/ @@ -663,6 +664,7 @@ typedef struct global_State { } global_State; #define mainthread(g) (&gcref(g->mainthref)->th) +#define vmthread(g) (&gcref(g->vmthref)->th) #define niltv(L) \ check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val) #define niltvg(g) \ diff --git a/src/lj_parse.c b/src/lj_parse.c index e326432abb..181ce4d7e2 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1593,8 +1593,8 @@ static GCproto *fs_finish(LexState *ls, BCLine line) fs_fixup_line(fs, pt, (void *)((char *)pt + ofsli), numline); fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar); - lj_vmevent_send(L, BC, - setprotoV(L, L->top++, pt); + lj_vmevent_send(G(L), BC, + setprotoV(V, V->top++, pt); ); L->top--; /* Pop table of constants. */ diff --git a/src/lj_state.c b/src/lj_state.c index fb6d41a5f9..9d4fdcee3a 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -202,6 +202,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) #endif lj_trace_initstate(g); lj_err_verify(); + setgcref(g->vmthref, obj2gco(lj_state_new(L))); return NULL; } diff --git a/src/lj_trace.c b/src/lj_trace.c index 3e2cd0b393..47d7faa5c9 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -296,8 +296,8 @@ int lj_trace_flushall(lua_State *L) /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "flush")); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "flush")); ); return 0; } @@ -416,7 +416,6 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) /* Start tracing. */ static void trace_start(jit_State *J) { - lua_State *L; TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? 
*/ @@ -466,20 +465,19 @@ static void trace_start(jit_State *J) J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "start")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, proto_bcpos(J->pt, J->pc)); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "start")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, proto_bcpos(J->pt, J->pc)); if (J->parent) { - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); } else { BCOp op = bc_op(*J->pc); if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { - setintV(L->top++, J->exitno); /* Parent of stitched trace. */ - setintV(L->top++, -1); + setintV(V->top++, J->exitno); /* Parent of stitched trace. */ + setintV(V->top++, -1); } } ); @@ -494,7 +492,6 @@ static void trace_stop(jit_State *J) GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; GCtrace *T = J->curfinal; - lua_State *L; switch (op) { case BC_FORL: @@ -551,11 +548,10 @@ static void trace_stop(jit_State *J) J->postproc = LJ_POST_NONE; trace_save(J, T); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "stop")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "stop")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); ); } @@ -610,18 +606,17 @@ static int trace_abort(jit_State *J) /* Is there anything to abort? */ traceno = J->cur.traceno; if (traceno) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. 
*/ J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; - lj_vmevent_send(L, TRACE, + lj_vmevent_send(J2G(J), TRACE, cTValue *bot = tvref(L->stack)+LJ_FR2; cTValue *frame; const BCIns *pc; BCPos pos = 0; - setstrV(L, L->top++, lj_str_newlit(L, "abort")); - setintV(L->top++, traceno); + setstrV(V, V->top++, lj_str_newlit(V, "abort")); + setintV(V->top++, traceno); /* Find original Lua function call to generate a better error message. */ - for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + for (frame = L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { if (isluafunc(frame_func(frame))) { pos = proto_bcpos(funcproto(frame_func(frame)), pc); break; @@ -633,10 +628,10 @@ static int trace_abort(jit_State *J) pc = frame_pc(frame) - 1; } } - setfuncV(L, L->top++, frame_func(frame)); - setintV(L->top++, pos); - copyTV(L, L->top++, restorestack(L, errobj)); - copyTV(L, L->top++, &J->errinfo); + setfuncV(V, V->top++, frame_func(frame)); + setintV(V->top++, pos); + copyTV(V, V->top++, L->top-1); + copyTV(V, V->top++, &J->errinfo); ); /* Drop aborted trace after the vmevent (which may still access it). */ setgcrefnull(J->trace[traceno]); @@ -692,16 +687,16 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); - lj_vmevent_send_(L, RECORD, + lj_vmevent_send_(J2G(J), RECORD, /* Save/restore state for trace recorder. */ TValue savetv = J2G(J)->tmptv; TValue savetv2 = J2G(J)->tmptv2; TraceNo parent = J->parent; ExitNo exitno = J->exitno; - setintV(L->top++, J->cur.traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1); - setintV(L->top++, J->framedepth); + setintV(V->top++, J->cur.traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, J->pt ? 
(int32_t)proto_bcpos(J->pt, J->pc) : -1); + setintV(V->top++, J->framedepth); , J2G(J)->tmptv = savetv; J2G(J)->tmptv2 = savetv2; @@ -839,23 +834,23 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) #ifndef LUAJIT_DISABLE_VMEVENT /* Push all registers from exit state. */ -static void trace_exit_regs(lua_State *L, ExitState *ex) +static void trace_exit_regs(lua_State *V, ExitState *ex) { int32_t i; - setintV(L->top++, RID_NUM_GPR); - setintV(L->top++, RID_NUM_FPR); + setintV(V->top++, RID_NUM_GPR); + setintV(V->top++, RID_NUM_FPR); for (i = 0; i < RID_NUM_GPR; i++) { if (sizeof(ex->gpr[i]) == sizeof(int32_t)) - setintV(L->top++, (int32_t)ex->gpr[i]); + setintV(V->top++, (int32_t)ex->gpr[i]); else - setnumV(L->top++, (lua_Number)ex->gpr[i]); + setnumV(V->top++, (lua_Number)ex->gpr[i]); } #if !LJ_SOFTFP for (i = 0; i < RID_NUM_FPR; i++) { - setnumV(L->top, ex->fpr[i]); - if (LJ_UNLIKELY(tvisnan(L->top))) - setnanV(L->top); - L->top++; + setnumV(V->top, ex->fpr[i]); + if (LJ_UNLIKELY(tvisnan(V->top))) + setnanV(V->top); + V->top++; } #endif } @@ -897,6 +892,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) #ifdef EXITSTATE_PCREG J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); +#else + UNUSED(ex); #endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT @@ -917,11 +914,11 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. 
*/ if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) - lj_vmevent_send(L, TEXIT, - lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - trace_exit_regs(L, ex); + lj_vmevent_send(G(L), TEXIT, + lj_state_checkstack(V, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); + trace_exit_regs(V, ex); ); pc = exd.pc; diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 070c6144aa..8913ead946 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c @@ -38,6 +38,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) { global_State *g = G(L); + lua_State *oldL = gco2th(gcref(g->cur_L)); uint8_t oldmask = g->vmevmask; uint8_t oldh = hook_save(g); int status; @@ -51,6 +52,10 @@ void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) fputs(tvisstr(L->top) ? strVdata(L->top) : "?", stderr); fputc('\n', stderr); } + setgcref(g->cur_L, obj2gco(oldL)); +#if LJ_HASJIT + G2J(g)->L = oldL; +#endif hook_restore(g, oldh); if (g->vmevmask != VMEVENT_NOCACHE) g->vmevmask = oldmask; /* Restore event mask, but not if not modified. 
*/ diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h index 8a99536068..cdd4f75825 100644 --- a/src/lj_vmevent.h +++ b/src/lj_vmevent.h @@ -32,23 +32,25 @@ typedef enum { } VMEvent; #ifdef LUAJIT_DISABLE_VMEVENT -#define lj_vmevent_send(L, ev, args) UNUSED(L) -#define lj_vmevent_send_(L, ev, args, post) UNUSED(L) +#define lj_vmevent_send(g, ev, args) UNUSED(g) +#define lj_vmevent_send_(g, ev, args, post) UNUSED(g) #else -#define lj_vmevent_send(L, ev, args) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send(g, ev, args) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ } \ } -#define lj_vmevent_send_(L, ev, args, post) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send_(g, ev, args, post) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ post \ } \ } From c94312d348e3530b369b4e517fce4c65df6cd270 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Nov 2025 18:15:11 +0100 Subject: [PATCH 32/73] FFI: Avoid dangling cts->L. Reported by ZumiKua. #1405 --- src/lj_state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lj_state.c b/src/lj_state.c index ecf1519865..18a55e7227 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -304,6 +304,10 @@ lua_State *lj_state_new(lua_State *L) void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) { lua_assert(L != mainthread(g)); +#if LJ_HASFFI + if (ctype_ctsG(g) && ctype_ctsG(g)->L == L) /* Avoid dangling cts->L. 
*/ + ctype_ctsG(g)->L = mainthread(g); +#endif lj_func_closeuv(L, tvref(L->stack)); lua_assert(gcref(L->openupval) == NULL); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); From af9763a50da87ff8ba16e828cbd5664135e05a88 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 14 Nov 2025 20:15:44 +0100 Subject: [PATCH 33/73] Fix edge cases when generating IR for string.byte/sub/find. Contributed by XmiliaH. #1407 --- src/lj_ffrecord.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index e22d8da687..7d83396b82 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -697,7 +697,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) emitir(IRTGI(IR_EQ), trstart, tr0); trstart = tr0; } else { - trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); + trstart = emitir(IRTGI(IR_ADDOV), trstart, lj_ir_kint(J, -1)); emitir(IRTGI(IR_GE), trstart, tr0); start--; } @@ -705,7 +705,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) if (rd->data) { /* Return string.sub result. */ if (end - start >= 0) { /* Also handle empty range here, to avoid extra traces. */ - TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); + TRef trptr, trslen = emitir(IRTGI(IR_SUBOV), trend, trstart); emitir(IRTGI(IR_GE), trslen, tr0); trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); @@ -716,7 +716,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) } else { /* Return string.byte result(s). 
*/ ptrdiff_t i, len = end - start; if (len > 0) { - TRef trslen = emitir(IRTI(IR_SUB), trend, trstart); + TRef trslen = emitir(IRTGI(IR_SUBOV), trend, trstart); emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, (int32_t)len)); if (J->baseslot + len > LJ_MAX_JSLOTS) lj_trace_err_info(J, LJ_TRERR_STACKOV); From 34b59f8aa93ac55d3d3492e51d455e8a0ba0308f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 16 Nov 2025 19:34:11 +0100 Subject: [PATCH 34/73] x86/x64: Backport fix for math.min()/math.max() argument check. Reported by puffy. --- src/vm_x86.dasc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 85378a3b90..abc8ab2193 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2232,7 +2232,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |.macro math_minmax, name, cmovop, fcmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM From e17ee83326f73d2bbfce5750ae8dc592a3b63c27 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 16 Nov 2025 19:41:51 +0100 Subject: [PATCH 35/73] ELF/Mach-O: Force default visibility for public API functions. Thanks to Dymphna. 
#1409 --- src/jit/bcsave.lua | 2 ++ src/luaconf.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index e4ca19779d..5a55789a9a 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -165,6 +165,8 @@ extern "C" #endif #ifdef _WIN32 __declspec(dllexport) +#elif (defined(__ELF__) || defined(__MACH__) || defined(__psp2__)) && !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) +__attribute__((visibility("default"))) #endif const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) diff --git a/src/luaconf.h b/src/luaconf.h index 7cd2edb5a4..6959313d3d 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -132,6 +132,8 @@ #else #define LUA_API __declspec(dllimport) #endif +#elif (defined(__ELF__) || defined(__MACH__) || defined(__psp2__)) && !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) +#define LUA_API extern __attribute__((visibility("default"))) #else #define LUA_API extern #endif From 3215838aa744d148e79a8ea0bd7c014e984302cb Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Nov 2025 16:39:15 +0100 Subject: [PATCH 36/73] ARM64: Fix disassembly of certain sub-word-size loads/stores. --- src/jit/dis_arm64.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index facc6e4a1e..ea2db17c70 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -485,15 +485,15 @@ local map_ls = { -- Loads and stores. 
shift = 30, mask = 3, [0] = { shift = 22, mask = 3, - [0] = "strbDwzU", "ldrbDwzU" + [0] = "strbDwzU", "ldrbDwzU", "ldrsbDwzU", "ldrsbDxzU" }, { shift = 22, mask = 3, - [0] = "strhDwzU", "ldrhDwzU" + [0] = "strhDwzU", "ldrhDwzU", "ldrshDwzU", "ldrshDxzU" }, { shift = 22, mask = 3, - [0] = "strDwzU", "ldrDwzU" + [0] = "strDwzU", "ldrDwzU", "ldrswDxzU" }, { shift = 22, mask = 3, From f80b349d5490aa289b2925d297f3f3c618977570 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Nov 2025 17:45:17 +0100 Subject: [PATCH 37/73] Unify Lua number to FFI integer conversions. Phew. #1411 --- doc/ext_ffi_semantics.html | 56 +++++++++----- src/lib_io.c | 7 +- src/lib_os.c | 8 +- src/lj_api.c | 24 +----- src/lj_asm.c | 21 ++++-- src/lj_asm_arm.h | 5 +- src/lj_asm_arm64.h | 20 +++-- src/lj_asm_mips.h | 79 +++++++------------- src/lj_asm_ppc.h | 27 +------ src/lj_asm_x86.h | 45 ++++++------ src/lj_bcwrite.c | 11 ++- src/lj_cconv.c | 12 ++- src/lj_cdata.c | 7 +- src/lj_crecord.c | 36 ++++++--- src/lj_def.h | 2 + src/lj_ffrecord.c | 4 +- src/lj_ir.c | 23 ++---- src/lj_ircall.h | 32 +------- src/lj_jit.h | 27 +++---- src/lj_lib.c | 2 +- src/lj_meta.c | 3 +- src/lj_obj.h | 91 ++++++++++++++--------- src/lj_opt_fold.c | 64 ++++++++-------- src/lj_opt_narrow.c | 32 +++----- src/lj_opt_split.c | 15 ++-- src/lj_parse.c | 23 +++--- src/lj_record.c | 21 ++++-- src/lj_strfmt.c | 9 +-- src/lj_strscan.c | 8 +- src/lj_tab.c | 24 +++--- src/lj_target_x86.h | 1 + src/lj_trace.c | 30 ++++---- src/lj_vm.h | 34 +++++---- src/lj_vmmath.c | 2 +- src/vm_arm.dasc | 115 ++++++++++++++++++++++++++++- src/vm_arm64.dasc | 36 +++++++++ src/vm_mips.dasc | 119 +++++++++++++++++++++++++++++- src/vm_mips64.dasc | 142 +++++++++++++++++++++++++++++++++--- src/vm_ppc.dasc | 146 +++++++++++++++++++++++++++++++++++++ src/vm_x64.dasc | 43 +++++++++++ src/vm_x86.dasc | 92 +++++++++++++++++++++++ 41 files changed, 1067 insertions(+), 431 deletions(-) diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html 
index cd533e8c88..cd72da21d0 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -338,42 +338,44 @@

Conversions between C types

Integer→rounddouble, float -double, floattrunc int32_tnarrow(u)int8_t, (u)int16_t +double, floattrunc int64_tnarrow *(u)int8_t, (u)int16_t, (u)int32_t -double, floattrunc(u)int32_t, (u)int64_t +double, floattruncint64_t +double, floattrunc uint64_t ∪ int64_t →reinterpret *uint64_t + double, floatroundfloat, double - + Numbern == 0 → 0, otherwise 1bool - + boolfalse → 0, true → 1Number - + Complex numberconvert real partNumber - -Numberconvert real part, imag = 0Complex number +Numberconvert real part, imag = 0Complex number + Complex numberconvert real and imag partComplex number - + Numberconvert scalar and replicateVector - + Vectorcopy (same size)Vector - + struct/uniontake base address (compat)Pointer - -Arraytake base address (compat)Pointer +Arraytake base address (compat)Pointer + Functiontake function addressFunction pointer - + Numberconvert via uintptr_t (cast)Pointer - -Pointerconvert address (compat/cast)Pointer -Pointerconvert address (cast)Integer +Pointerconvert address (compat/cast)Pointer +Pointerconvert address (cast)Integer + Arrayconvert base address (cast)Integer - + Arraycopy (compat)Array - + struct/unioncopy (identical type)struct/union

@@ -384,6 +386,24 @@

Conversions between C types

Conversions not listed above will raise an error. E.g. it's not possible to convert a pointer to a complex number or vice versa.

+

+* Some conversions from double have a larger defined range to +allow for mixed-signedness conversions, which are common in C code. +E.g. initializing an int32_t field with 0xffffffff +or initializing a uint32_t or uint64_t field with +-1. Under strict conversion rules, these assignments would +give undefined results, since Lua numbers are doubles. The extended +ranges make these conversions defined. Lua numbers outside even +that extended range give an architecture-specific result. +

+

+Please note that doubles do not have the precision to represent the +whole signed or unsigned 64 bit integer range. Beware of large hex +constants in particular: e.g. 0xffffffffffffffff is a double +rounded up to 0x1p64 during parsing. This will not +convert to a defined 64 bit integer value. Use the 64 bit literal +syntax instead, i.e. 0xffffffffffffffffULL. +

Conversions for vararg C function arguments

diff --git a/src/lib_io.c b/src/lib_io.c index 5659ff518b..ec7d254596 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp) lua_Number d; if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { if (LJ_DUALNUM) { - int32_t i = lj_num2int(d); - if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { + int64_t i64; + int32_t i; + if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) { setintV(L->top++, i); return 1; } @@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek) if (tvisint(o)) ofs = (int64_t)intV(o); else if (tvisnum(o)) - ofs = (int64_t)numV(o); + ofs = lj_num2i64(numV(o)); else if (!tvisnil(o)) lj_err_argt(L, 3, LUA_TNUMBER); } diff --git a/src/lib_os.c b/src/lib_os.c index ae3fc85783..fffc923e23 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d) LJLIB_CF(os_date) { const char *s = luaL_optstring(L, 1, "%c"); - time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); + time_t t = lua_isnoneornil(L, 2) ? 
time(NULL) : + lj_num2int_type(luaL_checknumber(L, 2), time_t); struct tm *stm; #if LJ_TARGET_POSIX struct tm rtm; @@ -253,8 +254,9 @@ LJLIB_CF(os_time) LJLIB_CF(os_difftime) { - lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), - (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); + lua_pushnumber(L, + difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t), + lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t))); return 1; } diff --git a/src/lj_api.c b/src/lj_api.c index e9fc25b438..94d8bc7e80 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) return intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) @@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) n = numV(&tmp); } if (ok) *ok = 1; -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) @@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) @@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUA_API int lua_toboolean(lua_State *L, int idx) diff --git a/src/lj_asm.c b/src/lj_asm.c index 0e888c294a..8f7ae9a3b6 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, 
IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRCallID id; + const CCallInfo *ci; +#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP + CCallInfo cim; +#endif IRRef args[2]; lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); args[LJ_BE] = (ir-1)->op1; args[LJ_LE] = ir->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); + lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted"); + if (st == IRT_NUM) { + id = IRCALL_lj_vm_num2u64; ir--; + ci = &lj_ir_callinfo[id]; } else { id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - { #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; + cim = lj_ir_callinfo[id]; cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ + ci = &cim; #else - const CCallInfo *ci = &lj_ir_callinfo[id]; + ci = &lj_ir_callinfo[id]; #endif - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); } + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); } #endif diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 406360d26a..1ddd2b3e07 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg dest = ra_dest(as, ir, RSET_GPR); ARMIns ai; + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - ai = irt_isint(ir->t) ? - (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : - (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); + ai = st == IRT_NUM ? 
ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32; emit_dm(as, ai, (tmp & 15), (left & 15)); } } else diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index fdcff1db24..507fc08453 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg left = ra_alloc1(as, lref, RSET_FPR); Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + if (irt_isu64(ir->t)) { + emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest); + emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31)); + emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31)); + } else { + A64Ins ai = irt_is64(ir->t) ? + (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) : + (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32); + emit_dn(as, ai, dest, (left & 31)); + } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg dest = ra_dest(as, ir, RSET_GPR); diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 8dadabe4a0..36ed5de4fb 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { /* FP to U32 conversion. 
*/ - /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ - emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, - tmp, tmp); - emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D, - tmp, left, tmp); - if (st == IRT_FLOAT) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); #if LJ_64 - } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ - MCLabel l_end; + if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ + MCLabel l_end = emit_label(as); emit_tg(as, MIPSI_DMFC1, dest, tmp); - l_end = emit_label(as); - /* For inputs >= 2^63 add -2^64 and convert again. */ + /* For result == INT64_MAX add -2^64 and convert again. */ if (st == IRT_NUM) { emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ -#if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); -#else - emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); -#endif - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, dest)); + rset_exclude(RSET_GPR, dest)); /* Delay slot. */ + emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? 
*/ + emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); + emit_ti(as, MIPSI_LI, RID_TMP, -1); + emit_tg(as, MIPSI_DMFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); } else { emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ -#if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); -#else - emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); -#endif - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, dest)); + rset_exclude(RSET_GPR, dest)); /* Delay slot. */ + emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */ + emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); + emit_ti(as, MIPSI_LI, RID_TMP, -1); + emit_tg(as, MIPSI_DMFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); } + } else #endif - } else { + { #if LJ_32 emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, @@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir) "bad type for checked CONV"); asm_tointg(as, ir, RID_NONE); } else { - IRCallID cid = irt_is64(ir->t) ? - ((st == IRT_NUM) ? - (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : - (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : - ((st == IRT_NUM) ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); + IRCallID cid; + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted"); + cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 : + (st == IRT_NUM ? 
IRCALL_softfp_d2i : IRCALL_softfp_f2i); asm_callid(as, ir, cid); } } else @@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir) } } } else { - if (st64 && !(ir->op2 & IRCONV_NONE)) { + if (!irt_isu32(ir->t)) { /* Implicit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dta(as, MIPSI_SLL, dest, left, 0); + } else if (st64 && !(ir->op2 & IRCONV_NONE)) { /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index d77c45ce9b..9e2af4144a 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { - /* Convert both x and x-2^31 to int and merge results. */ - Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. 
*/ - emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); - emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); - emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ - emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ - emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_LWZ, dest, - RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ - emit_fb(as, PPCI_FCTIWZ, tmp, left); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, tmp); - emit_fab(as, PPCI_FSUB, tmp, left, tmp); - emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - } else { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); + emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); + emit_fb(as, PPCI_FCTIWZ, tmp, left); } } else #endif diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index f3c2238a2f..bdbce1163c 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg dest = ra_dest(as, ir, RSET_GPR); x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; - if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { - /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ - /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); +#if LJ_64 + if (irt_isu64(ir->t)) { + /* For the indefinite result -2^63, add -2^64 and convert again. */ Reg tmp = ra_noreg(IR(lref)->r) ? 
ra_alloc1(as, lref, RSET_FPR) : ra_scratch(as, RSET_FPR); MCLabel l_end = emit_label(as); - if (LJ_32) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]); else - emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); - emit_sjcc(as, CC_NS, l_end); - emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]); + emit_sjcc(as, CC_NO, l_end); + emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1); emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); - } else { - if (LJ_64 && irt_isu32(ir->t)) - emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ + + } else +#endif + { emit_mrm(as, op, - dest|((LJ_64 && - (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), + dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0), asm_fuseload(as, lref, RSET_FPR)); } } @@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); Reg lo, hi; + int usehi = ra_used(ir); lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); hi = ra_dest(as, ir, RSET_GPR); @@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); } if (dt == IRT_U64) { - /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ + /* For the indefinite result -2^63, add -2^64 and convert again. 
*/ MCLabel l_pop, l_end = emit_label(as); emit_x87op(as, XI_FPOP); l_pop = emit_label(as); emit_sjmp(as, l_end); - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); + if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if ((as->flags & JIT_F_SSE3)) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); - emit_sjcc(as, CC_NS, l_pop); - emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ - } - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); + emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]); + emit_sjcc(as, CC_NE, l_pop); + emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0); + emit_sjcc(as, CC_NO, l_pop); + emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1); + usehi = 1; + } + if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); } else { /* Otherwise set FPU rounding mode to truncate before the store. */ diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index ec6f13c8d5..cd7ae942a7 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) p = lj_strfmt_wuleb128(p, intV(o)); } else if (tvisnum(o)) { if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ - lua_Number num = numV(o); - int32_t k = lj_num2int(num); - if (num == (lua_Number)k) { /* -0 is never a constant. */ + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ *p++ = BCDUMP_KTAB_INT; p = lj_strfmt_wuleb128(p, k); ctx->sb.w = p; @@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { /* Narrow number constants to integers. 
*/ - lua_Number num = numV(o); - k = lj_num2int(num); - if (num == (lua_Number)k) { /* -0 is never a constant. */ + int64_t i64; + if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ save_int: p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); if (k < 0) diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 854b51db74..2b9349cdcc 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, else goto err_conv; /* NYI: long double. */ /* Then convert double to integer. */ /* The conversion must exactly match the semantics of JIT-compiled code! */ - if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { - int32_t i = (int32_t)n; + if (dsize < 8) { + int64_t i = lj_num2i64(n); /* Always convert via int64_t. */ if (dsize == 4) *(int32_t *)dp = i; else if (dsize == 2) *(int16_t *)dp = (int16_t)i; else *(int8_t *)dp = (int8_t)i; - } else if (dsize == 4) { - *(uint32_t *)dp = (uint32_t)n; } else if (dsize == 8) { - if (!(dinfo & CTF_UNSIGNED)) - *(int64_t *)dp = (int64_t)n; - else + if ((dinfo & CTF_UNSIGNED)) *(uint64_t *)dp = lj_num2u64(n); + else + *(int64_t *)dp = lj_num2i64(n); } else { goto err_conv; /* NYI: conversion to >64 bit integers. */ } diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 3b48f76c1e..2dc56a802c 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -133,12 +133,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, idx = (ptrdiff_t)intV(key); goto integer_key; } else if (tvisnum(key)) { /* Numeric key. */ -#ifdef _MSC_VER - /* Workaround for MSVC bug. */ - volatile -#endif - lua_Number n = numV(key); - idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); + idx = lj_num2int_type(numV(key), ptrdiff_t); integer_key: if (ctype_ispointer(ct->info)) { CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. 
*/ diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 27f2c1dd99..45c559cf63 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, /* fallthrough */ case CCX(I, F): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; - sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); + conv_I_F: +#if LJ_SOFTFP || LJ_32 + if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */ + sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0); + st = IRT_NUM; + } +#endif + if (dsize < 8) { + lj_needsplit(J); + sp = emitconv(sp, IRT_I64, st, IRCONV_ANY); + sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0); + } else { + sp = emitconv(sp, dt, st, IRCONV_ANY); + } goto xstore; case CCX(I, P): case CCX(I, A): @@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, goto xstore; case CCX(P, F): if (st == IRT_CDATA) goto err_nyi; - /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ - sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, - st, IRCONV_ANY); - goto xstore; + /* The signed 64 bit conversion is cheaper. */ + dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32; + goto conv_I_F; /* Destination is an array. */ case CCX(A, A): @@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) if (J->base[0] && tref_iscdata(J->base[1])) { tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), J->base[1], &rd->argv[1]); - if (!tref_isinteger(tsh)) + if (LJ_32 && !tref_isinteger(tsh)) tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); J->base[1] = tsh; } @@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) if (id) { TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); uint32_t op = rd->data; + IRType t; if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); + t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh); if (!(op < IR_BROL ? 
LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && !tref_isk(tsh)) - tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); + tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63)); #ifdef LJ_TARGET_UNIFYROT - if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { - op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; - tsh = emitir(IRTI(IR_NEG), tsh, tsh); - } + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; + tsh = emitir(IRT(IR_NEG, t), tsh, tsh); + } #endif tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); diff --git a/src/lj_def.h b/src/lj_def.h index a9e2372988..f34b1a3996 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter; #define LJ_INLINE inline #define LJ_AINLINE inline __attribute__((always_inline)) #define LJ_NOINLINE __attribute__((noinline)) +#define LJ_CONSTF __attribute__((nothrow,const)) #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) @@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) #define LJ_INLINE __inline #define LJ_AINLINE __forceinline #define LJ_NOINLINE __declspec(noinline) +#define LJ_CONSTF __declspec(nothrow noalias) #if defined(_M_IX86) #define LJ_FASTCALL __fastcall #endif diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 527b6c0681..290986f6bd 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o) { if (!lj_strscan_numberobj(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); - return tvisint(o) ? intV(o) : lj_num2int(numV(o)); + return numberVint(o); } /* Get runtime value of string argument. */ @@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd) /* Result is integral (or NaN/Inf), but may not fit an int32_t. 
*/ if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); - if (n == (lua_Number)lj_num2int(n)) + if (lj_num2int_ok(n)) tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); } J->base[0] = tr; diff --git a/src/lj_ir.c b/src/lj_ir.c index e7a5e8bc09..e24fead4d6 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64) return lj_ir_k64(J, IR_KINT64, u64); } -/* Check whether a number is int and return it. -0 is NOT considered an int. */ -static int numistrueint(lua_Number n, int32_t *kp) -{ - int32_t k = lj_num2int(n); - if (n == (lua_Number)k) { - if (kp) *kp = k; - if (k == 0) { /* Special check for -0. */ - TValue tv; - setnumV(&tv, n); - if (tv.u32.hi != 0) - return 0; - } - return 1; - } - return 0; -} - /* Intern number as int32_t constant if possible, otherwise as FP constant. */ TRef lj_ir_knumint(jit_State *J, lua_Number n) { + int64_t i64; int32_t k; - if (numistrueint(n, &k)) + TValue tv; + setnumV(&tv, n); + /* -0 is NOT considered an int. 
*/ + if (lj_num2int_check(n, i64, k) && !tvismzero(&tv)) return lj_ir_kint(J, k); else return lj_ir_knum(J, n); diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 5196144e48..60b196c699 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -233,20 +233,15 @@ typedef struct CCallInfo { _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ - _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ - _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ - _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ - _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ - _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ + _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \ _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ @@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define softfp_d2i __aeabi_d2iz #define softfp_ui2d __aeabi_ui2d #define softfp_f2d __aeabi_f2d -#define softfp_d2ui __aeabi_d2uiz #define softfp_d2f __aeabi_d2f #define softfp_i2f __aeabi_i2f #define softfp_ui2f __aeabi_ui2f #define softfp_f2i __aeabi_f2iz -#define softfp_f2ui __aeabi_f2uiz #define fp64_l2d __aeabi_l2d #define fp64_ul2d __aeabi_ul2d #define fp64_l2f __aeabi_l2f #define fp64_ul2f __aeabi_ul2f -#if LJ_TARGET_IOS -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi -#else -#define fp64_d2l __aeabi_d2lz -#define fp64_d2ul __aeabi_d2ulz -#define fp64_f2l 
__aeabi_f2lz -#define fp64_f2ul __aeabi_f2ulz -#endif #elif LJ_TARGET_MIPS || LJ_TARGET_PPC #define softfp_add __adddf3 #define softfp_sub __subdf3 @@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define softfp_d2i __fixdfsi #define softfp_ui2d __floatunsidf #define softfp_f2d __extendsfdf2 -#define softfp_d2ui __fixunsdfsi #define softfp_d2f __truncdfsf2 #define softfp_i2f __floatsisf #define softfp_ui2f __floatunsisf #define softfp_f2i __fixsfsi -#define softfp_f2ui __fixunssfsi #else #error "Missing soft-float definitions for target architecture" #endif @@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a); #if LJ_HASFFI extern double softfp_ui2d(uint32_t a); extern double softfp_f2d(float a); -extern uint32_t softfp_d2ui(double a); extern float softfp_d2f(double a); extern float softfp_i2f(int32_t a); extern float softfp_ui2f(uint32_t a); extern int32_t softfp_f2i(float a); -extern uint32_t softfp_f2ui(float a); #endif #if LJ_TARGET_MIPS extern double lj_vm_sfmin(double a, double b); @@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b); #define fp64_ul2d __floatundidf #define fp64_l2f __floatdisf #define fp64_ul2f __floatundisf -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi #else #error "Missing fp64 helper definitions for this compiler" #endif @@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a); extern double fp64_ul2d(uint64_t a); extern float fp64_l2f(int64_t a); extern float fp64_ul2f(uint64_t a); -extern int64_t fp64_d2l(double a); -extern uint64_t fp64_d2ul(double a); -extern int64_t fp64_f2l(float a); -extern uint64_t fp64_f2ul(float a); #endif #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index 05a8e9bbe9..c0523457ae 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -350,22 +350,18 @@ enum { }; enum { +#if LJ_TARGET_X64 || LJ_TARGET_MIPS64 + LJ_K64_M2P64, /* -2^64 */ +#endif #if LJ_TARGET_X86ORX64 LJ_K64_TOBIT, /* 2^52 + 2^51 */ 
LJ_K64_2P64, /* 2^64 */ - LJ_K64_M2P64, /* -2^64 */ -#if LJ_32 - LJ_K64_M2P64_31, /* -2^64 or -2^31 */ -#else - LJ_K64_M2P64_31 = LJ_K64_M2P64, #endif +#if LJ_TARGET_MIPS64 + LJ_K64_2P63, /* 2^63 */ #endif #if LJ_TARGET_MIPS LJ_K64_2P31, /* 2^31 */ -#if LJ_64 - LJ_K64_2P63, /* 2^63 */ - LJ_K64_M2P64, /* -2^64 */ -#endif #endif #if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 LJ_K64_VM_EXIT_HANDLER, @@ -376,20 +372,19 @@ enum { #define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { -#if LJ_TARGET_X86ORX64 - LJ_K32_M2P64_31, /* -2^64 or -2^31 */ +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_MIPS64 + LJ_K32_2P63, /* 2^63 */ #endif #if LJ_TARGET_PPC LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ LJ_K32_2P52, /* 2^52 */ #endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC LJ_K32_2P31, /* 2^31 */ #endif -#if LJ_TARGET_MIPS64 - LJ_K32_2P63, /* 2^63 */ - LJ_K32_M2P64, /* -2^64 */ -#endif #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 LJ_K32_VM_EXIT_HANDLER, LJ_K32_VM_EXIT_INTERP, diff --git a/src/lj_lib.c b/src/lj_lib.c index 88cb2bdd6c..d51351b827 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) ** integer overflow. Overflow detection still works, since all FPUs ** return either MININT or MAXINT, which is then out of range. */ - int32_t i = (int32_t)numV(o); + int32_t i = lj_num2int(numV(o)); if (i >= a && i <= b) return i; #if LJ_HASFFI } else if (tviscdata(o)) { diff --git a/src/lj_meta.c b/src/lj_meta.c index c9307615f7..3f30fafb6b 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) if (tvisint(o+i)) { k[i] = intV(o+i); nint++; } else { - k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); + int64_t i64; + if (lj_num2int_check(numV(o+i), i64, k[i])) nint++; } } if (nint == 3) { /* Narrow to integers. 
*/ diff --git a/src/lj_obj.h b/src/lj_obj.h index 73b186e256..58e5049cc9 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) /* -- Number to integer conversion ---------------------------------------- */ -#if LJ_SOFTFP -LJ_ASMF int32_t lj_vm_tobit(double x); -#if LJ_TARGET_MIPS64 -LJ_ASMF int32_t lj_vm_tointg(double x); -#endif -#endif +/* +** The C standard leaves many aspects of FP to integer conversions as +** undefined behavior. Portability is a mess, hardware support varies, +** and modern C compilers are like a box of chocolates -- you never know +** what you're gonna get. +** +** However, we need 100% matching behavior between the interpreter (asm + C), +** optimizations (C) and the code generated by the JIT compiler (asm). +** Mixing Lua numbers with FFI numbers creates some extra requirements. +** +** These conversions have been moved to assembler code, even if they seem +** trivial, to foil unanticipated C compiler 'optimizations' with the +** surrounding code. Only the unchecked double to int32_t conversion +** is still in C, because it ought to be pretty safe -- we'll see. +** +** These macros also serve to document all places where FP to integer +** conversions happen. +*/ -static LJ_AINLINE int32_t lj_num2bit(lua_Number n) -{ -#if LJ_SOFTFP - return lj_vm_tobit(n); -#else - TValue o; - o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ - return (int32_t)o.u32.lo; -#endif -} +/* Unchecked double to int32_t conversion. */ +#define lj_num2int(n) ((int32_t)(n)) -#define lj_num2int(n) ((int32_t)(n)) +/* Unchecked double to arch/os-dependent signed integer type conversion. +** This assumes the 32/64-bit signed conversions are NOT range-extended. +*/ +#define lj_num2int_type(n, tp) ((tp)(n)) -/* -** This must match the JIT backend behavior. In particular for archs -** that don't have a common hardware instruction for this conversion. 
-** Note that signed FP to unsigned int conversions have an undefined -** result and should never be relied upon in portable FFI code. -** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). +/* Convert a double to int32_t and check for exact conversion. +** Returns the zero-extended int32_t on success. -0 is OK, too. +** Returns 0x8000000080000000LL on failure (simplifies range checks). */ -static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) -{ -#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS - int64_t i = (int64_t)n; - if (i < 0) i = (int64_t)(n - 18446744073709551616.0); - return (uint64_t)i; -#else - return (uint64_t)n; -#endif -} +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); + +/* Check for exact conversion only, without storing the result. */ +#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0) + +/* Check for exact conversion and conditionally store result. +** Note: conditions that fail for 0x80000000 may check only the lower +** 32 bits. This generates good code for both 32 and 64 bit archs. +*/ +#define lj_num2int_cond(x, i64, i, cond) \ + (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0) + +/* This is the generic check for a full-range int32_t result. */ +#define lj_num2int_check(x, i64, i) \ + lj_num2int_cond((x), i64, i, i64 >= 0) + +/* Predictable conversion from double to int64_t or uint64_t. +** Truncates towards zero. Out-of-range values, NaN and +-Inf return +** an arch-dependent result, but do not cause C undefined behavior. +** The uint64_t conversion accepts the union of the unsigned + signed range. +*/ +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x); + +#define lj_num2i64(x) (lj_vm_num2i64((x))) +#define lj_num2u64(x) (lj_vm_num2u64((x))) + +/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. 
*/ +LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); + +#define lj_num2bit(x) lj_vm_tobit((x)) static LJ_AINLINE int32_t numberVint(cTValue *o) { diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 6fdf45663f..456c04b255 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith) return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); } +/* Forward declaration. */ +static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, + IROp op); + LJFOLD(ADDOV KINT KINT) LJFOLD(SUBOV KINT KINT) LJFOLD(MULOV KINT KINT) LJFOLDF(kfold_intovarith) { - lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, - fins->o - IR_ADDOV); - int32_t k = lj_num2int(n); - if (n != (lua_Number)k) - return FAILFOLD; - return INTFOLD(k); + int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i, + (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD)); + return checki32(k) ? INTFOLD(k) : FAILFOLD; } LJFOLD(BNOT KINT) @@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, IROp op) { UNUSED(J); -#if LJ_HASFFI switch (op) { case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; +#if LJ_HASFFI case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; @@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; default: lj_assertJ(0, "bad IR op %d", op); break; - } -#else - UNUSED(k2); UNUSED(op); - lj_assertJ(0, "FFI IR op without FFI"); #endif + } return k1; } @@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) LJFOLDF(kfold_conv_knum_int_num) { lua_Number n = knumleft; - int32_t k = lj_num2int(n); - if (irt_isguard(fins->t) && n != (lua_Number)k) { + if (irt_isguard(fins->t)) { + int64_t i64; + int32_t k; + if (lj_num2int_check(n, i64, k)) + return INTFOLD(k); 
/* We're about to create a guard which always fails, like CONV +1.5. ** Some pathological loops cause this during LICM, e.g.: ** local x,k,t = 0,1.5,{1,[1.5]=2} @@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num) ** assert(x == 300) */ return FAILFOLD; + } else { + return INTFOLD(lj_num2int(n)); } - return INTFOLD(k); -} - -LJFOLD(CONV KNUM IRCONV_U32_NUM) -LJFOLDF(kfold_conv_knum_u32_num) -{ -#ifdef _MSC_VER - { /* Workaround for MSVC bug. */ - volatile uint32_t u = (uint32_t)knumleft; - return INTFOLD((int32_t)u); - } -#else - return INTFOLD((int32_t)(uint32_t)knumleft); -#endif } LJFOLD(CONV KNUM IRCONV_I64_NUM) LJFOLDF(kfold_conv_knum_i64_num) { - return INT64FOLD((uint64_t)(int64_t)knumleft); + return INT64FOLD((uint64_t)lj_num2i64(knumleft)); } LJFOLD(CONV KNUM IRCONV_U64_NUM) @@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int) } LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ -LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */ LJFOLDF(simplify_conv_int_num) { /* Fold even across PHI to avoid expensive num->int conversions in loop. */ @@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert) return lj_opt_narrow_convert(J); } +LJFOLD(XSTORE any CONV) +LJFOLDF(xstore_conv) +{ +#if LJ_64 + PHIBARRIER(fright); + if (!irt_is64(fins->t) && + irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) && + ((fright->op2&IRCONV_SRCMASK) == IRT_I64 || + (fright->op2&IRCONV_SRCMASK) == IRT_U64)) { + fins->op2 = fright->op1; + return RETRYFOLD; + } +#else + UNUSED(J); +#endif + return NEXTFOLD; +} + /* -- Integer algebraic simplifications ----------------------------------- */ LJFOLD(ADD any KINT) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 01b5833d92..3085c83766 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) return 0; } else if (ir->o == IR_KNUM) { /* Narrow FP constant. 
*/ lua_Number n = ir_knum(ir)->n; + int64_t i64; + int32_t k; if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { - /* Allows a wider range of constants. */ - int64_t k64 = (int64_t)n; - if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ - *nc->sp++ = NARROWINS(NARROW_INT, 0); - *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ - return 0; - } - } else { - int32_t k = lj_num2int(n); - /* Only if constant is a small integer. */ - if (checki16(k) && n == (lua_Number)k) { + /* Allows a wider range of constants, if const doesn't lose precision. */ + if (lj_num2int_check(n, i64, k)) { *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k; return 0; } + } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) { + /* Only if constant is a small integer. */ + *nc->sp++ = NARROWINS(NARROW_INT, 0); + *nc->sp++ = (NarrowIns)k; + return 0; } return 10; /* Never narrow other FP constants (this is rare). */ } @@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) /* -- Narrowing of arithmetic operators ----------------------------------- */ -/* Check whether a number fits into an int32_t (-0 is ok, too). */ -static int numisint(lua_Number n) -{ - return (n == (lua_Number)lj_num2int(n)); -} - /* Convert string to number. Error out for non-numeric string values. */ static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) { @@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ if ((op >= IR_ADD && op <= (LJ_DUALNUM ? 
IR_MUL : IR_SUB)) && tref_isinteger(rb) && tref_isinteger(rc) && - numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), - (int)op - (int)IR_ADD))) + lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), + (int)op - (int)IR_ADD))) return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); @@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) static int narrow_forl(jit_State *J, cTValue *o) { if (tvisint(o)) return 1; - if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); + if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o)); return 0; } diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 8d0259117b..d29d1eab68 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -573,13 +573,9 @@ static void split_ir(jit_State *J) case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP + lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted"); if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ - hi = split_call_l(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); - } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ - nir->o = IR_CALLN; - nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; - hi = split_emit(J, IRTI(IR_HIOP), nref, nref); + hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ @@ -692,8 +688,9 @@ static void split_ir(jit_State *J) nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { + lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); nir->o = IR_CALLN; - nir->op2 = irt_isint(ir->t) ? 
IRCALL_softfp_f2i : IRCALL_softfp_f2ui; + nir->op2 = IRCALL_softfp_f2i; } else #endif #if LJ_SOFTFP @@ -705,9 +702,7 @@ static void split_ir(jit_State *J) } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI - st == IRT_NUM ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) + st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i #else IRCALL_softfp_d2i #endif diff --git a/src/lj_parse.c b/src/lj_parse.c index 181ce4d7e2..832f6bf404 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); else #else - lua_Number n = expr_numberV(e); - int32_t k = lj_num2int(n); - if (checki16(k) && n == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64))) ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); else #endif @@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) setnumV(&o, n); if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. 
*/ if (LJ_DUALNUM) { - int32_t k = lj_num2int(n); - if ((lua_Number)k == n) { + int64_t i64; + int32_t k; + if (lj_num2int_check(n, i64, k)) { setintV(&e1->u.nval, k); return 1; } @@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; if (LJ_DUALNUM) { - lua_Number nn = numV(&n->key); - int32_t k = lj_num2int(nn); + int64_t i64; + int32_t k; lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); - if ((lua_Number)k == nn) + if (lj_num2int_check(numV(&n->key), i64, k)) setintV(tv, k); else *tv = n->key; @@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e) } } #else - lua_Number n = expr_numberV(e); - int32_t k = lj_num2int(n); - if (checku8(k) && n == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) { t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ return; } diff --git a/src/lj_record.c b/src/lj_record.c index 6543f2745c..536d7171ea 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) } else { cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); if (t == IRT_INT) { - int32_t k = numberVint(tv); - if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ - return lj_ir_kint(J, k); + if (tvisint(tv)) { + return lj_ir_kint(J, intV(tv)); + } else { + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */ + return lj_ir_kint(J, k); + } return 0; /* Type mismatch. */ } else { return lj_ir_knum(J, numberVnum(tv)); @@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, /* Integer keys are looked up in the array part first. 
*/ key = ix->key; if (tref_isnumber(key)) { - int32_t k = numberVint(&ix->keyv); - if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) - k = LJ_MAX_ASIZE; + int32_t k; + if (tvisint(&ix->keyv)) { + k = intV(&ix->keyv); + } else { + int64_t i64; + if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE; + } if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ TRef ikey = lj_opt_narrow_index(J, key); TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index bb649fc840..0936298d6b 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) /* Add number formatted as signed integer to buffer. */ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) { - int64_t k = (int64_t)n; + int64_t k = lj_num2i64(n); if (checki32(k) && sf == STRFMT_INT) return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ else @@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) /* Add number formatted as unsigned integer to buffer. */ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) { - int64_t k; - if (n >= 9223372036854775808.0) - k = (int64_t)(n - 18446744073709551616.0); - else - k = (int64_t)n; - return lj_strfmt_putfxint(sb, sf, (uint64_t)k); + return lj_strfmt_putfxint(sb, sf, lj_num2u64(n)); } /* Format stack arguments to buffer. */ diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 502c78e97e..fbb959c5bc 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); /* Try to convert number to integer, if requested. 
*/ - if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { - double n = o->n; - int32_t i = lj_num2int(n); - if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } + if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) { + int64_t tmp; + if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o)) + return STRSCAN_INT; } return fmt; } diff --git a/src/lj_tab.c b/src/lj_tab.c index 62e336111a..2959fadba7 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins) { lj_assertX(!tvisint(key), "bad integer key"); if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) { bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; return 1; } @@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) if (tv) return tv; } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if (nk == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(key), i64, k)) { cTValue *tv = lj_tab_getint(t, k); if (tv) return tv; @@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) } else if (tvisint(key)) { return lj_tab_setint(L, t, intV(key)); } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if (nk == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(key), i64, k)) return lj_tab_setint(L, t, k); if (tvisnan(key)) lj_err_msg(L, LJ_ERR_NANIDX); @@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) setnumV(&tmp, (lua_Number)k); key = &tmp; } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if ((uint32_t)k < t->asize && nk == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize)) return 
(uint32_t)k + 1; } if (!tvisnil(key)) { diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index fa32a5d46f..193102eec7 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -314,6 +314,7 @@ typedef enum { XO_FSTPq = XO_(dd), XOg_FSTPq = 3, XO_FISTPq = XO_(df), XOg_FISTPq = 7, XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, + XO_FADDd = XO_(d8), XOg_FADDd = 0, XO_FADDq = XO_(dc), XOg_FADDq = 0, XO_FLDCW = XO_(d9), XOg_FLDCW = 5, XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 diff --git a/src/lj_trace.c b/src/lj_trace.c index 47d7faa5c9..ad32954066 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g) tv[1].u64 = U64x(80000000,00000000); /* Initialize 32/64 bit constants. */ +#if LJ_TARGET_X64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#endif #if LJ_TARGET_X86ORX64 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); -#if LJ_32 - J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); -#endif J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); - J->k32[LJ_K32_M2P64_31] = LJ_64 ? 
0xdf800000 : 0xcf000000; #endif +#if LJ_TARGET_MIPS64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); +#endif +#if LJ_TARGET_MIPS + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#endif + #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 - J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); + J->k32[LJ_K32_M2P64] = 0xdf800000; +#endif +#if LJ_TARGET_MIPS64 + J->k32[LJ_K32_2P63] = 0x5f000000; #endif #if LJ_TARGET_PPC J->k32[LJ_K32_2P52_2P31] = 0x59800004; J->k32[LJ_K32_2P52] = 0x59800000; #endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC J->k32[LJ_K32_2P31] = 0x4f000000; #endif -#if LJ_TARGET_MIPS - J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); -#if LJ_64 - J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); - J->k32[LJ_K32_2P63] = 0x5f000000; - J->k32[LJ_K32_M2P64] = 0xdf800000; -#endif -#endif + #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; diff --git a/src/lj_vm.h b/src/lj_vm.h index 9cc42613d3..96ad2d07ca 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); #if LJ_TARGET_PPC void lj_vm_cachesync(void *start, void *end); #endif -LJ_ASMF double lj_vm_foldarith(double x, double y, int op); +LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); #if LJ_HASJIT -LJ_ASMF double lj_vm_foldfpm(double x, int op); +LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); #endif -#if !LJ_ARCH_HASFPU -/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ +#if LJ_SOFTFP && LJ_TARGET_MIPS64 +LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x); #endif +/* Declared in lj_obj.h: +** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); +** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); +** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x); +** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); +*/ /* Dispatch targets for recording and 
hooks. */ LJ_ASMF void lj_vm_record(void); @@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[]; #define lj_vm_floor floor #define lj_vm_ceil ceil #else -LJ_ASMF double lj_vm_floor(double); -LJ_ASMF double lj_vm_ceil(double); +LJ_ASMF LJ_CONSTF double lj_vm_floor(double); +LJ_ASMF LJ_CONSTF double lj_vm_ceil(double); #if LJ_TARGET_ARM -LJ_ASMF double lj_vm_floor_sf(double); -LJ_ASMF double lj_vm_ceil_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double); #endif #endif #ifdef LUAJIT_NO_LOG2 -LJ_ASMF double lj_vm_log2(double); +LJ_ASMF LJ_CONSTF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif @@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #if LJ_HASJIT #if LJ_TARGET_X86ORX64 -LJ_ASMF void lj_vm_floor_sse(void); -LJ_ASMF void lj_vm_ceil_sse(void); -LJ_ASMF void lj_vm_trunc_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void); #endif #if LJ_TARGET_PPC || LJ_TARGET_ARM64 #define lj_vm_trunc trunc #else -LJ_ASMF double lj_vm_trunc(double); +LJ_ASMF LJ_CONSTF double lj_vm_trunc(double); #if LJ_TARGET_ARM -LJ_ASMF double lj_vm_trunc_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double); #endif #endif #if LJ_HASFFI diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2c9b96cce4..1495102fbc 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op) case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; #if LJ_HASJIT - case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; + case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break; case IR_MIN - IR_ADD: return x < y ? x : y; break; case IR_MAX - IR_ADD: return x > y ? 
x : y; break; #endif diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 86bef0cfbc..2cd7eedb16 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx) | bx lr | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + |.if not HFABI + | vmov d0, CARG1, CARG2 + |.endif + | vcvt.s32.f64 s4, d0 + | vcvt.f64.s32 d1, s4 + | vcmp.f64 d0, d1 + | vmrs + | bne >1 + | vmov CRET1, s4 + | mov CRET2, #0 + | bx lr + | + |.else + | + | asr CARG4, CARG2, #31 // sign = 0 or -1. + | lsl CARG2, CARG2, #1 + | orrs RB, CARG2, CARG1 + | bxeq lr // Return 0 for +-0. + | mov RB, #1024 + | add RB, RB, #30 + | sub RB, RB, CARG2, lsr #21 + | cmp RB, #32 + | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. + | lsr CARG3, CARG1, #21 + | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa. + | rsb CARG3, RB, #32 + | lsl CARG3, CARG2, CARG3 + | orr CARG2, CARG2, #0x80000000 // Merge leading 1. + | orrs CARG3, CARG3, CARG1, lsl #11 + | lsr CARG1, CARG2, RB // lo = right-aligned absolute value. + | bne >1 // Fail if fractional part != 0. + | adds CRET1, CARG1, CARG4 + | bmi >1 // Fail if lo+sign >= 0x80000000. + | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign. + | mov CRET2, #0 + | bx lr + |.endif + |1: + | mov CRET1, #0x80000000 + | mov CRET2, #0x80000000 + | bx lr + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |// fallthrough, same as lj_vm_num2u64. + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if HFABI + | vmov CARG1, CARG2, d0 + |.endif + | lsl RB, CARG2, #1 + | lsr RB, RB, #21 + | sub RB, RB, #1020 + | sub RB, RB, #3 + | cmp RB, #116 + | bhs >3 // Exponent out of range. + | asr CARG4, CARG2, #31 // sign = 0 or -1. 
+ | lsl CARG2, CARG2, #12 + | lsr CARG2, CARG2, #12 + | rsbs RB, RB, #52 + | orr CARG2, CARG2, #0x00100000 + | bmi >2 // Shift mantissa left or right? + | lsr CARG1, CARG1, RB // 64 bit right shift. + | lsr CARG3, CARG2, RB + | rsb RB, RB, #32 + | orr CARG1, CARG1, CARG2, lsl RB + | rsb RB, RB, #0 + | orr CARG1, CARG1, CARG2, lsr RB + | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign. + | adc CRET2, CARG3, CARG4 + |1: + | eor CRET1, CRET1, CARG4 + | eor CRET2, CRET2, CARG4 + | bx lr + |2: + | rsb RB, RB, #0 + | lsl CARG2, CARG2, RB // 64 bit left shift. + | lsl CARG3, CARG1, RB + | sub RB, RB, #32 + | orr CARG2, CARG2, CARG1, lsl RB + | rsb RB, RB, #0 + | orr CARG2, CARG2, CARG1, lsr RB + | adds CRET1, CARG3, CARG4 + | adc CRET2, CARG2, CARG4 + | b <1 + |3: + | mov CRET1, #0 + | mov CRET2, #0 + | bx lr + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | vldr d1, >9 + |.if not HFABI + | vmov d0, CARG1, CARG2 + |.endif + | vadd.f64 d0, d0, d1 + | vmov CARG1, s0 + | bx lr + |9: + | .long 0, 0x43380000 // (double)(2^52 + 2^51). + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | @@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | // Subsumes ins_next1 and ins_next2. 
| ldr INS, TRACE:CARG1->startins - | bfi INS, OP, #0, #8 + | bic INS, INS, #0xff + | orr INS, INS, OP | str INS, [PC], #4 | b <1 |.endif diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index a437b65766..eb6d0c2f44 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx) | ret | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + | fcvtzs CRET1w, FARG1 + | scvtf FARG2, CRET1w + | fcmp FARG2, FARG1 + | bne >1 + | ret + |1: + | mov CRET1, #0x8000000080000000 + | ret + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + | fcvtzs CRET1, FARG1 + | ret + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + | fcvtzs CRET1, FARG1 + | fcvtzu CARG2, FARG1 + | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX. + | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64. + | ret + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51. 
+ | fmov FARG2, CRET1 + | fadd FARG1, FARG1, FARG2 + | fmov CRET1w, s0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 02e588eebd..8a6b82709a 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -85,6 +85,7 @@ | |.if FPU |.define FARG1, f12 +|.define FARG1HI, f13 |.define FARG2, f14 | |.define FRET1, f0 @@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx) | mtc1 r0, f4 | mtc1 TMP0, f5 | abs.d FRET2, FARG1 // |x| - | mfc1 AT, f13 + | mfc1 AT, FARG1HI | c.olt.d 0, FRET2, f4 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | bc1f 0, >1 // Truncate only if |x| < 2^52. @@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx) | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + | trunc.w.d FARG2, FARG1 + | mfc1 SFRETLO, FARG2 + | cvt.d.w FARG2, FARG2 + | c.eq.d FARG1, FARG2 + | bc1f 0, >2 + |. nop + | jr ra + |. move SFRETHI, r0 + | + |.else + | + | sll SFRETLO, SFARG1HI, 1 + | or SFRETHI, SFRETLO, SFARG1LO + | beqz SFRETHI, >1 // Return 0 for +-0. + |. li TMP0, 1054 + | srl AT, SFRETLO, 21 + | subu TMP0, TMP0, AT + | sltiu AT, TMP0, 32 + | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. + |. sll SFRETLO, SFARG1HI, 11 + | srl SFRETHI, SFARG1LO, 21 + | negu TMP1, TMP0 + | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa. + | sllv TMP2, SFRETLO, TMP1 + | lui AT, 0x8000 + | sll SFRETHI, SFARG1LO, 11 + | or SFRETLO, SFRETLO, AT // Merge leading 1. 
+ | or TMP2, TMP2, SFRETHI + | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value. + | bnez TMP2, >2 // Fail if fractional part != 0. + |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. + | addu SFRETLO, SFRETLO, SFARG1HI + | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000. + |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign. + |1: + | jr ra + |. move SFRETHI, r0 + |.endif + |2: // Not an integer, return 0x8000000080000000LL. + | lui SFRETHI, 0x8000 + | jr ra + |. lui SFRETLO, 0x8000 + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |// fallthrough, same as lj_vm_num2u64. + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if FPU + | mfc1 SFARG1HI, FARG1HI + | mfc1 SFARG1LO, FARG1 + |.endif + | srl TMP0, SFARG1HI, 20 + | andi TMP0, TMP0, 0x7ff + | addiu SFRETLO, TMP0, -1023 + | sltiu SFRETLO, SFRETLO, 116 + | beqz SFRETLO, >3 // Exponent out of range. + |. sll SFRETHI, SFARG1HI, 12 + | lui AT, 0x0010 + | srl SFRETHI, SFRETHI, 12 + | addiu TMP0, TMP0, -1075 + | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. + | bgez TMP0, >2 // Shift mantissa left or right? + |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa. + | subu TMP1, r0, TMP0 + | sll AT, SFRETHI, 1 + | nor TMP0, r0, TMP1 + | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp. + | sllv AT, AT, TMP0 // Shifted-out hi mantissa. + | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp. + | andi TMP1, TMP1, 0x20 // Conditional right shift by 32. + | or AT, AT, SFRETLO // Merge into lo mantissa. + | movn AT, SFRETHI, TMP1 + | movn SFRETHI, r0, TMP1 + |1: + | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign. + | addu SFRETHI, SFRETHI, SFARG1HI + | sltu TMP0, SFRETLO, AT // Carry + | addu SFRETHI, SFRETHI, TMP0 + | xor SFRETLO, SFRETLO, SFARG1HI + | jr ra + |. 
xor SFRETHI, SFRETHI, SFARG1HI + |2: + | srl TMP2, SFARG1LO, 1 + | nor AT, r0, TMP0 + | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp. + | srlv TMP2, TMP2, AT // Shifted-out lo mantissa. + | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp. + | andi TMP0, TMP0, 0x20 // Conditional left shift by 32. + | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa. + | movn SFRETHI, AT, TMP0 + | b <1 + |. movn AT, r0, TMP0 + |3: + | jr ra + |. li SFRETHI, 0 + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | lui AT, 0x59c0 // 2^52 + 2^51 (float). + | mtc1 AT, FARG2 + | cvt.d.s FARG2, FARG2 + | add.d FARG1, FARG1, FARG2 + | jr ra + |. mfc1 CRET1, FARG1 + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 859c0aee84..4dc40d8a7f 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx) | dinsu CRET2, AT, 21, 21 | slt AT, CARG1, r0 | dsrlv CRET1, CRET2, TMP0 - | dsubu CARG1, r0, CRET1 + | negu CARG1, CRET1 |.if MIPSR6 | seleqz CRET1, CRET1, AT | selnez CARG1, CARG1, AT @@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx) |.else | movn CRET1, CARG1, AT |.endif - | li CARG1, 64 - | subu TMP0, CARG1, TMP0 + | negu TMP0, TMP0 | dsllv CRET2, CRET2, TMP0 // Integer check. | sextw AT, CRET1 | xor AT, CRET1, AT // Range check. - |.if MIPSR6 - | seleqz AT, AT, CRET2 - | selnez CRET2, CRET2, CRET2 | jr ra |. or CRET2, AT, CRET2 - |.else - | jr ra - |. movz CRET2, AT, CRET2 - |.endif |1: | jr ra |. 
li CRET2, 1 @@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx) | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + | trunc.w.d FARG2, FARG1 + | mfc1 CRET1, FARG2 + | cvt.d.w FARG2, FARG2 + |.if MIPSR6 + | cmp.eq.d FARG2, FARG1, FARG2 + | bc1eqz FARG2, >2 + |.else + | c.eq.d FARG1, FARG2 + | bc1f 0, >2 + |.endif + |. nop + | jr ra + |. zextw CRET1, CRET1 + | + |.else + | + | dsll CRET2, CARG1, 1 + | beqz CRET2, >1 + |. li TMP0, 1076 + | dsrl AT, CRET2, 53 + | dsubu TMP0, TMP0, AT + | sltiu AT, TMP0, 54 + | beqz AT, >2 + |. dextm CRET2, CRET2, 0, 20 + | dinsu CRET2, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, CRET2, TMP0 + | negu CARG1, CRET1 + |.if MIPSR6 + | seleqz CRET1, CRET1, AT + | selnez CARG1, CARG1, AT + | or CRET1, CRET1, CARG1 + |.else + | movn CRET1, CARG1, AT + |.endif + | negu TMP0, TMP0 + | dsllv CRET2, CRET2, TMP0 // Integer check. + | sextw AT, CRET1 + | xor AT, CRET1, AT // Range check. + | or AT, AT, CRET2 + | bnez AT, >2 + |. nop + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 + |.endif + |2: + | lui CRET1, 0x8000 + | dsll CRET1, CRET1, 16 + | ori CRET1, CRET1, 0x8000 + | jr ra + |. dsll CRET1, CRET1, 16 + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |.if FPU + | trunc.l.d FARG1, FARG1 + | jr ra + |. dmfc1 CRET1, FARG1 + |.else + |// fallthrough, same as lj_vm_num2u64 for soft-float. + |.endif + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if FPU + | trunc.l.d FARG2, FARG1 + | dmfc1 CRET1, FARG2 + | li AT, -1 + | dsrl AT, AT, 1 + | beq CRET1, AT, >1 + |. lui AT, 0xdf80 // -2^64 (float). + | jr ra + |. 
nop + |1: + | mtc1 AT, FARG2 + | cvt.d.s FARG2, FARG2 + | add.d FARG1, FARG1, FARG2 + | trunc.l.d FARG2, FARG1 + | jr ra + |. dmfc1 CRET1, FARG2 + | + |.else + | + | dextu CARG2, CARG1, 20, 10 + | addiu AT, CARG2, -1023 + | sltiu AT, AT, 116 + | beqz AT, >2 // Exponent out of range. + |. addiu CARG2, CARG2, -1075 + | dextm CRET1, CARG1, 0, 19 + | dsll AT, AT, 52 + | dsra CARG1, CARG1, 63 // sign = 0 or -1. + | bgez CARG2, >1 // Shift mantissa left or right? + |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa. + | subu CARG2, r0, CARG2 + | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp. + | daddu CRET1, CRET1, CARG1 + | jr ra + |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign. + |1: + | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp. + | daddu CRET1, CRET1, CARG1 + | jr ra + |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign. + |2: + | jr ra + |. move CRET1, r0 + |.endif + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | lui AT, 0x59c0 // 2^52 + 2^51 (float). + | mtc1 AT, FARG2 + | cvt.d.s FARG2, FARG2 + | add.d FARG1, FARG1, FARG2 + | mfc1 CRET1, FARG1 + | jr ra + |. 
sextw CRET1, CRET1 + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 2ddeefbfea..1761e39bdf 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx) | blr | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + | subi sp, sp, 16 + | stfd FARG1, 0(sp) + | lwz CARG1, 0(sp) + | lwz CARG2, 4(sp) + |.endif + | slwi TMP1, CARG1, 1 + |.if PPE + | or TMP1, TMP1, CARG2 + | cmpwi TMP1, 0 + |.else + | or. TMP1, TMP1, CARG2 + |.endif + | beq >2 // Return 0 for +-0. + | rlwinm RB, CARG1, 12, 21, 31 + | subfic RB, RB, 1054 + | cmplwi RB, 32 + | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. + | slwi CARG3, CARG1, 11 + | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa. + | subfic TMP1, RB, 32 + | slw TMP1, CARG3, TMP1 + | slwi TMP2, CARG2, 11 + |.if PPE + | or. TMP1, TMP1, TMP2 + |.else + | or TMP1, TMP1, TMP2 + | cmpwi TMP1, 0 + |.endif + | bne >1 // Fail if fractional part != 0. + | oris CARG3, CARG3, 0x8000 // Merge leading 1. + | srw CRET2, CARG3, RB // lo = right-aligned absolute value. + | srawi CARG4, CARG1, 31 // sign = 0 or -1. + |.if GPR64 + | add CRET2, CRET2, CARG4 + | cmpwi CRET2, 0 + |.else + | add. CRET2, CRET2, CARG4 + |.endif + | blt >1 // Fail if fractional part != 0. + | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign. 
+ |2: + |.if GPR64 + | rldicl CRET1, CRET1, 0, 32 + |.else + | li CRET1, 0 + |.endif + |.if FPU + | addi sp, sp, 16 + |.endif + | blr + |1: + |.if GPR64 + | lus CRET1, 0x8000 + | rldicr CRET1, CRET1, 32, 32 + |.else + | lus CRET1, 0x8000 + | lus CRET2, 0x8000 + |.endif + |.if FPU + | addi sp, sp, 16 + |.endif + | blr + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |// fallthrough, same as lj_vm_num2u64. + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if FPU + | subi sp, sp, 16 + | stfd FARG1, 0(sp) + | lwz CARG1, 0(sp) + | lwz CARG2, 4(sp) + |.endif + | rlwinm RB, CARG1, 12, 21, 31 + | addi RB, RB, -1023 + | cmplwi RB, 116 + | bge >3 // Exponent out of range. + | srawi CARG4, CARG1, 31 // sign = 0 or -1. + | clrlwi CARG1, CARG1, 12 + | subfic RB, RB, 52 + | oris CARG1, CARG1, 0x0010 + | cmpwi RB, 0 + | blt >2 // Shift mantissa left or right? + | subfic TMP1, RB, 32 // 64 bit right shift. + | srw CARG2, CARG2, RB + | slw TMP2, CARG1, TMP1 + | addi TMP1, RB, -32 + | or CARG2, CARG2, TMP2 + | srw TMP2, CARG1, TMP1 + | or CARG2, CARG2, TMP2 + | srw CARG1, CARG1, RB + |1: + | addc CARG2, CARG2, CARG4 + | adde CARG1, CARG1, CARG4 + | xor CRET2, CARG2, CARG4 + | xor CRET1, CARG1, CARG4 + |.if GPR64 + | rldimi CRET2, CRET1, 0, 32 + | mr CRET1, CRET2 + |.endif + | addi sp, sp, 16 + | blr + |2: + | subfic TMP1, RB, 0 // 64 bit left shift. + | addi RB, RB, -32 + | slw CARG1, CARG1, TMP1 + | srw TMP2, CARG2, RB + | addi RB, TMP1, -32 + | or CARG1, CARG1, TMP2 + | slw TMP2, CARG2, RB + | or CARG1, CARG1, TMP2 + | slw CARG2, CARG2, TMP1 + | b <1 + |3: + | li CRET1, 0 + |.if not GPR64 + | li CRET2, 0 + |.endif + |.if FPU + | addi sp, sp, 16 + |.endif + | blr + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | lus TMP0, 0x59c0 // 2^52 + 2^51 (float). 
+ | subi sp, sp, 16 + | stw TMP0, 0(sp) + | lfs FARG2, 0(sp) + | fadd FARG1, FARG1, FARG2 + | stfd FARG1, 0(sp) + | lwz CRET1, 4(sp) + | addi sp, sp, 16 + | blr + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 4cfb7b6ad2..970e8e43df 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx) | ret | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + | cvttsd2si eax, xmm0 + | xorps xmm1, xmm1 + | cvtsi2sd xmm1, eax + | ucomisd xmm1, xmm0 + | jp >1 + | jne >1 + | ret + |1: + | mov64 rax, U64x(80000000,80000000) + | ret + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + | cvttsd2si rax, xmm0 + | ret + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range. + | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow. + | jo >1 + | ret + |1: + | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double). + | movd xmm1, rdx + | addsd xmm0, xmm1 + | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range. + | // Note that -0x1p63 converts to -0x8000000000000000LL either way. 
+ | ret + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + | sseconst_tobit xmm1, RC + | addsd xmm0, xmm1 + | movd eax, xmm0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 77c4069d45..485ed809c9 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx) | ret | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if not X64 + | movsd xmm0, qword [esp+4] + |.endif + | cvttsd2si eax, xmm0 + | xorps xmm1, xmm1 + | cvtsi2sd xmm1, eax + | ucomisd xmm1, xmm0 + | jp >1 + | jne >1 + |.if not X64 + | xor edx, edx + |.endif + | ret + |1: + |.if X64 + | mov64 rax, U64x(80000000,80000000) + |.else + | mov eax, 0x80000000 + | mov edx, eax + |.endif + | ret + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |.if X64 + | cvttsd2si rax, xmm0 + | ret + |.else + | sub esp, 12 + | fld qword [esp+16] + | fisttp qword [esp] + | mov eax, dword [esp] + | mov edx, dword [esp+4] + | add esp, 12 + | ret + |.endif + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if X64 + | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range. + | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow. + | jo >1 + | ret + |1: + | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double). + | movd xmm1, rdx + | addsd xmm0, xmm1 + | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range. + | // Note that -0x1p63 converts to -0x8000000000000000LL either way. 
+ | ret + |.else + | sub esp, 12 + | fld qword [esp+16] + | fld st0 + | fisttp qword [esp] + | mov edx, dword [esp+4] + | mov eax, dword [esp] + | cmp edx, 1 + | jo >2 + |1: + | fpop + | add esp, 12 + | ret + |2: + | cmp eax, 0 + | jne <1 + | mov dword [esp+8], 0xdf800000 // -0x1p64 (float). + | fadd dword [esp+8] + | fisttp qword [esp] + | mov eax, dword [esp] + | mov edx, dword [esp+4] + | add esp, 12 + | ret + |.endif + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + |.if not X64 + | movsd xmm0, qword [esp+4] + |.endif + | sseconst_tobit xmm1, RCa + | addsd xmm0, xmm1 + | movd eax, xmm0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | From 1acb204447aaa75e5338c3bbfd062aae64bc5959 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Nov 2025 17:48:44 +0100 Subject: [PATCH 38/73] Remove compiler flag for FP conversions. Now unnecessary. Undo #1355. #1397 #1411 --- src/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index e657af1343..969bf28913 100644 --- a/src/Makefile +++ b/src/Makefile @@ -302,9 +302,6 @@ endif ifneq (,$(INSTALL_LJLIBD)) TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" endif -ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1)) - TARGET_XCFLAGS+= -fno-strict-float-cast-overflow -endif ############################################################################## # Target system detection. From 6b35217b3d96dc9f468b4ce47e9d73c02cef110a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Nov 2025 20:02:00 +0100 Subject: [PATCH 39/73] Back out MSVC LJ_CONSTF declaration. The reason for the error message is still unclear. #1412. 
--- src/lj_def.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lj_def.h b/src/lj_def.h index f34b1a3996..25e827c4f1 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -246,7 +246,12 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) #define LJ_INLINE __inline #define LJ_AINLINE __forceinline #define LJ_NOINLINE __declspec(noinline) +#if MSVC_BROKEN +/* Unclear why this doesn't work, see #1412. */ #define LJ_CONSTF __declspec(nothrow noalias) +#else +#define LJ_CONSTF +#endif #if defined(_M_IX86) #define LJ_FASTCALL __fastcall #endif From b973c6243d4aab73e5c3df0d7264258b0672fa7e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Nov 2025 20:04:42 +0100 Subject: [PATCH 40/73] Fix MSVC LJ_CONSTF declaration. Reading MS docs is hard. #1412 --- src/lj_def.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/lj_def.h b/src/lj_def.h index 25e827c4f1..77852912e5 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -246,12 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) #define LJ_INLINE __inline #define LJ_AINLINE __forceinline #define LJ_NOINLINE __declspec(noinline) -#if MSVC_BROKEN -/* Unclear why this doesn't work, see #1412. */ -#define LJ_CONSTF __declspec(nothrow noalias) -#else -#define LJ_CONSTF -#endif +#define LJ_CONSTF __declspec(noalias) #if defined(_M_IX86) #define LJ_FASTCALL __fastcall #endif From 6f21cb8ace60b297cd144c3b6925865b043095d2 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 1 Dec 2025 13:50:32 +0100 Subject: [PATCH 41/73] Remove pointless GCC/MSVC const function attributes. They are only called from JIT-compiled code and not from C. Reported by Richard Beckmann. 
#1414 --- src/lj_vm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lj_vm.h b/src/lj_vm.h index 96ad2d07ca..01da1bd443 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -42,7 +42,7 @@ LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); #endif #if LJ_SOFTFP && LJ_TARGET_MIPS64 -LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x); +LJ_ASMF int32_t lj_vm_tointg(double x); #endif /* Declared in lj_obj.h: ** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); @@ -71,8 +71,8 @@ LJ_ASMF char lj_vm_exit_interp[]; LJ_ASMF LJ_CONSTF double lj_vm_floor(double); LJ_ASMF LJ_CONSTF double lj_vm_ceil(double); #if LJ_TARGET_ARM -LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double); -LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double); +LJ_ASMF double lj_vm_floor_sf(double); +LJ_ASMF double lj_vm_ceil_sf(double); #endif #endif #ifdef LUAJIT_NO_LOG2 @@ -86,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #if LJ_HASJIT #if LJ_TARGET_X86ORX64 -LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void); -LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void); -LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void); +LJ_ASMF void lj_vm_floor_sse(void); +LJ_ASMF void lj_vm_ceil_sse(void); +LJ_ASMF void lj_vm_trunc_sse(void); #endif #if LJ_TARGET_PPC || LJ_TARGET_ARM64 #define lj_vm_trunc trunc #else LJ_ASMF LJ_CONSTF double lj_vm_trunc(double); #if LJ_TARGET_ARM -LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double); +LJ_ASMF double lj_vm_trunc_sf(double); #endif #endif #if LJ_HASFFI From 406cf69b3ae0a3ecd5ede2bb97b937b7a5d92074 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 6 Dec 2025 08:35:19 +0100 Subject: [PATCH 42/73] DUALNUM: Add missing type conversion for FORI slots. Reported by Sergey Kaplun. 
#1413 --- src/lj_record.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/lj_record.c b/src/lj_record.c index 1d535a2299..04a379250d 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -307,12 +307,27 @@ static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) mode + conv); } +/* Convert FORI argument to expected target type. */ +static TRef fori_conv(jit_State *J, TRef tr, IRType t) +{ + if (t == IRT_INT) { + if (!tref_isinteger(tr)) + return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); + } else { + if (!tref_isnum(tr)) + return emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); + } + return tr; +} + /* Peek before FORI to find a const initializer. Otherwise load from slot. */ static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t, int mode) { TRef tr = J->base[slot]; - if (!tr) { + if (tr) { + tr = fori_conv(J, tr, t); + } else { tr = find_kinit(J, fori, slot, t); if (!tr) tr = fori_load(J, slot, t, mode); @@ -458,13 +473,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) lua_assert(tref_isnumber_str(tr[i])); if (tref_isstr(tr[i])) tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); - if (t == IRT_INT) { - if (!tref_isinteger(tr[i])) - tr[i] = emitir(IRTGI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK); - } else { - if (!tref_isnum(tr[i])) - tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT); - } + tr[i] = fori_conv(J, tr[i], t); } tr[FORL_EXT] = tr[FORL_IDX]; stop = tr[FORL_STOP]; From b1cd2f83b5d085bb71368b87c91a461be77d4364 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 6 Dec 2025 08:39:11 +0100 Subject: [PATCH 43/73] DUALNUM: Fix narrowing of unary minus. Reported by Sergey Kaplun. 
#1418 --- src/lj_opt_narrow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 3085c83766..a4a1c8eea5 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -545,9 +545,9 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { uint32_t k = (uint32_t)numberVint(vc); - if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { + if ((tvisint(vc) || k != 0) && k != 0x80000000u) { TRef zero = lj_ir_kint(J, 0); - if (!LJ_DUALNUM) + if (!tvisint(vc)) emitir(IRTGI(IR_NE), rc, zero); return emitir(IRTGI(IR_SUBOV), zero, rc); } From c7c74ea505ab9103bd0df8e84da56a895ae60e82 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 6 Dec 2025 08:42:18 +0100 Subject: [PATCH 44/73] ARM64: More fixes for ARM BTI. Reported by Yuichiro Naito. #1415 --- src/vm_arm64.dasc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index eb6d0c2f44..08ecb72c88 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1696,6 +1696,7 @@ static void build_subroutines(BuildCtx *ctx) | adr lr, >1 | checkint CARG1, ->vm_tobit_fb |1: + | bti_jump |.endmacro | |.macro .ffunc_bit_op, name, ins @@ -1710,6 +1711,7 @@ static void build_subroutines(BuildCtx *ctx) | bge >9 | checkint CARG1, ->vm_tobit_fb |2: + | bti_jump | ins TMP0w, TMP0w, CARG1w | b <1 |.endmacro @@ -1742,6 +1744,7 @@ static void build_subroutines(BuildCtx *ctx) | adr lr, >1 | checkint CARG1, ->vm_tobit_fb |1: + | bti_jump |.if shmod == 0 | mov TMP1, CARG1 |.else @@ -1751,6 +1754,7 @@ static void build_subroutines(BuildCtx *ctx) | adr lr, >2 | checkint CARG1, ->vm_tobit_fb |2: + | bti_jump | ins TMP0w, CARG1w, TMP1w | add_TISNUM CARG1, TMP0 | b ->fff_restv From 45b771bb2c693a4cc7e34e79b7d30ab10bb7776a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 6 Dec 2025 08:44:03 +0100 Subject: [PATCH 45/73] PPC: Fix soft-float lj_num2u64(). Reported by Doug Crawford. 
#1411 --- src/vm_ppc.dasc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 1761e39bdf..6a64383205 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -3268,7 +3268,9 @@ static void build_subroutines(BuildCtx *ctx) | rldimi CRET2, CRET1, 0, 32 | mr CRET1, CRET2 |.endif + |.if FPU | addi sp, sp, 16 + |.endif | blr |2: | subfic TMP1, RB, 0 // 64 bit left shift. From 7152e15489d2077cd299ee23e3d51a4c599ab14f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 8 Dec 2025 22:18:40 +0100 Subject: [PATCH 46/73] Fix string.format for limited precision FP conversions. Enforce round-to-even semantics. #1363 --- src/lj_strfmt_num.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index a33fc63ad8..35ff1bc965 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -169,7 +169,9 @@ static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) } if (k > 18) { if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { - stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; + /* Must not limit precision here or nd_round cannot round to even. + ** stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; + */ } else { int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); @@ -242,6 +244,41 @@ static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) return ndhi; } +/* Round to even with given precision. Extra digits are not zeroed. */ +static uint32_t nd_round(uint32_t* nd, uint32_t ndlo, uint32_t ndhi, int32_t e) +{ + uint32_t i; + int32_t d; + char buf[9]; + if (e >= 0) { + i = (uint32_t)e / 9; + d = 8 - e + (int32_t)i * 9; + } else { + int32_t f = (e - 8) / 9; + i = (uint32_t)(64 + f); + d = 8 - e + f * 9; + } + lj_strfmt_wuint9(buf, nd[i]); + if (buf[d] < '5') { + return ndhi; /* Don't round up. */ + } else if (buf[d] == '5') { /* Must check for round to even. 
*/ + if (d ? (buf[d-1] & 1) : (nd[(i + 1) & 0x3f] & 1)) + goto round_up; /* Round up '[13579]5.*' */ + while (++d < 9) { /* Check remaining digits in buffer. */ + if (buf[d] != '0') + goto round_up; /* Round up '[02468]5[^0]*'. */ + } + while (i != ndlo) { /* Check remaining fraction. */ + if (nd[i]) + goto round_up; /* Round up '[02468]5[^0]*'. */ + i = (i - 1) & 0x3f; + } + return ndhi; /* Don't round up. */ + } /* else: round up.*/ +round_up: + return nd_add_m10e(nd, ndhi, 5, e); /* Round up by adding 5*10^e. */ +} + /* Test whether two "nd" values are equal in their most significant digits. */ static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, MSize prec) @@ -432,7 +469,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) } if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { /* Precision is sufficiently low as to maybe require rounding. */ - ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); + ndhi = nd_round(nd, ndlo, ndhi, nde - prec - 1); nde += (hilen != ndigits_dec(nd[ndhi])); } nde += ndebias; @@ -508,7 +545,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) /* %f (or, shortly, %g in %f style) */ if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { /* Precision is sufficiently low as to maybe require rounding. */ - ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); + ndhi = nd_round(nd, ndlo, ndhi, 0 - prec - 1); } g_format_like_f: if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { From 6e69e4731437d6876b96e843804a6ef0a671ed25 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 9 Jan 2026 17:18:03 +0100 Subject: [PATCH 47/73] Bump copyright date. 
--- COPYRIGHT | 2 +- Makefile | 2 +- README | 2 +- doc/bluequad-print.css | 2 +- doc/bluequad.css | 2 +- doc/contact.html | 6 +++--- doc/ext_c_api.html | 4 ++-- doc/ext_ffi.html | 4 ++-- doc/ext_ffi_api.html | 4 ++-- doc/ext_ffi_semantics.html | 4 ++-- doc/ext_ffi_tutorial.html | 4 ++-- doc/ext_jit.html | 4 ++-- doc/extensions.html | 4 ++-- doc/install.html | 4 ++-- doc/luajit.html | 6 +++--- doc/running.html | 4 ++-- dynasm/dasm_arm.h | 2 +- dynasm/dasm_arm.lua | 2 +- dynasm/dasm_mips.h | 2 +- dynasm/dasm_mips.lua | 2 +- dynasm/dasm_ppc.h | 2 +- dynasm/dasm_ppc.lua | 2 +- dynasm/dasm_proto.h | 2 +- dynasm/dasm_x64.lua | 2 +- dynasm/dasm_x86.h | 2 +- dynasm/dasm_x86.lua | 2 +- dynasm/dynasm.lua | 4 ++-- etc/luajit.1 | 2 +- src/Makefile | 2 +- src/host/buildvm.c | 2 +- src/host/buildvm.h | 2 +- src/host/buildvm_asm.c | 2 +- src/host/buildvm_fold.c | 2 +- src/host/buildvm_lib.c | 2 +- src/host/buildvm_peobj.c | 2 +- src/host/genminilua.lua | 2 +- src/host/genversion.lua | 2 +- src/jit/bc.lua | 2 +- src/jit/bcsave.lua | 2 +- src/jit/dis_arm.lua | 2 +- src/jit/dis_mips.lua | 2 +- src/jit/dis_mipsel.lua | 2 +- src/jit/dis_ppc.lua | 2 +- src/jit/dis_x64.lua | 2 +- src/jit/dis_x86.lua | 2 +- src/jit/dump.lua | 2 +- src/jit/v.lua | 2 +- src/lib_aux.c | 2 +- src/lib_base.c | 2 +- src/lib_bit.c | 2 +- src/lib_debug.c | 2 +- src/lib_ffi.c | 2 +- src/lib_init.c | 2 +- src/lib_io.c | 2 +- src/lib_jit.c | 2 +- src/lib_math.c | 2 +- src/lib_os.c | 2 +- src/lib_package.c | 2 +- src/lib_string.c | 2 +- src/lib_table.c | 2 +- src/lj_api.c | 2 +- src/lj_arch.h | 2 +- src/lj_asm.c | 2 +- src/lj_asm.h | 2 +- src/lj_asm_arm.h | 2 +- src/lj_asm_mips.h | 2 +- src/lj_asm_ppc.h | 2 +- src/lj_asm_x86.h | 2 +- src/lj_bc.c | 2 +- src/lj_bc.h | 2 +- src/lj_bcdump.h | 2 +- src/lj_bcread.c | 2 +- src/lj_bcwrite.c | 2 +- src/lj_carith.c | 2 +- src/lj_carith.h | 2 +- src/lj_ccall.c | 2 +- src/lj_ccall.h | 2 +- src/lj_ccallback.c | 2 +- src/lj_ccallback.h | 2 +- src/lj_cconv.c | 2 +- src/lj_cconv.h 
| 2 +- src/lj_cdata.c | 2 +- src/lj_cdata.h | 2 +- src/lj_clib.c | 2 +- src/lj_clib.h | 2 +- src/lj_cparse.c | 2 +- src/lj_cparse.h | 2 +- src/lj_crecord.c | 2 +- src/lj_crecord.h | 2 +- src/lj_ctype.c | 2 +- src/lj_ctype.h | 2 +- src/lj_debug.c | 2 +- src/lj_debug.h | 2 +- src/lj_def.h | 2 +- src/lj_dispatch.c | 2 +- src/lj_dispatch.h | 2 +- src/lj_emit_arm.h | 2 +- src/lj_emit_mips.h | 2 +- src/lj_emit_ppc.h | 2 +- src/lj_emit_x86.h | 2 +- src/lj_err.c | 2 +- src/lj_err.h | 2 +- src/lj_errmsg.h | 2 +- src/lj_ff.h | 2 +- src/lj_ffrecord.c | 2 +- src/lj_ffrecord.h | 2 +- src/lj_frame.h | 2 +- src/lj_func.c | 2 +- src/lj_func.h | 2 +- src/lj_gc.c | 2 +- src/lj_gc.h | 2 +- src/lj_gdbjit.c | 2 +- src/lj_gdbjit.h | 2 +- src/lj_ir.c | 2 +- src/lj_ir.h | 2 +- src/lj_ircall.h | 2 +- src/lj_iropt.h | 2 +- src/lj_jit.h | 2 +- src/lj_lex.c | 2 +- src/lj_lex.h | 2 +- src/lj_lib.c | 2 +- src/lj_lib.h | 2 +- src/lj_load.c | 2 +- src/lj_mcode.c | 2 +- src/lj_mcode.h | 2 +- src/lj_meta.c | 2 +- src/lj_meta.h | 2 +- src/lj_obj.c | 2 +- src/lj_obj.h | 2 +- src/lj_opt_dce.c | 2 +- src/lj_opt_fold.c | 2 +- src/lj_opt_loop.c | 2 +- src/lj_opt_mem.c | 2 +- src/lj_opt_narrow.c | 2 +- src/lj_opt_sink.c | 2 +- src/lj_opt_split.c | 2 +- src/lj_parse.c | 2 +- src/lj_parse.h | 2 +- src/lj_record.c | 2 +- src/lj_record.h | 2 +- src/lj_snap.c | 2 +- src/lj_snap.h | 2 +- src/lj_state.c | 2 +- src/lj_state.h | 2 +- src/lj_str.c | 2 +- src/lj_str.h | 2 +- src/lj_strscan.c | 2 +- src/lj_strscan.h | 2 +- src/lj_tab.c | 2 +- src/lj_tab.h | 2 +- src/lj_target.h | 2 +- src/lj_target_arm.h | 2 +- src/lj_target_mips.h | 2 +- src/lj_target_ppc.h | 2 +- src/lj_target_x86.h | 2 +- src/lj_trace.c | 2 +- src/lj_trace.h | 2 +- src/lj_traceerr.h | 2 +- src/lj_udata.c | 2 +- src/lj_udata.h | 2 +- src/lj_vm.h | 2 +- src/lj_vmevent.c | 2 +- src/lj_vmevent.h | 2 +- src/lj_vmmath.c | 2 +- src/ljamalg.c | 2 +- src/luaconf.h | 2 +- src/luajit.c | 2 +- src/luajit_rolling.h | 4 ++-- src/lualib.h | 2 +- 
src/msvcbuild.bat | 2 +- src/vm_arm.dasc | 2 +- src/vm_mips.dasc | 2 +- src/vm_ppc.dasc | 2 +- src/vm_ppcspe.dasc | 2 +- src/vm_x86.dasc | 2 +- 175 files changed, 190 insertions(+), 190 deletions(-) diff --git a/COPYRIGHT b/COPYRIGHT index a192ae49da..955d0d1e75 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,7 +1,7 @@ =============================================================================== LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ -Copyright (C) 2005-2025 Mike Pall. All rights reserved. +Copyright (C) 2005-2026 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 1c619c70c2..4d1cb44f60 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ # For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MinGW and Cygwin, cd to src and run make with the Makefile there. # -# Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +# Copyright (C) 2005-2026 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 diff --git a/README b/README index bb80f8c48a..13ebd080fa 100644 --- a/README +++ b/README @@ -5,7 +5,7 @@ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. Project Homepage: https://luajit.org/ -LuaJIT is Copyright (C) 2005-2025 Mike Pall. +LuaJIT is Copyright (C) 2005-2026 Mike Pall. LuaJIT is free software, released under the MIT license. See full Copyright Notice in the COPYRIGHT file or in luajit.h. diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css index 5bfda5d323..c0c37c3824 100644 --- a/doc/bluequad-print.css +++ b/doc/bluequad-print.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2025 Mike Pall. +/* Copyright (C) 2004-2026 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. 
* But please do not steal the stylesheet, the layout or the color scheme. diff --git a/doc/bluequad.css b/doc/bluequad.css index 5334a7596b..45ba2d578e 100644 --- a/doc/bluequad.css +++ b/doc/bluequad.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2025 Mike Pall. +/* Copyright (C) 2004-2026 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. diff --git a/doc/contact.html b/doc/contact.html index f3ebf6467f..6c733a4a80 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -3,7 +3,7 @@ Contact - + @@ -90,7 +90,7 @@

Contact

Copyright

All documentation is -Copyright © 2005-2025 Mike Pall. +Copyright © 2005-2026 Mike Pall.

@@ -98,7 +98,7 @@

Copyright