From e3c70a7d813ec7e3226510acedd64fc96021d4b0 Mon Sep 17 00:00:00 2001
From: Mike Pall
@@ -384,6 +386,24 @@ -O[level]
recunroll 2 Min. unroll factor for true recursion
-sizemcode 32 Size of each machine code area in KBytes (Windows: 64K)
+sizemcode 64 Size of each machine code area in KBytes
-maxmcode 512 Max. total size of all machine code areas in KBytes
+maxmcode 2048 Max. total size of all machine code areas in KBytes
diff --git a/src/lib_jit.c b/src/lib_jit.c
index fd8e585b83..1b74d957b5 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str)
size_t len = *(const uint8_t *)lst;
lj_assertJ(len != 0, "bad JIT_P_STRING");
if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
- int32_t n = 0;
+ uint32_t n = 0;
const char *p = &str[len+1];
while (*p >= '0' && *p <= '9')
n = n*10 + (*p++ - '0');
- if (*p) return 0; /* Malformed number. */
- J->param[i] = n;
+ if (*p || (int32_t)n < 0) return 0; /* Malformed number. */
+ if (i == JIT_P_sizemcode) { /* Adjust to required range here. */
+#if LJ_TARGET_JUMPRANGE
+ uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64);
+#else
+ uint32_t maxkb = ((1 << (31 - 10)) - 64);
+#endif
+ n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1);
+ if (n > maxkb) n = maxkb;
+ }
+ J->param[i] = (int32_t)n;
if (i == JIT_P_hotloop)
lj_dispatch_init_hotcount(J2G(J));
return 1; /* Ok. */
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 6d1a92714c..799f9c6cc3 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -301,6 +301,7 @@
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_TARGET_GC64 1
+#define LJ_PAGESIZE 16384
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_ARCH_VERSION 80
@@ -456,7 +457,7 @@
#define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4
#define LJ_TARGET_EHRAREG 31
-#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
+#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 8f558a0392..0e888c294a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -93,6 +93,10 @@ typedef struct ASMState {
MCode *invmcp; /* Points to invertible loop branch (or NULL). */
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
+ MCode *mctail; /* Tail of trace before stack adjust + jmp. */
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
+ MCode *mcexit; /* Pointer to exit stubs. */
+#endif
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
@@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
RA_DBGX((as, "===== STOP ====="));
/* General trace setup. Emit tail of trace. */
- asm_tail_prep(as);
+ asm_tail_prep(as, T->link);
as->mcloop = NULL;
as->flagmcp = NULL;
as->topslot = 0;
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 24deaeae27..406360d26a 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
/* Generate an exit stub group at the bottom of the reserved MCode memory. */
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
+ ExitNo i;
+ int ind = 0;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
- int i;
- if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
+ if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop)
asm_mclimit(as);
- /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
- *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP);
- *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu);
- mxp++;
+ if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) {
+ /* str lr, [sp]; bl ->vm_exit_handler;
+ ** .long DISPATCH_address, group.
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ /*
+ ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr;
+ ** .long DISPATCH_address, group;
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 2;
+ } else {
+ /* .long vm_exit_handler;
+ ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr;
+ ** .long DISPATCH_address, group;
+ */
+ *mxp++ = (MCode)target;
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16;
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 1;
+ }
*mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
*mxp++ = group*EXITSTUBS_PER_GROUP;
for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
- *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+ *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu);
lj_mcode_sync(as->mcbot, mxp);
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
@@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- as->mctop = --p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(ARMI_ADD, spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) {
+ *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ } else {
+ *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0;
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ *mcp++ = (MCode)target;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
+ while (as->mctop > mcp) *--as->mctop = ARMI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if ((((target - p - 2) + 0x00800000u) >> 24) ||
+ (((target - p - 1) + 0x00800000u) >> 24)) p -= 2;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
*p = 0; /* Prevent load/store merging. */
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 4feaa3b0c2..085f935728 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
asm_mclimit(as);
- /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ ind = !A64F_S_OK(target - (mxp - nexits - 2), 26);
+ /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno;
+ ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i));
+ as->mcexit = mxp;
*--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
- mxp--;
- *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+ if (ind) {
+ *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR));
+ *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3));
+ } else {
+ mxp--;
+ *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp));
+ }
*--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
as->mctop = mxp;
}
@@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
@@ -1917,34 +1929,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
+ MCode *mcp = as->mctail;
MCode *target;
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
- if (spadj == 0) {
- *--p = A64I_LE(A64I_NOP);
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+ *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = A64I_B | A64F_S26((target-p)+1);
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || A64F_S_OK(target - mcp, 26)) {
+ *mcp = A64I_B | A64F_S26(target - mcp); mcp++;
+ } else {
+ *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3);
+ *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR);
+ }
+ while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP);
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
*p = 0; /* Prevent load/store merging. */
}
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index af0e714f15..8dadabe4a0 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
- *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
- *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
- lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
- "branch target out of range");
- *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
+ *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno;
+ if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) {
+ /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
+ *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ /* sw TMP, 0(sp); lw/ld TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */
+ *--mxp = MIPSI_JR | MIPSF_S(RID_TMP);
+ *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+#endif
+ }
+ *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0;
as->mctop = mxp;
}
@@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
+ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)|
+ RID2RSET(RID_CFUNCADDR)
#if LJ_TARGET_MIPSR6
|RID2RSET(RID_F21)
#endif
@@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
- RID2RSET(RID_R1)|RID2RSET(RID_R12);
+ RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR);
if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
ra_evictset(as, drop);
/* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
@@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- MCode *p = as->mctop-1;
- *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
- p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+ if (((uintptr_t)mcp ^ target) >> 28 == 0) {
+ *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+#endif
+ *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP);
+ }
+ *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
- as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */
- as->invmcp = as->loopref ? as->mcp : NULL;
+ as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. */
+ if (as->loopref) {
+ as->invmcp = as->mcp;
+ } else {
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--;
+ }
+ as->invmcp = NULL;
+ }
+ as->mctail = as->mcp;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index df1ac42f7a..d77c45ce9b 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 5 + MCLIM_REDZONE) < as->mclim) /* Worst case: 5 ins. */
asm_mclimit(as);
- /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
+ ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0;
+ /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno;
+ ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
+ *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2);
+ as->mcexit = mxp;
*--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */
- mxp--;
- *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
- *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
+ if (ind) {
+ *--mxp = PPCI_BCTRL;
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+ } else {
+ mxp--;
+ *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu);
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ }
as->mctop = mxp;
}
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
@@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- *--p = PPCI_NOP;
- *--p = PPCI_NOP;
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
- p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
- p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
+ *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
+ *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
+ /* Emit exit branch. */
+ if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) {
+ *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++;
+ } else {
+ *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+ *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *mcp++ = PPCI_BCTR;
+ }
+ while (as->mctop > mcp) *--as->mctop = PPCI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-2; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) ||
+ (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2;
+ }
+ p -= 2; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 774e77b433..f3c2238a2f 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -9,9 +9,12 @@
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
MCode *mxpstart = mxp;
- if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
+ if (mxp + ((2+2)*EXITSTUBS_PER_GROUP +
+ (LJ_GC64 ? 0 : 8) +
+ (LJ_64 ? 6 : 5)) >= as->mctop)
asm_mclimit(as);
/* Push low byte of exitno for each exit stub. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
@@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
#endif
/* Jump to exit handler which fills in the ExitState. */
- *mxp++ = XI_JMP; mxp += 4;
- *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
+ if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */
+ *mxp++ = XI_JMP; mxp += 4;
+ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4;
+ *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp);
+ }
/* Commit the code for this group (even if assembly fails later on). */
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
@@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
ExitNo i;
if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+#if LJ_64
+ if (as->J->exitstubgroup[0] == NULL) {
+ /* Store the two potentially out-of-range targets below group 0. */
+ MCode *mxp = as->mcbot;
+ while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3;
+ *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8;
+ *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8;
+ as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */
+ }
+#endif
for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
if (as->J->exitstubgroup[i] == NULL)
as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
@@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
"bad interned 64 bit constant");
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
- *(uint64_t*)as->mcbot = *k;
+ *(uint64_t *)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
p = (MCode *)(void *)ir_k64(irf)->u64;
else
p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
- if (p - as->mcp == (int32_t)(p - as->mcp))
+ if (jmprel_ok(p, as->mcp))
return p; /* Call target is still in +-2GB range. */
/* Avoid the indirect case of emit_call(). Try to hoist func addr. */
}
@@ -2806,6 +2824,8 @@ static void asm_gc_check(ASMState *as)
emit_rr(as, XO_TEST, RID_RET, RID_RET);
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ASMREF_TMP2; /* MSize steps */
+ /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */
+ if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP;
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
#if LJ_GC64
@@ -2919,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
/* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
- MCode *p = as->mctop;
- MCode *target, *q;
+ MCode *mcp = as->mctail;
+ MCode *target;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- p -= LJ_64 ? 7 : 6;
- } else {
- MCode *p1;
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
+ if (LJ_64) *mcp++ = 0x48;
if (checki8(spadj)) {
- p -= 3;
- p1 = p-6;
- *p1 = (MCode)spadj;
+ *mcp++ = XI_ARITHi8;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *mcp++ = (MCode)spadj;
} else {
- p1 = p-9;
- *(int32_t *)p1 = spadj;
+ *mcp++ = XI_ARITHi;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *(int32_t *)mcp = spadj; mcp += 4;
}
-#if LJ_64
- p1[-3] = 0x48;
-#endif
- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- *(int32_t *)(p-4) = jmprel(as->J, p, target);
- p[-5] = XI_JMP;
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */
+ *mcp++ = XI_JMP; mcp += 4;
+ *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4;
+ *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp);
+ }
/* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
- for (q = as->mctop-1; q >= p; q--)
- *q = XI_NOP;
- as->mctop = p;
+ while (as->mctop > mcp) *--as->mctop = XI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop;
/* Realign and leave room for backwards loop branch or exit branch. */
@@ -2964,15 +2980,17 @@ static void asm_tail_prep(ASMState *as)
as->mctop = p;
p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
} else {
- p -= 5; /* Space for exit branch (near jmp). */
+ p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */
}
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
- as->mcp = p - (LJ_64 ? 7 : 6);
+ /* Leave room for ESP adjustment: add esp, imm */
+ p -= LJ_64 ? 7 : 6;
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
@@ -3132,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
} else if (*p == XI_CALL &&
(void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
pgc = p+7; /* Do not patch GC check exit. */
+ } else if (LJ_64 && *p == 0xff &&
+ p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) &&
+ p[2] == XI_NOP) {
+ pgc = p+5; /* Do not patch GC check exit. */
}
}
lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index c60e7d7560..3e1eb64bfc 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
return 0; /* Failed. */
}
+#define emit_movw_k(k) \
+ (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4))
+#define emit_movt_k(k) \
+ (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4))
+
/* Load a 32 bit constant into a GPR. */
static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
@@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i)
emit_d(as, ARMI_MOV^k, rd);
} else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
/* 16 bit loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta1(as, rd, i)) {
/* One step delta relative to another constant. */
} else if ((as->flags & JIT_F_ARMV6T2)) {
/* 32 bit hiword/loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movt_k(i), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta2(as, rd, i)) {
/* Two step delta relative to another constant. */
} else {
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index d8104959aa..d65b1c5777 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index b13f00fe5b..56928e4235 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 5fd6cfa7eb..858fe753be 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -478,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source)
/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)
+/* Check if two addresses are in relative jump range. */
+static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b)
+{
+#if LJ_64
+ return a - b == (int32_t)(a - b);
+#else
+ UNUSED(a); UNUSED(b);
+ return 1;
+#endif
+}
+
/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
@@ -511,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target)
{
MCode *p = as->mcp;
#if LJ_64
- if (target-p != (int32_t)(target-p)) {
+ if (!jmprel_ok(target, p)) {
/* Assumes RID_RET is never an argument to calls and always clobbered. */
emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
emit_loadu64(as, RID_RET, (uint64_t)target);
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 102ba0b4b7..05a8e9bbe9 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -104,14 +104,6 @@
/* -- JIT engine parameters ----------------------------------------------- */
-#if LJ_TARGET_WINDOWS || LJ_64
-/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
-#define JIT_P_sizemcode_DEFAULT 64
-#else
-/* Could go as low as 4K, but the mmap() overhead would be rather high. */
-#define JIT_P_sizemcode_DEFAULT 32
-#endif
-
/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
_(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
@@ -131,9 +123,9 @@
_(\011, recunroll, 2) /* Min. unroll for true recursion. */ \
\
/* Size of each machine code area (in KBytes). */ \
- _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
+ _(\011, sizemcode, 64) \
/* Max. total size of all machine code areas (in KBytes). */ \
- _(\010, maxmcode, 512) \
+ _(\010, maxmcode, 2048) \
/* End of list. */
enum {
@@ -374,10 +366,14 @@ enum {
LJ_K64_2P63, /* 2^63 */
LJ_K64_M2P64, /* -2^64 */
#endif
+#endif
+#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
+ LJ_K64_VM_EXIT_HANDLER,
+ LJ_K64_VM_EXIT_INTERP,
#endif
LJ_K64__MAX,
};
-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
+#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
enum {
#if LJ_TARGET_X86ORX64
@@ -393,6 +389,10 @@ enum {
#if LJ_TARGET_MIPS64
LJ_K32_2P63, /* 2^63 */
LJ_K32_M2P64, /* -2^64 */
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
+ LJ_K32_VM_EXIT_HANDLER,
+ LJ_K32_VM_EXIT_INTERP,
#endif
LJ_K32__MAX
};
@@ -513,6 +513,7 @@ typedef struct jit_State {
MCode *mcbot; /* Bottom of current mcode area. */
size_t szmcarea; /* Size of current mcode area. */
size_t szallmcarea; /* Total size of all allocated mcode areas. */
+ uintptr_t mcmin, mcmax; /* Mcode allocation range. */
TValue errinfo; /* Additional info element for trace errors. */
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 2b8ac2df58..c3032f4e2d 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end)
#if LJ_HASJIT
+#if LUAJIT_SECURITY_MCODE != 0
+/* Protection twiddling failed. Probably due to kernel security. */
+static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
+{
+ lua_CFunction panic = J2G(J)->panic;
+ if (panic) {
+ lua_State *L = J->L;
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
+ panic(L);
+ }
+ exit(EXIT_FAILURE);
+}
+#endif
+
#if LJ_TARGET_WINDOWS
#define MCPROT_RW PAGE_READWRITE
#define MCPROT_RX PAGE_EXECUTE_READ
#define MCPROT_RWX PAGE_EXECUTE_READWRITE
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot)
{
- void *p = LJ_WIN_VALLOC((void *)hint, sz,
- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
- if (!p && !hint)
- lj_trace_err(J, LJ_TRERR_MCODEAL);
- return p;
+ return LJ_WIN_VALLOC((void *)hint, sz,
+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J); UNUSED(sz);
+ UNUSED(sz);
VirtualFree(p, 0, MEM_RELEASE);
}
-static int mcode_setprot(void *p, size_t sz, DWORD prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
DWORD oprot;
- return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
+ if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J);
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
+#endif
}
#elif LJ_TARGET_POSIX
@@ -117,33 +132,32 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
#define MCPROT_CREATE 0
#endif
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot)
{
void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0);
- if (p == MAP_FAILED) {
- if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
- p = NULL;
+ if (p == MAP_FAILED) return NULL;
#if MCMAP_CREATE
- } else {
- pthread_jit_write_protect_np(0);
+ pthread_jit_write_protect_np(0);
#endif
- }
return p;
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J);
munmap(p, sz);
}
-static int mcode_setprot(void *p, size_t sz, int prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
#if MCMAP_CREATE
+ UNUSED(J); UNUSED(p); UNUSED(sz); /* pthread_jit_write_protect_np() takes no address range. */
pthread_jit_write_protect_np((prot & PROT_EXEC));
- return 0;
#else
- return mprotect(p, sz, prot);
+ if (mprotect(p, sz, prot)) mcode_protfail(J);
+#endif
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
#endif
}
@@ -153,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot)
#endif
+#ifdef LUAJIT_MCODE_TEST
+/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try:
+** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua
+** LUAJIT_MCODE_TEST=F luajit -jv main.lua
+** Each char steers one call: lowercase a/h/r/f apply once and advance; H/R, F (or unknown) and end-of-string persist. */
+static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot)
+{
+ static int test_ofs = 0; /* Current index into test_str. */
+ static const char *test_str; /* Cached LUAJIT_MCODE_TEST value; "" if unset. */
+ if (!test_str) {
+ test_str = getenv("LUAJIT_MCODE_TEST");
+ if (!test_str) test_str = "";
+ }
+ switch (test_str[test_ofs]) {
+ case 'a': /* OK for one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ case '\0': /* EOS: OK for any further allocations. */
+ break;
+ case 'h': /* Ignore one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'H': /* Ignore any further hints. */
+ hint = 0u;
+ break;
+ case 'r': /* Randomize one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'R': /* Randomize any further hints. */
+ hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu; /* Clear low 16 bits: 64KB-aligned. */
+ hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1; /* Keep the low 47 bits (LJ_64) or 31 bits. */
+ break;
+ case 'f': /* Fail one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ default: /* 'F' or unknown: Fail any further allocations. */
+ return NULL;
+ }
+ return mcode_alloc_at(hint, sz, prot);
+}
+#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot) /* Redirects later mcode_alloc_at() uses to the wrapper; needs a jit_State *J in scope. */
+#endif
+
/* -- MCode area protection ----------------------------------------------- */
#if LUAJIT_SECURITY_MCODE == 0
@@ -174,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot)
static void mcode_protect(jit_State *J, int prot)
{
- UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
+ UNUSED(J); UNUSED(prot);
}
#else
@@ -190,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot)
#define MCPROT_GEN MCPROT_RW
#define MCPROT_RUN MCPROT_RX
-/* Protection twiddling failed. Probably due to kernel security. */
-static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
-{
- lua_CFunction panic = J2G(J)->panic;
- if (panic) {
- lua_State *L = J->L;
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
- panic(L);
- }
- exit(EXIT_FAILURE);
-}
-
/* Change protection of MCode area. */
static void mcode_protect(jit_State *J, int prot)
{
if (J->mcprot != prot) {
- if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot)))
- mcode_protfail(J);
+ mcode_setprot(J, J->mcarea, J->szmcarea, prot);
J->mcprot = prot;
}
}
@@ -216,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot)
/* -- MCode area allocation ----------------------------------------------- */
-#if LJ_64
-#define mcode_validptr(p) (p)
-#else
-#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000)
-#endif
-
#ifdef LJ_TARGET_JUMPRANGE
-/* Get memory within relative jump distance of our code in 64 bit mode. */
-static void *mcode_alloc(jit_State *J, size_t sz)
+#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u)
+
+/* Set a memory range for mcode allocation with addr in the middle. */
+static void mcode_setrange(jit_State *J, uintptr_t addr)
{
- /* Target an address in the static assembler code (64K aligned).
- ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB.
- ** Use half the jump range so every address in the range can reach any other.
- */
#if LJ_TARGET_MIPS
- /* Use the middle of the 256MB-aligned region. */
- uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
- ~(uintptr_t)0x0fffffffu) + 0x08000000u;
+ /* Use the whole 256MB-aligned region. */
+ J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1);
+ J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE);
#else
- uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
+ /* Every address in the 64KB-aligned range should be able to reach
+ ** any other, so MCODE_RANGE64 is only half the (signed) branch range.
+ */
+ J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu;
+ J->mcmax = J->mcmin + MCODE_RANGE64;
#endif
- const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21);
- /* First try a contiguous area below the last one. */
- uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0;
- int i;
- /* Limit probing iterations, depending on the available pool size. */
- for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) {
- if (mcode_validptr(hint)) {
- void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN);
-
- if (mcode_validptr(p) &&
- ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range))
- return p;
- if (p) mcode_free(J, p, sz); /* Free badly placed area. */
- }
- /* Next try probing 64K-aligned pseudo-random addresses. */
+ /* Avoid wrap-around and the 64KB corners. */
+ if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u;
+ if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu;
+}
+
+/* Check if the area from addr to addr + sz lies within the mcode allocation range J->mcmin..J->mcmax. Returns non-zero if so. */
+static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz)
+{
+ /* Take care of unsigned wrap-around of addr + sz, too: the second test rejects a sum that wrapped to below J->mcmin. */
+ return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax;
+}
+
+/* Get memory within a specific jump range in 64 bit mode. */
+static void *mcode_alloc(jit_State *J, size_t sz)
+{
+ uintptr_t hint;
+ int i = 0, j;
+ if (!J->mcmin) /* Place initial range near the interpreter code. */
+ mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler);
+ else if (!J->mcmax) /* Switch to a new range (already flushed). */
+ goto newrange;
+ /* First try a contiguous area below the last one (if in range). */
+ hint = (uintptr_t)J->mcarea - sz;
+ if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */
+ goto probe;
+ for (; i < 16; i++) {
+ void *p = mcode_alloc_at(hint, sz, MCPROT_GEN);
+ if (mcode_inrange(J, (uintptr_t)p, sz))
+ return p; /* Success. */
+ else if (p)
+ mcode_free(p, sz); /* Free badly placed area. */
+ probe:
+ /* Next try probing 64KB-aligned pseudo-random addresses. */
+ j = 0;
do {
- hint = lj_prng_u64(&J2G(J)->prng) & ((1u<Conversions between C types
Integer →round double, float
-
+double, float →trunc int32_t →narrow (u)int8_t, (u)int16_t double, float →trunc int64_t →narrow * (u)int8_t, (u)int16_t, (u)int32_t
-
+double, float →trunc (u)int32_t, (u)int64_t double, float →trunc int64_t
+
+double, float →trunc uint64_t ∪ int64_t →reinterpret * uint64_t
-double, float →round float, double
+
-Number n == 0 → 0, otherwise 1 bool
+
-bool false → 0, true → 1 Number
+
-Complex number convert real part Number
-
Number convert real part, imag = 0 Complex number
+
+Number convert real part, imag = 0 Complex number
-Complex number convert real and imag part Complex number
+
-Number convert scalar and replicate Vector
+
-Vector copy (same size) Vector
+
-struct/union take base address (compat) Pointer
-
Array take base address (compat) Pointer
+
+Array take base address (compat) Pointer
-Function take function address Function pointer
+
-Number convert via uintptr_t (cast) Pointer
-
Pointer convert address (compat/cast) Pointer
-
+Pointer convert address (cast) Integer Pointer convert address (compat/cast) Pointer
+
+Pointer convert address (cast) Integer
-Array convert base address (cast) Integer
+
-Array copy (compat) Array
+
struct/union copy (identical type) struct/union Conversions between C types
Conversions not listed above will raise an error. E.g. it's not
possible to convert a pointer to a complex number or vice versa.
+* Some conversions from double have a larger defined range to
+allow for mixed-signedness conversions, which are common in C code.
+E.g. initializing an int32_t field with 0xffffffff
+or initializing a uint32_t or uint64_t field with
+-1. Under strict conversion rules, these assignments would
+give undefined results, since Lua numbers are doubles. The extended
+ranges make these conversions defined. Lua numbers outside even
+the extended ranges give an architecture-specific result.
+
+
+Please note that doubles do not have the precision to represent the
+whole signed or unsigned 64 bit integer range. Beware of large hex
+constants in particular: e.g. 0xffffffffffffffff is a double
+rounded up to 0x1p64 during parsing. This will not
+convert to a defined 64 bit integer value. Use the 64 bit literal
+syntax instead, i.e. 0xffffffffffffffffULL.
+
diff --git a/src/lib_io.c b/src/lib_io.c
index 5659ff518b..ec7d254596 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp)
lua_Number d;
if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
if (LJ_DUALNUM) {
- int32_t i = lj_num2int(d);
- if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) {
+ int64_t i64;
+ int32_t i;
+ if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) {
setintV(L->top++, i);
return 1;
}
@@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek)
if (tvisint(o))
ofs = (int64_t)intV(o);
else if (tvisnum(o))
- ofs = (int64_t)numV(o);
+ ofs = lj_num2i64(numV(o));
else if (!tvisnil(o))
lj_err_argt(L, 3, LUA_TNUMBER);
}
diff --git a/src/lib_os.c b/src/lib_os.c
index ae3fc85783..fffc923e23 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d)
LJLIB_CF(os_date)
{
const char *s = luaL_optstring(L, 1, "%c");
- time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL));
+ time_t t = lua_isnoneornil(L, 2) ? time(NULL) :
+ lj_num2int_type(luaL_checknumber(L, 2), time_t);
struct tm *stm;
#if LJ_TARGET_POSIX
struct tm rtm;
@@ -253,8 +254,9 @@ LJLIB_CF(os_time)
LJLIB_CF(os_difftime)
{
- lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)),
- (time_t)(luaL_optnumber(L, 2, (lua_Number)0))));
+ lua_pushnumber(L,
+ difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t),
+ lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t)));
return 1;
}
diff --git a/src/lj_api.c b/src/lj_api.c
index e9fc25b438..94d8bc7e80 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
return intV(&tmp);
n = numV(&tmp);
}
-#if LJ_64
- return (lua_Integer)n;
-#else
- return lj_num2int(n);
-#endif
+ return lj_num2int_type(n, lua_Integer);
}
LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
@@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
n = numV(&tmp);
}
if (ok) *ok = 1;
-#if LJ_64
- return (lua_Integer)n;
-#else
- return lj_num2int(n);
-#endif
+ return lj_num2int_type(n, lua_Integer);
}
LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
@@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
return (lua_Integer)intV(&tmp);
n = numV(&tmp);
}
-#if LJ_64
- return (lua_Integer)n;
-#else
- return lj_num2int(n);
-#endif
+ return lj_num2int_type(n, lua_Integer);
}
LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
@@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
return (lua_Integer)intV(&tmp);
n = numV(&tmp);
}
-#if LJ_64
- return (lua_Integer)n;
-#else
- return lj_num2int(n);
-#endif
+ return lj_num2int_type(n, lua_Integer);
}
LUA_API int lua_toboolean(lua_State *L, int idx)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0e888c294a..8f7ae9a3b6 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir)
IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
IRCallID id;
+ const CCallInfo *ci;
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+ CCallInfo cim;
+#endif
IRRef args[2];
lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
"not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
args[LJ_BE] = (ir-1)->op1;
args[LJ_LE] = ir->op1;
- if (st == IRT_NUM || st == IRT_FLOAT) {
- id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+ lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted");
+ if (st == IRT_NUM) {
+ id = IRCALL_lj_vm_num2u64;
ir--;
+ ci = &lj_ir_callinfo[id];
} else {
id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
- }
- {
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
- CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+ cim = lj_ir_callinfo[id];
cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
+ ci = &cim;
#else
- const CCallInfo *ci = &lj_ir_callinfo[id];
+ ci = &lj_ir_callinfo[id];
#endif
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
}
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
#endif
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 406360d26a..1ddd2b3e07 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
Reg dest = ra_dest(as, ir, RSET_GPR);
ARMIns ai;
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
- ai = irt_isint(ir->t) ?
- (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
- (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
+ ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32;
emit_dm(as, ai, (tmp & 15), (left & 15));
}
} else
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index fdcff1db24..507fc08453 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else {
Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg dest = ra_dest(as, ir, RSET_GPR);
- A64Ins ai = irt_is64(ir->t) ?
- (st == IRT_NUM ?
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
- (st == IRT_NUM ?
- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
- emit_dn(as, ai, dest, (left & 31));
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
+ if (irt_isu64(ir->t)) {
+ emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP);
+ emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest);
+ emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31));
+ emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31));
+ } else {
+ A64Ins ai = irt_is64(ir->t) ?
+ (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) :
+ (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32);
+ emit_dn(as, ai, dest, (left & 31));
+ }
}
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg dest = ra_dest(as, ir, RSET_GPR);
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 8dadabe4a0..36ed5de4fb 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
- if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
- /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
- emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
- emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
- emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
- tmp, tmp);
- emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
- tmp, left, tmp);
- if (st == IRT_FLOAT)
- emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
- (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
- else
- emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
- (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
#if LJ_64
- } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
- MCLabel l_end;
+ if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
+ MCLabel l_end = emit_label(as);
emit_tg(as, MIPSI_DMFC1, dest, tmp);
- l_end = emit_label(as);
- /* For inputs >= 2^63 add -2^64 and convert again. */
+ /* For result == INT64_MAX add -2^64 and convert again. */
if (st == IRT_NUM) {
emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_M2P64],
- rset_exclude(RSET_GPR, dest));
- emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
-#if !LJ_TARGET_MIPSR6
- emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
- emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
-#else
- emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
- emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
-#endif
- emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
- (void *)&as->J->k64[LJ_K64_2P63],
- rset_exclude(RSET_GPR, dest));
+ rset_exclude(RSET_GPR, dest)); /* Delay slot. */
+ emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
+ emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
+ emit_ti(as, MIPSI_LI, RID_TMP, -1);
+ emit_tg(as, MIPSI_DMFC1, dest, tmp);
+ emit_fg(as, MIPSI_TRUNC_L_D, tmp, left);
} else {
emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_M2P64],
- rset_exclude(RSET_GPR, dest));
- emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
-#if !LJ_TARGET_MIPSR6
- emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
- emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
-#else
- emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
- emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
-#endif
- emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
- (void *)&as->J->k32[LJ_K32_2P63],
- rset_exclude(RSET_GPR, dest));
+ rset_exclude(RSET_GPR, dest)); /* Delay slot. */
+ emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
+ emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
+ emit_ti(as, MIPSI_LI, RID_TMP, -1);
+ emit_tg(as, MIPSI_DMFC1, dest, tmp);
+ emit_fg(as, MIPSI_TRUNC_L_S, tmp, left);
}
+ } else
#endif
- } else {
+ {
#if LJ_32
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
"bad type for checked CONV");
asm_tointg(as, ir, RID_NONE);
} else {
- IRCallID cid = irt_is64(ir->t) ?
- ((st == IRT_NUM) ?
- (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
- (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
- ((st == IRT_NUM) ?
- (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
- (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
+ IRCallID cid;
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
+ lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted");
+ cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 :
+ (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i);
asm_callid(as, ir, cid);
}
} else
@@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
}
}
} else {
- if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ if (!irt_isu32(ir->t)) { /* Implicit sign extension. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ emit_dta(as, MIPSI_SLL, dest, left, 0);
+ } else if (st64 && !(ir->op2 & IRCONV_NONE)) {
/* This is either a 32 bit reg/reg mov which zeroes the hiword
** or a load of the loword from a 64 bit address.
*/
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index d77c45ce9b..9e2af4144a 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
- if (irt_isu32(ir->t)) {
- /* Convert both x and x-2^31 to int and merge results. */
- Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
- emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */
- emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
- emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
- emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */
- emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */
- emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */
- emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
- emit_tai(as, PPCI_LWZ, dest,
- RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */
- emit_fb(as, PPCI_FCTIWZ, tmp, left);
- emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
- emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
- emit_fab(as, PPCI_FSUB, tmp, left, tmp);
- emit_lsptr(as, PPCI_LFS, (tmp & 31),
- (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
- } else {
- emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
- emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
- emit_fb(as, PPCI_FCTIWZ, tmp, left);
- }
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
+ emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
+ emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
+ emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
} else
#endif
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index f3c2238a2f..bdbce1163c 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
- if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
- /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
- /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
+ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
+#if LJ_64
+ if (irt_isu64(ir->t)) {
+ /* For the indefinite result -2^63, add -2^64 and convert again. */
Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
ra_scratch(as, RSET_FPR);
MCLabel l_end = emit_label(as);
- if (LJ_32)
- emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
emit_rr(as, op, dest|REX_64, tmp);
if (st == IRT_NUM)
- emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
+ emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]);
else
- emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
- emit_sjcc(as, CC_NS, l_end);
- emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
+ emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]);
+ emit_sjcc(as, CC_NO, l_end);
+ emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1);
emit_rr(as, op, dest|REX_64, tmp);
ra_left(as, tmp, lref);
- } else {
- if (LJ_64 && irt_isu32(ir->t))
- emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
+
+ } else
+#endif
+ {
emit_mrm(as, op,
- dest|((LJ_64 &&
- (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
+ dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0),
asm_fuseload(as, lref, RSET_FPR));
}
}
@@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
Reg lo, hi;
+ int usehi = ra_used(ir);
lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
hi = ra_dest(as, ir, RSET_GPR);
@@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
}
if (dt == IRT_U64) {
- /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+ /* For the indefinite result -2^63, add -2^64 and convert again. */
MCLabel l_pop, l_end = emit_label(as);
emit_x87op(as, XI_FPOP);
l_pop = emit_label(as);
emit_sjmp(as, l_end);
- emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+ if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
if ((as->flags & JIT_F_SSE3))
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
else
emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
- emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
- emit_sjcc(as, CC_NS, l_pop);
- emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
- }
- emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+ emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]);
+ emit_sjcc(as, CC_NE, l_pop);
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0);
+ emit_sjcc(as, CC_NO, l_pop);
+ emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1);
+ usehi = 1;
+ }
+ if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
} else { /* Otherwise set FPU rounding mode to truncate before the store. */
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index ec6f13c8d5..cd7ae942a7 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
p = lj_strfmt_wuleb128(p, intV(o));
} else if (tvisnum(o)) {
if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
- lua_Number num = numV(o);
- int32_t k = lj_num2int(num);
- if (num == (lua_Number)k) { /* -0 is never a constant. */
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
*p++ = BCDUMP_KTAB_INT;
p = lj_strfmt_wuleb128(p, k);
ctx->sb.w = p;
@@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
/* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
/* Narrow number constants to integers. */
- lua_Number num = numV(o);
- k = lj_num2int(num);
- if (num == (lua_Number)k) { /* -0 is never a constant. */
+ int64_t i64;
+ if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
save_int:
p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
if (k < 0)
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 854b51db74..2b9349cdcc 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
else goto err_conv; /* NYI: long double. */
/* Then convert double to integer. */
/* The conversion must exactly match the semantics of JIT-compiled code! */
- if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) {
- int32_t i = (int32_t)n;
+ if (dsize < 8) {
+ int64_t i = lj_num2i64(n); /* Always convert via int64_t. */
if (dsize == 4) *(int32_t *)dp = i;
else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
else *(int8_t *)dp = (int8_t)i;
- } else if (dsize == 4) {
- *(uint32_t *)dp = (uint32_t)n;
} else if (dsize == 8) {
- if (!(dinfo & CTF_UNSIGNED))
- *(int64_t *)dp = (int64_t)n;
- else
+ if ((dinfo & CTF_UNSIGNED))
*(uint64_t *)dp = lj_num2u64(n);
+ else
+ *(int64_t *)dp = lj_num2i64(n);
} else {
goto err_conv; /* NYI: conversion to >64 bit integers. */
}
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 3b48f76c1e..2dc56a802c 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -133,12 +133,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp,
idx = (ptrdiff_t)intV(key);
goto integer_key;
} else if (tvisnum(key)) { /* Numeric key. */
-#ifdef _MSC_VER
- /* Workaround for MSVC bug. */
- volatile
-#endif
- lua_Number n = numV(key);
- idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
+ idx = lj_num2int_type(numV(key), ptrdiff_t);
integer_key:
if (ctype_ispointer(ct->info)) {
CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 27f2c1dd99..45c559cf63 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
/* fallthrough */
case CCX(I, F):
if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
- sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
+ conv_I_F:
+#if LJ_SOFTFP || LJ_32
+ if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */
+ sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0);
+ st = IRT_NUM;
+ }
+#endif
+ if (dsize < 8) {
+ lj_needsplit(J);
+ sp = emitconv(sp, IRT_I64, st, IRCONV_ANY);
+ sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0);
+ } else {
+ sp = emitconv(sp, dt, st, IRCONV_ANY);
+ }
goto xstore;
case CCX(I, P):
case CCX(I, A):
@@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
goto xstore;
case CCX(P, F):
if (st == IRT_CDATA) goto err_nyi;
- /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
- sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
- st, IRCONV_ANY);
- goto xstore;
+ /* The signed 64 bit conversion is cheaper. */
+ dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32;
+ goto conv_I_F;
/* Destination is an array. */
case CCX(A, A):
@@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
if (J->base[0] && tref_iscdata(J->base[1])) {
tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
J->base[1], &rd->argv[1]);
- if (!tref_isinteger(tsh))
+ if (LJ_32 && !tref_isinteger(tsh))
tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
J->base[1] = tsh;
}
@@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
if (id) {
TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
uint32_t op = rd->data;
+ IRType t;
if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
+ t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh);
if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
!tref_isk(tsh))
- tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
+ tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63));
#ifdef LJ_TARGET_UNIFYROT
- if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
- op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
- tsh = emitir(IRTI(IR_NEG), tsh, tsh);
- }
+ if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+ op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+ tsh = emitir(IRT(IR_NEG, t), tsh, tsh);
+ }
#endif
tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
diff --git a/src/lj_def.h b/src/lj_def.h
index a9e2372988..f34b1a3996 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter;
#define LJ_INLINE inline
#define LJ_AINLINE inline __attribute__((always_inline))
#define LJ_NOINLINE __attribute__((noinline))
+#define LJ_CONSTF __attribute__((nothrow,const))
#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__)
#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
@@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p)
#define LJ_INLINE __inline
#define LJ_AINLINE __forceinline
#define LJ_NOINLINE __declspec(noinline)
+#define LJ_CONSTF __declspec(nothrow noalias)
#if defined(_M_IX86)
#define LJ_FASTCALL __fastcall
#endif
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 527b6c0681..290986f6bd 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o)
{
if (!lj_strscan_numberobj(o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
- return tvisint(o) ? intV(o) : lj_num2int(numV(o));
+ return numberVint(o);
}
/* Get runtime value of string argument. */
@@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd)
/* Result is integral (or NaN/Inf), but may not fit an int32_t. */
if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */
lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data);
- if (n == (lua_Number)lj_num2int(n))
+ if (lj_num2int_ok(n))
tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK);
}
J->base[0] = tr;
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e7a5e8bc09..e24fead4d6 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64)
return lj_ir_k64(J, IR_KINT64, u64);
}
-/* Check whether a number is int and return it. -0 is NOT considered an int. */
-static int numistrueint(lua_Number n, int32_t *kp)
-{
- int32_t k = lj_num2int(n);
- if (n == (lua_Number)k) {
- if (kp) *kp = k;
- if (k == 0) { /* Special check for -0. */
- TValue tv;
- setnumV(&tv, n);
- if (tv.u32.hi != 0)
- return 0;
- }
- return 1;
- }
- return 0;
-}
-
/* Intern number as int32_t constant if possible, otherwise as FP constant. */
TRef lj_ir_knumint(jit_State *J, lua_Number n)
{
+ int64_t i64;
int32_t k;
- if (numistrueint(n, &k))
+ TValue tv;
+ setnumV(&tv, n);
+ /* -0 is NOT considered an int. */
+ if (lj_num2int_check(n, i64, k) && !tvismzero(&tv))
return lj_ir_kint(J, k);
else
return lj_ir_knum(J, n);
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 5196144e48..60b196c699 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -233,20 +233,15 @@ typedef struct CCallInfo {
_(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
_(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
_(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
- _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
_(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
_(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
- _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
_(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
_(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
_(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
_(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
- _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
- _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
- _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
- _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
+ _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \
_(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
_(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
_(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
@@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define softfp_d2i __aeabi_d2iz
#define softfp_ui2d __aeabi_ui2d
#define softfp_f2d __aeabi_f2d
-#define softfp_d2ui __aeabi_d2uiz
#define softfp_d2f __aeabi_d2f
#define softfp_i2f __aeabi_i2f
#define softfp_ui2f __aeabi_ui2f
#define softfp_f2i __aeabi_f2iz
-#define softfp_f2ui __aeabi_f2uiz
#define fp64_l2d __aeabi_l2d
#define fp64_ul2d __aeabi_ul2d
#define fp64_l2f __aeabi_l2f
#define fp64_ul2f __aeabi_ul2f
-#if LJ_TARGET_IOS
-#define fp64_d2l __fixdfdi
-#define fp64_d2ul __fixunsdfdi
-#define fp64_f2l __fixsfdi
-#define fp64_f2ul __fixunssfdi
-#else
-#define fp64_d2l __aeabi_d2lz
-#define fp64_d2ul __aeabi_d2ulz
-#define fp64_f2l __aeabi_f2lz
-#define fp64_f2ul __aeabi_f2ulz
-#endif
#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
#define softfp_add __adddf3
#define softfp_sub __subdf3
@@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define softfp_d2i __fixdfsi
#define softfp_ui2d __floatunsidf
#define softfp_f2d __extendsfdf2
-#define softfp_d2ui __fixunsdfsi
#define softfp_d2f __truncdfsf2
#define softfp_i2f __floatsisf
#define softfp_ui2f __floatunsisf
#define softfp_f2i __fixsfsi
-#define softfp_f2ui __fixunssfsi
#else
#error "Missing soft-float definitions for target architecture"
#endif
@@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a);
#if LJ_HASFFI
extern double softfp_ui2d(uint32_t a);
extern double softfp_f2d(float a);
-extern uint32_t softfp_d2ui(double a);
extern float softfp_d2f(double a);
extern float softfp_i2f(int32_t a);
extern float softfp_ui2f(uint32_t a);
extern int32_t softfp_f2i(float a);
-extern uint32_t softfp_f2ui(float a);
#endif
#if LJ_TARGET_MIPS
extern double lj_vm_sfmin(double a, double b);
@@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b);
#define fp64_ul2d __floatundidf
#define fp64_l2f __floatdisf
#define fp64_ul2f __floatundisf
-#define fp64_d2l __fixdfdi
-#define fp64_d2ul __fixunsdfdi
-#define fp64_f2l __fixsfdi
-#define fp64_f2ul __fixunssfdi
#else
#error "Missing fp64 helper definitions for this compiler"
#endif
@@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a);
extern double fp64_ul2d(uint64_t a);
extern float fp64_l2f(int64_t a);
extern float fp64_ul2f(uint64_t a);
-extern int64_t fp64_d2l(double a);
-extern uint64_t fp64_d2ul(double a);
-extern int64_t fp64_f2l(float a);
-extern uint64_t fp64_f2ul(float a);
#endif
#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 05a8e9bbe9..c0523457ae 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -350,22 +350,18 @@ enum {
};
enum {
+#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
+ LJ_K64_M2P64, /* -2^64 */
+#endif
#if LJ_TARGET_X86ORX64
LJ_K64_TOBIT, /* 2^52 + 2^51 */
LJ_K64_2P64, /* 2^64 */
- LJ_K64_M2P64, /* -2^64 */
-#if LJ_32
- LJ_K64_M2P64_31, /* -2^64 or -2^31 */
-#else
- LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif
+#if LJ_TARGET_MIPS64
+ LJ_K64_2P63, /* 2^63 */
#endif
#if LJ_TARGET_MIPS
LJ_K64_2P31, /* 2^31 */
-#if LJ_64
- LJ_K64_2P63, /* 2^63 */
- LJ_K64_M2P64, /* -2^64 */
-#endif
#endif
#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
LJ_K64_VM_EXIT_HANDLER,
@@ -376,20 +372,19 @@ enum {
#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
enum {
-#if LJ_TARGET_X86ORX64
- LJ_K32_M2P64_31, /* -2^64 or -2^31 */
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
+ LJ_K32_M2P64, /* -2^64 */
+#endif
+#if LJ_TARGET_MIPS64
+ LJ_K32_2P63, /* 2^63 */
#endif
#if LJ_TARGET_PPC
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */
#endif
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC
LJ_K32_2P31, /* 2^31 */
#endif
-#if LJ_TARGET_MIPS64
- LJ_K32_2P63, /* 2^63 */
- LJ_K32_M2P64, /* -2^64 */
-#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
LJ_K32_VM_EXIT_HANDLER,
LJ_K32_VM_EXIT_INTERP,
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 88cb2bdd6c..d51351b827 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
** integer overflow. Overflow detection still works, since all FPUs
** return either MININT or MAXINT, which is then out of range.
*/
- int32_t i = (int32_t)numV(o);
+ int32_t i = lj_num2int(numV(o));
if (i >= a && i <= b) return i;
#if LJ_HASFFI
} else if (tviscdata(o)) {
diff --git a/src/lj_meta.c b/src/lj_meta.c
index c9307615f7..3f30fafb6b 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
if (tvisint(o+i)) {
k[i] = intV(o+i); nint++;
} else {
- k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i));
+ int64_t i64;
+ if (lj_num2int_check(numV(o+i), i64, k[i])) nint++;
}
}
if (nint == 3) { /* Narrow to integers. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 73b186e256..58e5049cc9 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
/* -- Number to integer conversion ---------------------------------------- */
-#if LJ_SOFTFP
-LJ_ASMF int32_t lj_vm_tobit(double x);
-#if LJ_TARGET_MIPS64
-LJ_ASMF int32_t lj_vm_tointg(double x);
-#endif
-#endif
+/*
+** The C standard leaves many aspects of FP to integer conversions as
+** undefined behavior. Portability is a mess, hardware support varies,
+** and modern C compilers are like a box of chocolates -- you never know
+** what you're gonna get.
+**
+** However, we need 100% matching behavior between the interpreter (asm + C),
+** optimizations (C) and the code generated by the JIT compiler (asm).
+** Mixing Lua numbers with FFI numbers creates some extra requirements.
+**
+** These conversions have been moved to assembler code, even if they seem
+** trivial, to foil unanticipated C compiler 'optimizations' with the
+** surrounding code. Only the unchecked double to int32_t conversion
+** is still in C, because it ought to be pretty safe -- we'll see.
+**
+** These macros also serve to document all places where FP to integer
+** conversions happen.
+*/
-static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-{
-#if LJ_SOFTFP
- return lj_vm_tobit(n);
-#else
- TValue o;
- o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
- return (int32_t)o.u32.lo;
-#endif
-}
+/* Unchecked double to int32_t conversion. */
+#define lj_num2int(n) ((int32_t)(n))
-#define lj_num2int(n) ((int32_t)(n))
+/* Unchecked double to arch/os-dependent signed integer type conversion.
+** This assumes the 32/64-bit signed conversions are NOT range-extended.
+*/
+#define lj_num2int_type(n, tp) ((tp)(n))
-/*
-** This must match the JIT backend behavior. In particular for archs
-** that don't have a common hardware instruction for this conversion.
-** Note that signed FP to unsigned int conversions have an undefined
-** result and should never be relied upon in portable FFI code.
-** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
+/* Convert a double to int32_t and check for exact conversion.
+** Returns the zero-extended int32_t on success. -0 is OK, too.
+** Returns 0x8000000080000000LL on failure (simplifies range checks).
*/
-static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
-{
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
- int64_t i = (int64_t)n;
- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
- return (uint64_t)i;
-#else
- return (uint64_t)n;
-#endif
-}
+LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
+
+/* Check for exact conversion only, without storing the result. */
+#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0)
+
+/* Check for exact conversion and conditionally store result.
+** Note: conditions that fail for 0x80000000 may check only the lower
+** 32 bits. This generates good code for both 32 and 64 bit archs.
+*/
+#define lj_num2int_cond(x, i64, i, cond) \
+ (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0)
+
+/* This is the generic check for a full-range int32_t result. */
+#define lj_num2int_check(x, i64, i) \
+ lj_num2int_cond((x), i64, i, i64 >= 0)
+
+/* Predictable conversion from double to int64_t or uint64_t.
+** Truncates towards zero. Out-of-range values, NaN and +-Inf return
+** an arch-dependent result, but do not cause C undefined behavior.
+** The uint64_t conversion accepts the union of the unsigned + signed range.
+*/
+LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
+LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x);
+
+#define lj_num2i64(x) (lj_vm_num2i64((x)))
+#define lj_num2u64(x) (lj_vm_num2u64((x)))
+
+/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */
+LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
+
+#define lj_num2bit(x) lj_vm_tobit((x))
static LJ_AINLINE int32_t numberVint(cTValue *o)
{
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 6fdf45663f..456c04b255 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith)
return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}
+/* Forward declaration. */
+static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
+ IROp op);
+
LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
- lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
- fins->o - IR_ADDOV);
- int32_t k = lj_num2int(n);
- if (n != (lua_Number)k)
- return FAILFOLD;
- return INTFOLD(k);
+ int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i,
+ (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD));
+ return checki32(k) ? INTFOLD(k) : FAILFOLD;
}
LJFOLD(BNOT KINT)
@@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
IROp op)
{
UNUSED(J);
-#if LJ_HASFFI
switch (op) {
case IR_ADD: k1 += k2; break;
case IR_SUB: k1 -= k2; break;
case IR_MUL: k1 *= k2; break;
+#if LJ_HASFFI
case IR_BAND: k1 &= k2; break;
case IR_BOR: k1 |= k2; break;
case IR_BXOR: k1 ^= k2; break;
@@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
default: lj_assertJ(0, "bad IR op %d", op); break;
- }
-#else
- UNUSED(k2); UNUSED(op);
- lj_assertJ(0, "FFI IR op without FFI");
#endif
+ }
return k1;
}
@@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
lua_Number n = knumleft;
- int32_t k = lj_num2int(n);
- if (irt_isguard(fins->t) && n != (lua_Number)k) {
+ if (irt_isguard(fins->t)) {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(n, i64, k))
+ return INTFOLD(k);
/* We're about to create a guard which always fails, like CONV +1.5.
** Some pathological loops cause this during LICM, e.g.:
** local x,k,t = 0,1.5,{1,[1.5]=2}
@@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num)
** assert(x == 300)
*/
return FAILFOLD;
+ } else {
+ return INTFOLD(lj_num2int(n));
}
- return INTFOLD(k);
-}
-
-LJFOLD(CONV KNUM IRCONV_U32_NUM)
-LJFOLDF(kfold_conv_knum_u32_num)
-{
-#ifdef _MSC_VER
- { /* Workaround for MSVC bug. */
- volatile uint32_t u = (uint32_t)knumleft;
- return INTFOLD((int32_t)u);
- }
-#else
- return INTFOLD((int32_t)(uint32_t)knumleft);
-#endif
}
LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
- return INT64FOLD((uint64_t)(int64_t)knumleft);
+ return INT64FOLD((uint64_t)lj_num2i64(knumleft));
}
LJFOLD(CONV KNUM IRCONV_U64_NUM)
@@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int)
}
LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
-LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
/* Fold even across PHI to avoid expensive num->int conversions in loop. */
@@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert)
return lj_opt_narrow_convert(J);
}
+LJFOLD(XSTORE any CONV)
+LJFOLDF(xstore_conv)
+{
+#if LJ_64 /* Rule body is only compiled for LJ_64; otherwise it yields NEXTFOLD. */
+ PHIBARRIER(fright);
+ if (!irt_is64(fins->t) &&
+ irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) &&
+ ((fright->op2&IRCONV_SRCMASK) == IRT_I64 ||
+ (fright->op2&IRCONV_SRCMASK) == IRT_U64)) { /* CONV i64/u64 -> stored type. */
+ fins->op2 = fright->op1; /* Store the CONV input instead of the CONV result. */
+ return RETRYFOLD;
+ }
+#else
+ UNUSED(J);
+#endif
+ return NEXTFOLD;
+}
+
/* -- Integer algebraic simplifications ----------------------------------- */
LJFOLD(ADD any KINT)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 01b5833d92..3085c83766 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
return 0;
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
lua_Number n = ir_knum(ir)->n;
+ int64_t i64;
+ int32_t k;
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
- /* Allows a wider range of constants. */
- int64_t k64 = (int64_t)n;
- if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
- *nc->sp++ = NARROWINS(NARROW_INT, 0);
- *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
- return 0;
- }
- } else {
- int32_t k = lj_num2int(n);
- /* Only if constant is a small integer. */
- if (checki16(k) && n == (lua_Number)k) {
+ /* Allows a wider range of constants, if const doesn't lose precision. */
+ if (lj_num2int_check(n, i64, k)) {
*nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k;
return 0;
}
+ } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) {
+ /* Only if constant is a small integer. */
+ *nc->sp++ = NARROWINS(NARROW_INT, 0);
+ *nc->sp++ = (NarrowIns)k;
+ return 0;
}
return 10; /* Never narrow other FP constants (this is rare). */
}
@@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
/* -- Narrowing of arithmetic operators ----------------------------------- */
-/* Check whether a number fits into an int32_t (-0 is ok, too). */
-static int numisint(lua_Number n)
-{
- return (n == (lua_Number)lj_num2int(n));
-}
-
/* Convert string to number. Error out for non-numeric string values. */
static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
{
@@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
tref_isinteger(rb) && tref_isinteger(rc) &&
- numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
- (int)op - (int)IR_ADD)))
+ lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
+ (int)op - (int)IR_ADD)))
return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
@@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
static int narrow_forl(jit_State *J, cTValue *o)
{
if (tvisint(o)) return 1;
- if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
+ if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o));
return 0;
}
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 8d0259117b..d29d1eab68 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -573,13 +573,9 @@ static void split_ir(jit_State *J)
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
+ lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted");
if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
- hi = split_call_l(J, hisubst, oir, ir,
- irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
- } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
- nir->o = IR_CALLN;
- nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
- hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
+ hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64);
}
#else
if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
@@ -692,8 +688,9 @@ static void split_ir(jit_State *J)
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
}
} else if (st == IRT_FLOAT) {
+ lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
nir->o = IR_CALLN;
- nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
+ nir->op2 = IRCALL_softfp_f2i;
} else
#endif
#if LJ_SOFTFP
@@ -705,9 +702,7 @@ static void split_ir(jit_State *J)
} else {
split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
- st == IRT_NUM ?
- (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
- (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
+ st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
#else
IRCALL_softfp_d2i
#endif
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 181ce4d7e2..832f6bf404 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv));
else
#else
- lua_Number n = expr_numberV(e);
- int32_t k = lj_num2int(n);
- if (checki16(k) && n == (lua_Number)k)
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64)))
ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
else
#endif
@@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
setnumV(&o, n);
if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
if (LJ_DUALNUM) {
- int32_t k = lj_num2int(n);
- if ((lua_Number)k == n) {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(n, i64, k)) {
setintV(&e1->u.nval, k);
return 1;
}
@@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
if (tvisnum(&n->key)) {
TValue *tv = &((TValue *)kptr)[kidx];
if (LJ_DUALNUM) {
- lua_Number nn = numV(&n->key);
- int32_t k = lj_num2int(nn);
+ int64_t i64;
+ int32_t k;
lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
- if ((lua_Number)k == nn)
+ if (lj_num2int_check(numV(&n->key), i64, k))
setintV(tv, k);
else
*tv = n->key;
@@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e)
}
}
#else
- lua_Number n = expr_numberV(e);
- int32_t k = lj_num2int(n);
- if (checku8(k) && n == (lua_Number)k) {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) {
t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
return;
}
diff --git a/src/lj_record.c b/src/lj_record.c
index 6543f2745c..536d7171ea 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
} else {
cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
if (t == IRT_INT) {
- int32_t k = numberVint(tv);
- if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */
- return lj_ir_kint(J, k);
+ if (tvisint(tv)) {
+ return lj_ir_kint(J, intV(tv));
+ } else {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */
+ return lj_ir_kint(J, k);
+ }
return 0; /* Type mismatch. */
} else {
return lj_ir_knum(J, numberVnum(tv));
@@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
/* Integer keys are looked up in the array part first. */
key = ix->key;
if (tref_isnumber(key)) {
- int32_t k = numberVint(&ix->keyv);
- if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k)
- k = LJ_MAX_ASIZE;
+ int32_t k;
+ if (tvisint(&ix->keyv)) {
+ k = intV(&ix->keyv);
+ } else {
+ int64_t i64;
+ if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE;
+ }
if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
TRef ikey = lj_opt_narrow_index(J, key);
TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index bb649fc840..0936298d6b 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
/* Add number formatted as signed integer to buffer. */
SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
{
- int64_t k = (int64_t)n;
+ int64_t k = lj_num2i64(n);
if (checki32(k) && sf == STRFMT_INT)
return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
else
@@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
/* Add number formatted as unsigned integer to buffer. */
SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
{
- int64_t k;
- if (n >= 9223372036854775808.0)
- k = (int64_t)(n - 18446744073709551616.0);
- else
- k = (int64_t)n;
- return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+ return lj_strfmt_putfxint(sb, sf, lj_num2u64(n));
}
/* Format stack arguments to buffer. */
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 502c78e97e..fbb959c5bc 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
/* Try to convert number to integer, if requested. */
- if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) {
- double n = o->n;
- int32_t i = lj_num2int(n);
- if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; }
+ if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
+ int64_t tmp;
+ if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o))
+ return STRSCAN_INT;
}
return fmt;
}
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 62e336111a..2959fadba7 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins)
{
lj_assertX(!tvisint(key), "bad integer key");
if (tvisnum(key)) {
- lua_Number nk = numV(key);
- int32_t k = lj_num2int(nk);
- if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) {
bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
return 1;
}
@@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
if (tv)
return tv;
} else if (tvisnum(key)) {
- lua_Number nk = numV(key);
- int32_t k = lj_num2int(nk);
- if (nk == (lua_Number)k) {
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(numV(key), i64, k)) {
cTValue *tv = lj_tab_getint(t, k);
if (tv)
return tv;
@@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
} else if (tvisint(key)) {
return lj_tab_setint(L, t, intV(key));
} else if (tvisnum(key)) {
- lua_Number nk = numV(key);
- int32_t k = lj_num2int(nk);
- if (nk == (lua_Number)k)
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_check(numV(key), i64, k))
return lj_tab_setint(L, t, k);
if (tvisnan(key))
lj_err_msg(L, LJ_ERR_NANIDX);
@@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
setnumV(&tmp, (lua_Number)k);
key = &tmp;
} else if (tvisnum(key)) {
- lua_Number nk = numV(key);
- int32_t k = lj_num2int(nk);
- if ((uint32_t)k < t->asize && nk == (lua_Number)k)
+ int64_t i64;
+ int32_t k;
+ if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize))
return (uint32_t)k + 1;
}
if (!tvisnil(key)) {
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index fa32a5d46f..193102eec7 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -314,6 +314,7 @@ typedef enum {
XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
XO_FISTPq = XO_(df), XOg_FISTPq = 7,
XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
+ XO_FADDd = XO_(d8), XOg_FADDd = 0,
XO_FADDq = XO_(dc), XOg_FADDq = 0,
XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 47d7faa5c9..ad32954066 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g)
tv[1].u64 = U64x(80000000,00000000);
/* Initialize 32/64 bit constants. */
+#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
+ J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
+#endif
#if LJ_TARGET_X86ORX64
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
-#if LJ_32
- J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
-#endif
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
- J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif
+#if LJ_TARGET_MIPS64
+ J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
+#endif
+#if LJ_TARGET_MIPS
+ J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
+#endif
+
#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
- J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
+ J->k32[LJ_K32_M2P64] = 0xdf800000;
+#endif
+#if LJ_TARGET_MIPS64
+ J->k32[LJ_K32_2P63] = 0x5f000000;
#endif
#if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004;
J->k32[LJ_K32_2P52] = 0x59800000;
#endif
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC
J->k32[LJ_K32_2P31] = 0x4f000000;
#endif
-#if LJ_TARGET_MIPS
- J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
-#if LJ_64
- J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
- J->k32[LJ_K32_2P63] = 0x5f000000;
- J->k32[LJ_K32_M2P64] = 0xdf800000;
-#endif
-#endif
+
#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 9cc42613d3..96ad2d07ca 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
#if LJ_TARGET_PPC
void lj_vm_cachesync(void *start, void *end);
#endif
-LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
+LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op);
#if LJ_HASJIT
-LJ_ASMF double lj_vm_foldfpm(double x, int op);
+LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op);
#endif
-#if !LJ_ARCH_HASFPU
-/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */
+#if LJ_SOFTFP && LJ_TARGET_MIPS64
+LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x);
#endif
+/* Declared in lj_obj.h:
+** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
+** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
+** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x);
+** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
+*/
/* Dispatch targets for recording and hooks. */
LJ_ASMF void lj_vm_record(void);
@@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[];
#define lj_vm_floor floor
#define lj_vm_ceil ceil
#else
-LJ_ASMF double lj_vm_floor(double);
-LJ_ASMF double lj_vm_ceil(double);
+LJ_ASMF LJ_CONSTF double lj_vm_floor(double);
+LJ_ASMF LJ_CONSTF double lj_vm_ceil(double);
#if LJ_TARGET_ARM
-LJ_ASMF double lj_vm_floor_sf(double);
-LJ_ASMF double lj_vm_ceil_sf(double);
+LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double);
+LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double);
#endif
#endif
#ifdef LUAJIT_NO_LOG2
-LJ_ASMF double lj_vm_log2(double);
+LJ_ASMF LJ_CONSTF double lj_vm_log2(double);
#else
#define lj_vm_log2 log2
#endif
@@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
#if LJ_HASJIT
#if LJ_TARGET_X86ORX64
-LJ_ASMF void lj_vm_floor_sse(void);
-LJ_ASMF void lj_vm_ceil_sse(void);
-LJ_ASMF void lj_vm_trunc_sse(void);
+LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void);
+LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void);
+LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void);
#endif
#if LJ_TARGET_PPC || LJ_TARGET_ARM64
#define lj_vm_trunc trunc
#else
-LJ_ASMF double lj_vm_trunc(double);
+LJ_ASMF LJ_CONSTF double lj_vm_trunc(double);
#if LJ_TARGET_ARM
-LJ_ASMF double lj_vm_trunc_sf(double);
+LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double);
#endif
#endif
#if LJ_HASFFI
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 2c9b96cce4..1495102fbc 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op)
case IR_NEG - IR_ADD: return -x; break;
case IR_ABS - IR_ADD: return fabs(x); break;
#if LJ_HASJIT
- case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
+ case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break;
case IR_MIN - IR_ADD: return x < y ? x : y; break;
case IR_MAX - IR_ADD: return x > y ? x : y; break;
#endif
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 86bef0cfbc..2cd7eedb16 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx)
| bx lr
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if FPU
+ |.if not HFABI
+ | vmov d0, CARG1, CARG2
+ |.endif
+ | vcvt.s32.f64 s4, d0
+ | vcvt.f64.s32 d1, s4
+ | vcmp.f64 d0, d1
+ | vmrs
+ | bne >1
+ | vmov CRET1, s4
+ | mov CRET2, #0
+ | bx lr
+ |
+ |.else
+ |
+ | asr CARG4, CARG2, #31 // sign = 0 or -1.
+ | lsl CARG2, CARG2, #1
+ | orrs RB, CARG2, CARG1
+ | bxeq lr // Return 0 for +-0.
+ | mov RB, #1024
+ | add RB, RB, #30
+ | sub RB, RB, CARG2, lsr #21
+ | cmp RB, #32
+ | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
+ | lsr CARG3, CARG1, #21
+ | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa.
+ | rsb CARG3, RB, #32
+ | lsl CARG3, CARG2, CARG3
+ | orr CARG2, CARG2, #0x80000000 // Merge leading 1.
+ | orrs CARG3, CARG3, CARG1, lsl #11
+ | lsr CARG1, CARG2, RB // lo = right-aligned absolute value.
+ | bne >1 // Fail if fractional part != 0.
+ | adds CRET1, CARG1, CARG4
+ | bmi >1 // Fail if lo+sign >= 0x80000000.
+ | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
+ | mov CRET2, #0
+ | bx lr
+ |.endif
+ |1:
+ | mov CRET1, #0x80000000
+ | mov CRET2, #0x80000000
+ | bx lr
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |// fallthrough, same as lj_vm_num2u64.
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if HFABI
+ | vmov CARG1, CARG2, d0
+ |.endif
+ | lsl RB, CARG2, #1
+ | lsr RB, RB, #21
+ | sub RB, RB, #1020
+ | sub RB, RB, #3
+ | cmp RB, #116
+ | bhs >3 // Exponent out of range.
+ | asr CARG4, CARG2, #31 // sign = 0 or -1.
+ | lsl CARG2, CARG2, #12
+ | lsr CARG2, CARG2, #12
+ | rsbs RB, RB, #52
+ | orr CARG2, CARG2, #0x00100000
+ | bmi >2 // Shift mantissa left or right?
+ | lsr CARG1, CARG1, RB // 64 bit right shift.
+ | lsr CARG3, CARG2, RB
+ | rsb RB, RB, #32
+ | orr CARG1, CARG1, CARG2, lsl RB
+ | rsb RB, RB, #0
+ | orr CARG1, CARG1, CARG2, lsr RB
+ | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign.
+ | adc CRET2, CARG3, CARG4
+ |1:
+ | eor CRET1, CRET1, CARG4
+ | eor CRET2, CRET2, CARG4
+ | bx lr
+ |2:
+ | rsb RB, RB, #0
+ | lsl CARG2, CARG2, RB // 64 bit left shift.
+ | lsl CARG3, CARG1, RB
+ | sub RB, RB, #32
+ | orr CARG2, CARG2, CARG1, lsl RB
+ | rsb RB, RB, #0
+ | orr CARG2, CARG2, CARG1, lsr RB
+ | adds CRET1, CARG3, CARG4
+ | adc CRET2, CARG2, CARG4
+ | b <1
+ |3:
+ | mov CRET1, #0
+ | mov CRET2, #0
+ | bx lr
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | vldr d1, >9
+ |.if not HFABI
+ | vmov d0, CARG1, CARG2
+ |.endif
+ | vadd.f64 d0, d0, d1
+ | vmov CARG1, s0
+ | bx lr
+ |9:
+ | .long 0, 0x43380000 // (double)(2^52 + 2^51).
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
@@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
| // Subsumes ins_next1 and ins_next2.
| ldr INS, TRACE:CARG1->startins
- | bfi INS, OP, #0, #8
+ | bic INS, INS, #0xff
+ | orr INS, INS, OP
| str INS, [PC], #4
| b <1
|.endif
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a437b65766..eb6d0c2f44 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ | fcvtzs CRET1w, FARG1 // Truncate x to a signed 32 bit integer.
+ | scvtf FARG2, CRET1w // Convert back to double for the exactness check.
+ | fcmp FARG2, FARG1
+ | bne >1 // Round trip differs (or unordered): fail.
+ | ret
+ |1:
+ | mov CRET1, #0x8000000080000000 // Failure marker.
+ | ret
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ | fcvtzs CRET1, FARG1 // Truncating conversion to a signed 64 bit integer.
+ | ret
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ | fcvtzs CRET1, FARG1 // Signed conversion.
+ | fcvtzu CARG2, FARG1 // Unsigned conversion of the same input.
+ | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX.
+ | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64.
+ | ret
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |->vm_tobit:
+ | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51.
+ | fmov FARG2, CRET1 // Move the bit pattern into an FP register.
+ | fadd FARG1, FARG1, FARG2 // x + (2^52 + 2^51).
+ | fmov CRET1w, s0 // Return the low 32 bits of the sum (assumes FARG1 is d0 -- TODO confirm).
+ | ret
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 02e588eebd..8a6b82709a 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -85,6 +85,7 @@
|
|.if FPU
|.define FARG1, f12
+|.define FARG1HI, f13
|.define FARG2, f14
|
|.define FRET1, f0
@@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx)
| mtc1 r0, f4
| mtc1 TMP0, f5
| abs.d FRET2, FARG1 // |x|
- | mfc1 AT, f13
+ | mfc1 AT, FARG1HI
| c.olt.d 0, FRET2, f4
| add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
| bc1f 0, >1 // Truncate only if |x| < 2^52.
@@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx)
| sfmin_max max, vm_sfcmpogt
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if FPU
+ | trunc.w.d FARG2, FARG1 // Truncate x to a signed 32 bit integer.
+ | mfc1 SFRETLO, FARG2 // Candidate result.
+ | cvt.d.w FARG2, FARG2 // Convert back to double for the exactness check.
+ | c.eq.d FARG1, FARG2
+ | bc1f 0, >2 // Round trip differs: fail.
+ |. nop
+ | jr ra
+ |. move SFRETHI, r0 // Delay slot: hi word of the result is 0 on success.
+ |
+ |.else
+ |
+ | sll SFRETLO, SFARG1HI, 1 // Drop the sign bit.
+ | or SFRETHI, SFRETLO, SFARG1LO
+ | beqz SFRETHI, >1 // Return 0 for +-0.
+ |. li TMP0, 1054
+ | srl AT, SFRETLO, 21 // AT = biased exponent.
+ | subu TMP0, TMP0, AT // TMP0 = 1054 - exponent = right shift count.
+ | sltiu AT, TMP0, 32
+ | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
+ |. sll SFRETLO, SFARG1HI, 11
+ | srl SFRETHI, SFARG1LO, 21
+ | negu TMP1, TMP0 // Left shift count for the bits below the integer part.
+ | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa.
+ | sllv TMP2, SFRETLO, TMP1 // Mantissa bits that the right shift will drop.
+ | lui AT, 0x8000
+ | sll SFRETHI, SFARG1LO, 11 // Remaining low mantissa bits.
+ | or SFRETLO, SFRETLO, AT // Merge leading 1.
+ | or TMP2, TMP2, SFRETHI // TMP2 = all fractional bits.
+ | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value.
+ | bnez TMP2, >2 // Fail if fractional part != 0.
+ |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
+ | addu SFRETLO, SFRETLO, SFARG1HI
+ | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000.
+ |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign.
+ |1:
+ | jr ra
+ |. move SFRETHI, r0 // Delay slot: hi word of the result is 0 on success.
+ |.endif
+ |2: // Not an integer, return 0x8000000080000000LL.
+ | lui SFRETHI, 0x8000
+ | jr ra
+ |. lui SFRETLO, 0x8000
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |// fallthrough, same as lj_vm_num2u64.
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if FPU
+ | mfc1 SFARG1HI, FARG1HI // Copy the double from the FPR pair into the GPR pair.
+ | mfc1 SFARG1LO, FARG1
+ |.endif
+ | srl TMP0, SFARG1HI, 20
+ | andi TMP0, TMP0, 0x7ff // TMP0 = biased exponent.
+ | addiu SFRETLO, TMP0, -1023
+ | sltiu SFRETLO, SFRETLO, 116 // 1 if 0 <= exponent - 1023 < 116, else 0.
+ | beqz SFRETLO, >3 // Exponent out of range.
+ |. sll SFRETHI, SFARG1HI, 12
+ | lui AT, 0x0010
+ | srl SFRETHI, SFRETHI, 12 // Masked hi mantissa (20 bits).
+ | addiu TMP0, TMP0, -1075 // TMP0 = exponent - 1075; its sign selects the shift direction.
+ | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
+ | bgez TMP0, >2 // Shift mantissa left or right?
+ |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa.
+ | subu TMP1, r0, TMP0 // TMP1 = right shift count.
+ | sll AT, SFRETHI, 1
+ | nor TMP0, r0, TMP1 // TMP0 = ~count, used with the pre-shift by 1 above.
+ | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp.
+ | sllv AT, AT, TMP0 // Shifted-out hi mantissa.
+ | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp.
+ | andi TMP1, TMP1, 0x20 // Conditional right shift by 32.
+ | or AT, AT, SFRETLO // Merge into lo mantissa.
+ | movn AT, SFRETHI, TMP1 // Count >= 32: lo takes the shifted hi word.
+ | movn SFRETHI, r0, TMP1 // Count >= 32: hi becomes 0.
+ |1:
+ | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign.
+ | addu SFRETHI, SFRETHI, SFARG1HI
+ | sltu TMP0, SFRETLO, AT // Carry
+ | addu SFRETHI, SFRETHI, TMP0
+ | xor SFRETLO, SFRETLO, SFARG1HI
+ | jr ra
+ |. xor SFRETHI, SFRETHI, SFARG1HI
+ |2:
+ | srl TMP2, SFARG1LO, 1
+ | nor AT, r0, TMP0 // AT = ~count, used with the pre-shift by 1 above.
+ | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp.
+ | srlv TMP2, TMP2, AT // Shifted-out lo mantissa.
+ | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp.
+ | andi TMP0, TMP0, 0x20 // Conditional left shift by 32.
+ | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa.
+ | movn SFRETHI, AT, TMP0 // Count >= 32: hi takes the shifted lo word.
+ | b <1
+ |. movn AT, r0, TMP0 // Delay slot, count >= 32: lo becomes 0.
+ |3:
+ | jr ra
+ |. li SFRETHI, 0 // Return 0; SFRETLO is already 0 from the sltiu above.
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | lui AT, 0x59c0 // 2^52 + 2^51 (float).
+ | mtc1 AT, FARG2
+ | cvt.d.s FARG2, FARG2 // Widen the float constant to double.
+ | add.d FARG1, FARG1, FARG2 // x + (2^52 + 2^51).
+ | jr ra
+ |. mfc1 CRET1, FARG1 // Delay slot: return the 32 bit word read from FARG1.
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 859c0aee84..4dc40d8a7f 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx)
| dinsu CRET2, AT, 21, 21
| slt AT, CARG1, r0
| dsrlv CRET1, CRET2, TMP0
- | dsubu CARG1, r0, CRET1
+ | dsubu CARG1, r0, CRET1 // Keep the 64 bit negate: CRET1 may exceed 32 bits before the range check.
|.if MIPSR6
| seleqz CRET1, CRET1, AT
| selnez CARG1, CARG1, AT
@@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| movn CRET1, CARG1, AT
|.endif
- | li CARG1, 64
- | subu TMP0, CARG1, TMP0
+ | negu TMP0, TMP0
| dsllv CRET2, CRET2, TMP0 // Integer check.
| sextw AT, CRET1
| xor AT, CRET1, AT // Range check.
- |.if MIPSR6
- | seleqz AT, AT, CRET2
- | selnez CRET2, CRET2, CRET2
| jr ra
|. or CRET2, AT, CRET2
- |.else
- | jr ra
- |. movz CRET2, AT, CRET2
- |.endif
|1:
| jr ra
|. li CRET2, 1
@@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx)
| sfmin_max max, vm_sfcmpogt
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if FPU
+ | trunc.w.d FARG2, FARG1 // Truncate x to a signed 32 bit integer.
+ | mfc1 CRET1, FARG2 // Candidate result.
+ | cvt.d.w FARG2, FARG2 // Convert back to double for the exactness check.
+ |.if MIPSR6
+ | cmp.eq.d FARG2, FARG1, FARG2
+ | bc1eqz FARG2, >2 // Round trip differs: fail.
+ |.else
+ | c.eq.d FARG1, FARG2
+ | bc1f 0, >2 // Round trip differs: fail.
+ |.endif
+ |. nop
+ | jr ra
+ |. zextw CRET1, CRET1 // Delay slot: zero-extend the 32 bit result.
+ |
+ |.else
+ |
+ | dsll CRET2, CARG1, 1 // Drop the sign bit.
+ | beqz CRET2, >1 // Return 0 for +-0.
+ |. li TMP0, 1076
+ | dsrl AT, CRET2, 53 // AT = biased exponent.
+ | dsubu TMP0, TMP0, AT // TMP0 = 1076 - exponent = right shift count.
+ | sltiu AT, TMP0, 54
+ | beqz AT, >2 // Fail if the shift count is outside 0..53.
+ |. dextm CRET2, CRET2, 0, 20
+ | dinsu CRET2, AT, 21, 21 // Merge leading 1 (AT is 1 here).
+ | slt AT, CARG1, r0 // AT = 1 if x is negative.
+ | dsrlv CRET1, CRET2, TMP0 // Right-aligned absolute value, may be wider than 32 bits.
+ | dsubu CARG1, r0, CRET1 // 64 bit negate; a 32 bit negu would hide overflow from the range check.
+ |.if MIPSR6
+ | seleqz CRET1, CRET1, AT
+ | selnez CARG1, CARG1, AT
+ | or CRET1, CRET1, CARG1 // Select the negated value for negative x.
+ |.else
+ | movn CRET1, CARG1, AT // Select the negated value for negative x.
+ |.endif
+ | negu TMP0, TMP0 // Left shift count for the dropped bits (dsllv uses the low 6 bits).
+ | dsllv CRET2, CRET2, TMP0 // Integer check.
+ | sextw AT, CRET1
+ | xor AT, CRET1, AT // Range check.
+ | or AT, AT, CRET2
+ | bnez AT, >2 // Fail if either check left a nonzero value.
+ |. nop
+ | jr ra
+ |. zextw CRET1, CRET1 // Delay slot: zero-extend the 32 bit result.
+ |1:
+ | jr ra
+ |. move CRET1, r0
+ |.endif
+ |2:
+ | lui CRET1, 0x8000 // Build the failure marker 0x8000000080000000.
+ | dsll CRET1, CRET1, 16
+ | ori CRET1, CRET1, 0x8000
+ | jr ra
+ |. dsll CRET1, CRET1, 16
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |.if FPU
+ | trunc.l.d FARG1, FARG1 // Truncating conversion to a signed 64 bit integer.
+ | jr ra
+ |. dmfc1 CRET1, FARG1 // Delay slot: move the result to the return register.
+ |.else
+ |// fallthrough, same as lj_vm_num2u64 for soft-float.
+ |.endif
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if FPU
+ | trunc.l.d FARG2, FARG1 // Try the signed 64 bit conversion first.
+ | dmfc1 CRET1, FARG2
+ | li AT, -1
+ | dsrl AT, AT, 1 // AT = 0x7fffffffffffffff.
+ | beq CRET1, AT, >1 // That result selects the retry at label 1.
+ |. lui AT, 0xdf80 // -2^64 (float).
+ | jr ra
+ |. nop
+ |1:
+ | mtc1 AT, FARG2
+ | cvt.d.s FARG2, FARG2 // Widen the float constant to double.
+ | add.d FARG1, FARG1, FARG2 // x - 2^64.
+ | trunc.l.d FARG2, FARG1 // Convert the biased value.
+ | jr ra
+ |. dmfc1 CRET1, FARG2
+ |
+ |.else
+ |
+ | dextu CARG2, CARG1, 20, 10 // CARG2 = biased exponent field.
+ | addiu AT, CARG2, -1023
+ | sltiu AT, AT, 116 // 1 if 0 <= exponent - 1023 < 116, else 0.
+ | beqz AT, >2 // Exponent out of range.
+ |. addiu CARG2, CARG2, -1075
+ | dextm CRET1, CARG1, 0, 19 // Masked mantissa.
+ | dsll AT, AT, 52 // AT is 1 here, so this forms the leading 1.
+ | dsra CARG1, CARG1, 63 // sign = 0 or -1.
+ | bgez CARG2, >1 // Shift mantissa left or right?
+ |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa.
+ | subu CARG2, r0, CARG2 // Right shift count.
+ | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp.
+ | daddu CRET1, CRET1, CARG1
+ | jr ra
+ |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
+ |1:
+ | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp.
+ | daddu CRET1, CRET1, CARG1
+ | jr ra
+ |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
+ |2:
+ | jr ra
+ |. move CRET1, r0 // Out-of-range exponent: return 0.
+ |.endif
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | lui AT, 0x59c0 // 2^52 + 2^51 (float).
+ | mtc1 AT, FARG2
+ | cvt.d.s FARG2, FARG2 // Widen the float constant to double.
+ | add.d FARG1, FARG1, FARG2 // x + (2^52 + 2^51).
+ | mfc1 CRET1, FARG1 // Low 32 bits of the sum.
+ | jr ra
+ |. sextw CRET1, CRET1 // Delay slot: sign-extend the 32 bit result.
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 2ddeefbfea..1761e39bdf 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx)
| blr
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if FPU
+ | subi sp, sp, 16 // Scratch slot to move the double from the FPR into GPRs.
+ | stfd FARG1, 0(sp)
+ | lwz CARG1, 0(sp) // hi word.
+ | lwz CARG2, 4(sp) // lo word.
+ |.endif
+ | slwi TMP1, CARG1, 1 // Drop the sign bit.
+ |.if PPE
+ | or TMP1, TMP1, CARG2
+ | cmpwi TMP1, 0
+ |.else
+ | or. TMP1, TMP1, CARG2
+ |.endif
+ | beq >2 // Return 0 for +-0.
+ | rlwinm RB, CARG1, 12, 21, 31 // RB = biased exponent.
+ | subfic RB, RB, 1054 // RB = 1054 - exponent = right shift count.
+ | cmplwi RB, 32
+ | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
+ | slwi CARG3, CARG1, 11
+ | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa.
+ | subfic TMP1, RB, 32
+ | slw TMP1, CARG3, TMP1 // Mantissa bits that the right shift will drop.
+ | slwi TMP2, CARG2, 11 // Remaining low mantissa bits.
+ |.if PPE
+ | or TMP1, TMP1, TMP2
+ | cmpwi TMP1, 0
+ |.else
+ | or. TMP1, TMP1, TMP2
+ |.endif
+ | bne >1 // Fail if fractional part != 0.
+ | oris CARG3, CARG3, 0x8000 // Merge leading 1.
+ | srw CRET2, CARG3, RB // lo = right-aligned absolute value.
+ | srawi CARG4, CARG1, 31 // sign = 0 or -1.
+ |.if GPR64
+ | add CRET2, CRET2, CARG4
+ | cmpwi CRET2, 0
+ |.else
+ | add. CRET2, CRET2, CARG4
+ |.endif
+ | blt >1 // Fail if lo+sign >= 0x80000000.
+ | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
+ |2:
+ |.if GPR64
+ | rldicl CRET1, CRET2, 0, 32 // Return the zero-extended 32 bit result held in CRET2.
+ |.else
+ | li CRET1, 0 // hi word of the result is 0 on success.
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |1:
+ |.if GPR64
+ | lus CRET1, 0x8000 // Build the failure marker 0x8000000080000000.
+ | rldicr CRET1, CRET1, 32, 32
+ |.else
+ | lus CRET1, 0x8000 // Failure marker 0x80000000 in both result words.
+ | lus CRET2, 0x8000
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |// fallthrough, same as lj_vm_num2u64.
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if FPU
+ | subi sp, sp, 16 // Scratch slot to move the double from the FPR into GPRs.
+ | stfd FARG1, 0(sp)
+ | lwz CARG1, 0(sp) // hi word.
+ | lwz CARG2, 4(sp) // lo word.
+ |.endif
+ | rlwinm RB, CARG1, 12, 21, 31 // RB = biased exponent.
+ | addi RB, RB, -1023
+ | cmplwi RB, 116
+ | bge >3 // Exponent out of range.
+ | srawi CARG4, CARG1, 31 // sign = 0 or -1.
+ | clrlwi CARG1, CARG1, 12 // Keep only the 20 mantissa bits of the hi word.
+ | subfic RB, RB, 52 // RB = 52 - exponent; its sign selects the shift direction.
+ | oris CARG1, CARG1, 0x0010 // Merge leading 1.
+ | cmpwi RB, 0
+ | blt >2 // Shift mantissa left or right?
+ | subfic TMP1, RB, 32 // 64 bit right shift.
+ | srw CARG2, CARG2, RB
+ | slw TMP2, CARG1, TMP1 // hi << (32 - shift).
+ | addi TMP1, RB, -32
+ | or CARG2, CARG2, TMP2
+ | srw TMP2, CARG1, TMP1 // hi >> (shift - 32).
+ | or CARG2, CARG2, TMP2
+ | srw CARG1, CARG1, RB
+ |1:
+ | addc CARG2, CARG2, CARG4 // m = sign?-m:m = (m+sign)^sign.
+ | adde CARG1, CARG1, CARG4 // Propagate the carry into the hi word.
+ | xor CRET2, CARG2, CARG4
+ | xor CRET1, CARG1, CARG4
+ |.if GPR64
+ | rldimi CRET2, CRET1, 32, 0 // Insert the hi word above the lo word.
+ | mr CRET1, CRET2
+ |.endif
+ |4: // Shared epilogue: only undo the stack adjustment made under FPU.
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |2:
+ | subfic TMP1, RB, 0 // 64 bit left shift.
+ | addi RB, RB, -32
+ | slw CARG1, CARG1, TMP1
+ | srw TMP2, CARG2, RB // lo >> (32 - shift).
+ | addi RB, TMP1, -32
+ | or CARG1, CARG1, TMP2
+ | slw TMP2, CARG2, RB // lo << (shift - 32).
+ | or CARG1, CARG1, TMP2
+ | slw CARG2, CARG2, TMP1
+ | b <1
+ |3:
+ | li CRET1, 0 // Out-of-range exponent: return 0.
+ |.if not GPR64
+ | li CRET2, 0
+ |.endif
+ | b <4
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | lus TMP0, 0x59c0 // 2^52 + 2^51 (float).
+ | subi sp, sp, 16 // Scratch slot for the GPR/FPR transfers.
+ | stw TMP0, 0(sp)
+ | lfs FARG2, 0(sp) // Load the constant as a float.
+ | fadd FARG1, FARG1, FARG2 // x + (2^52 + 2^51).
+ | stfd FARG1, 0(sp)
+ | lwz CRET1, 4(sp) // Return the word at offset 4 of the stored sum.
+ | addi sp, sp, 16
+ | blr
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 4cfb7b6ad2..970e8e43df 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ | cvttsd2si eax, xmm0 // Truncate x to a signed 32 bit integer.
+ | xorps xmm1, xmm1 // Clear xmm1 before the conversion writes its low half.
+ | cvtsi2sd xmm1, eax // Convert back to double for the exactness check.
+ | ucomisd xmm1, xmm0
+ | jp >1 // Unordered compare: fail.
+ | jne >1 // Round trip differs: fail.
+ | ret
+ |1:
+ | mov64 rax, U64x(80000000,80000000) // Failure marker.
+ | ret
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ | cvttsd2si rax, xmm0 // Truncating conversion to a signed 64 bit integer.
+ | ret
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
+ | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
+ | jo >1 // Retry with a bias of -2^64.
+ | ret
+ |1:
+ | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
+ | movd xmm1, rdx
+ | addsd xmm0, xmm1 // x - 2^64.
+ | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
+ | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
+ | ret
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |->vm_tobit:
+ | sseconst_tobit xmm1, RC // Macro defined elsewhere; presumably loads the tobit bias into xmm1 -- confirm.
+ | addsd xmm0, xmm1 // x + bias.
+ | movd eax, xmm0 // Return the low 32 bits of the sum.
+ | ret
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 77c4069d45..485ed809c9 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|
|//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if not X64
+ | movsd xmm0, qword [esp+4] // x86: load the argument from the stack.
+ |.endif
+ | cvttsd2si eax, xmm0 // Truncate x to a signed 32 bit integer.
+ | xorps xmm1, xmm1 // Clear xmm1 before the conversion writes its low half.
+ | cvtsi2sd xmm1, eax // Convert back to double for the exactness check.
+ | ucomisd xmm1, xmm0
+ | jp >1 // Unordered compare: fail.
+ | jne >1 // Round trip differs: fail.
+ |.if not X64
+ | xor edx, edx // x86: hi word of the result is 0 on success.
+ |.endif
+ | ret
+ |1:
+ |.if X64
+ | mov64 rax, U64x(80000000,80000000) // Failure marker.
+ |.else
+ | mov eax, 0x80000000 // Failure marker 0x80000000 in both result words.
+ | mov edx, eax
+ |.endif
+ | ret
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |.if X64
+ | cvttsd2si rax, xmm0 // Truncating conversion to a signed 64 bit integer.
+ | ret
+ |.else
+ | sub esp, 12 // Scratch space for the 64 bit integer store.
+ | fld qword [esp+16] // Load x (at [esp+4] before the adjustment above).
+ | fisttp qword [esp] // Truncating store; pops the x87 stack.
+ | mov eax, dword [esp] // lo word.
+ | mov edx, dword [esp+4] // hi word.
+ | add esp, 12
+ | ret
+ |.endif
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if X64
+ | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
+ | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
+ | jo >1 // Retry with a bias of -2^64.
+ | ret
+ |1:
+ | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
+ | movd xmm1, rdx
+ | addsd xmm0, xmm1 // x - 2^64.
+ | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
+ | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
+ | ret
+ |.else
+ | sub esp, 12 // Scratch space: 8 bytes for the integer, 4 for the float constant.
+ | fld qword [esp+16] // Load x (at [esp+4] before the adjustment above).
+ | fld st0 // Duplicate x; the store below pops one copy.
+ | fisttp qword [esp] // Truncating store of the signed conversion.
+ | mov edx, dword [esp+4] // hi word.
+ | mov eax, dword [esp] // lo word.
+ | cmp edx, 1 // Sets overflow only if edx == 0x80000000.
+ | jo >2
+ |1:
+ | fpop // Presumably drops the remaining copy of x -- fpop is defined elsewhere.
+ | add esp, 12
+ | ret
+ |2:
+ | cmp eax, 0
+ | jne <1 // lo word != 0: keep the signed result.
+ | mov dword [esp+8], 0xdf800000 // -0x1p64 (float).
+ | fadd dword [esp+8] // x - 2^64 on the remaining copy.
+ | fisttp qword [esp] // Convert the biased value; pops the x87 stack.
+ | mov eax, dword [esp]
+ | mov edx, dword [esp+4]
+ | add esp, 12
+ | ret
+ |.endif
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |->vm_tobit:
+ |.if not X64
+ | movsd xmm0, qword [esp+4] // x86: load the argument from the stack.
+ |.endif
+ | sseconst_tobit xmm1, RCa // Macro defined elsewhere; presumably loads the tobit bias into xmm1 -- confirm.
+ | addsd xmm0, xmm1 // x + bias.
+ | movd eax, xmm0 // Return the low 32 bits of the sum.
+ | ret
+ |
+ |//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
From 1acb204447aaa75e5338c3bbfd062aae64bc5959 Mon Sep 17 00:00:00 2001
From: Mike Pall
All documentation is
-Copyright © 2005-2025 Mike Pall.
+Copyright © 2005-2026 Mike Pall.
Contact
Copyright
Copyright
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026
-Copyright © 2005-2025
+Copyright © 2005-2026