diff --git a/Makefile b/Makefile index 0feff9a9..227faaf4 100644 --- a/Makefile +++ b/Makefile @@ -452,6 +452,30 @@ ifeq (,$(findstring msvc,$(platform))) CXXFLAGS += -MMD -MP endif +# DSP JIT (SCU + SCSP MPROG): on by default. The aarch64 backend uses +# oaknut (C++20, header-only) via thin extern-"C" shims +# (scu_dsp_jit_oaknut.cpp / scsp_dsp_jit_oaknut.cpp); the JIT bodies +# themselves are C (scu_dsp_jit.c / scsp_dsp_jit.c). Source-level +# guards keep the JIT a no-op on non-aarch64 builds. Runtime libretro +# options (`beetle_saturn_jit_scu` / `beetle_saturn_jit_scsp`) gate whether the +# compiled JIT code is dispatched. Pass WANT_JIT=0 to drop the JIT at +# compile time (slightly smaller binary, no oaknut dep). +WANT_JIT ?= 1 +ifeq ($(WANT_JIT), 1) + CFLAGS += -DWANT_JIT + CXXFLAGS += -DWANT_JIT +endif +# SCU DSP JIT perf jitdump emitter: writes /tmp/jit-<pid>.dump in the +# Linux perf jitdump format so `perf inject --jit` can resolve [JIT] tid +# samples to per-slot symbols (dsp__pc_). Diagnostic +# only; the writer itself is cheap (one writev per compiled slot) but +# the dump file grows unbounded over long runs. Requires WANT_JIT=1. +ifeq ($(WANT_DSP_JIT_PERF_DUMP), 1) + CFLAGS += -DWANT_DSP_JIT_PERF_DUMP +endif +$(CORE_EMU_DIR)/scu_dsp_jit_oaknut.o: CXXFLAGS += -std=c++20 -I$(DEPS_DIR)/oaknut/include +$(CORE_EMU_DIR)/scsp_dsp_jit_oaknut.o: CXXFLAGS += -std=c++20 -I$(DEPS_DIR)/oaknut/include + OBJOUT = -o LINKOUT = -o diff --git a/Makefile.common b/Makefile.common index 48d55dae..51bee4a6 100644 --- a/Makefile.common +++ b/Makefile.common @@ -55,18 +55,20 @@ ifeq ($(NEED_TREMOR), 1) FLAGS += -DNEED_TREMOR endif -SOURCES_CXX += \ - $(CORE_EMU_DIR)/sound_glue.cpp \ - $(CORE_EMU_DIR)/ss.cpp - -# sound.cpp -> sound.c + sound_glue.cpp. 
The orchestration half -# (SOUND_* public ABI, the SOUND_Update 32.32 fixed-point cycle -# loop, the SCSP IBuffer ring, the setjmp recovery point) moves -# to C; the C++ class instances (SS_SCSP SCSP, M68K SoundCPU), -# the eight M68K bus callbacks they need, and scsp.inc stay in -# the new sound_glue.cpp. The two halves exchange state through -# sound_internal.h: a handful of extern "C" SoundGlue_* wrappers -# around the M68K / SS_SCSP class methods sound.c needs, and four +SOURCES_C += $(CORE_EMU_DIR)/ss.c \ + $(CORE_EMU_DIR)/sound_glue.c \ + $(CORE_EMU_DIR)/scsp_dsp_jit.c \ + $(CORE_EMU_DIR)/jitdump.c + +# sound.cpp -> sound.c + sound_glue.cpp (Phase-6c) -> sound_glue.c +# (Phase-9). The orchestration half (SOUND_* public ABI, the +# SOUND_Update 32.32 fixed-point cycle loop, the SCSP IBuffer ring, +# the setjmp recovery point) lives in sound.c; the struct instances +# (SS_SCSP SCSP, M68K SoundCPU -- both retired from being C++ +# class types), the eight M68K bus callbacks they need, and +# scsp.inc live in sound_glue.c. The two halves exchange state +# through sound_internal.h: a set of SoundGlue_* C-linkage wrappers +# around the M68K_* / SS_SCSP_* free-function APIs, and four # extern symbols (SOUND_next_scsp_time, SOUND_jbuf, IBuffer, # SOUND_IBufferCount) owned by sound.c that the glue side reads / # writes during the bus-callback and RunSCSP paths. @@ -90,6 +92,10 @@ SOURCES_CXX += \ # The STATE_MASK_* enum was hoisted out of `struct DSPS` to file # scope so the same `STATE_MASK_EXECUTE` spelling compiles in # both C and C++. +# scu_dsp_jit.c: the SCU DSP slot-dispatch JIT body lives in C +# (orchestration, emit_*, entry/exit stubs, looped-slot cache, +# perf jitdump writer). Instructions are emitted directly via the +# in-tree a64emit C library. # db.cpp -> db.c. 
Dead std:: helpers (DB_GetInternalDB, # DB_GetHHDescriptions, FDIDToString) were removed in an earlier @@ -147,6 +153,8 @@ SOURCES_C += $(CORE_EMU_DIR)/db.c \ $(CORE_EMU_DIR)/scu_dsp_mvi.c \ $(CORE_EMU_DIR)/scu_dsp_jmp.c \ $(CORE_EMU_DIR)/scu_dsp_misc.c \ + $(CORE_EMU_DIR)/scu_dsp_jit.c \ + $(CORE_EMU_DIR)/a64emit.c \ $(CORE_EMU_DIR)/sound.c \ $(CORE_EMU_DIR)/ss_state.c \ $(CORE_EMU_DIR)/ss_init.c diff --git a/libretro.c b/libretro.c index db187d83..70fc162e 100644 --- a/libretro.c +++ b/libretro.c @@ -299,6 +299,26 @@ static void check_variables(bool startup) cdimagecache = true; } + var.key = "beetle_saturn_jit_scu"; + var.value = NULL; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (!strcmp(var.value, "disabled")) + setting_jit_scu = false; + else + setting_jit_scu = true; + } + + var.key = "beetle_saturn_jit_scsp"; + var.value = NULL; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (!strcmp(var.value, "disabled")) + setting_jit_scsp = false; + else + setting_jit_scsp = true; + } + var.key = "beetle_saturn_shared_int"; var.value = NULL; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) diff --git a/libretro_core_options.h b/libretro_core_options.h index f0b2912a..2be593d9 100644 --- a/libretro_core_options.h +++ b/libretro_core_options.h @@ -142,6 +142,34 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled" }, + { + "beetle_saturn_jit_scu", + "SCU DSP JIT Binary Translator", + NULL, + "Use the native aarch64 JIT binary translator for the SCU DSP. Has no effect on non-aarch64 builds. Restart required.", + NULL, + "system", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled" + }, + { + "beetle_saturn_jit_scsp", + "SCSP DSP JIT Binary Translator", + NULL, + "Use the native aarch64 JIT binary translator for the SCSP DSP (MPROG). Has no effect on non-aarch64 builds. 
Restart required.", + NULL, + "system", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "enabled" + }, { "beetle_saturn_initial_scanline", "Initial Scanline", diff --git a/libretro_settings.c b/libretro_settings.c index a4ce3bc0..5394154a 100644 --- a/libretro_settings.c +++ b/libretro_settings.c @@ -15,3 +15,5 @@ bool setting_multitap_port1; bool setting_multitap_port2; bool opposite_directions; bool setting_midsync; +bool setting_jit_scu = true; +bool setting_jit_scsp = true; diff --git a/libretro_settings.h b/libretro_settings.h index f9194544..78ad275e 100644 --- a/libretro_settings.h +++ b/libretro_settings.h @@ -1,7 +1,7 @@ #ifndef __LIBRETRO_SETTINGS_HDR__ #define __LIBRETRO_SETTINGS_HDR__ -#include +#include enum { @@ -33,5 +33,7 @@ extern bool setting_multitap_port1; extern bool setting_multitap_port2; extern bool opposite_directions; extern bool setting_midsync; +extern bool setting_jit_scu; +extern bool setting_jit_scsp; #endif diff --git a/mednafen/emuspec.h b/mednafen/emuspec.h index 5375abd3..f89fdf06 100644 --- a/mednafen/emuspec.h +++ b/mednafen/emuspec.h @@ -2,9 +2,7 @@ #define __MDFN_EMUSPEC_H #include -#ifndef __cplusplus -#include -#endif +#include #include "video/surface.h" diff --git a/mednafen/git.h b/mednafen/git.h index e3fa910d..8d80cbb7 100644 --- a/mednafen/git.h +++ b/mednafen/git.h @@ -1,54 +1,14 @@ #ifndef __MDFN_GIT_H #define __MDFN_GIT_H -#include -#include -#include -#include #include #include "video/surface.h" #include "state.h" -#include "settings-common.h" struct MemoryPatch; -struct CheatFormatStruct -{ - const char *FullName; //"Game Genie", "GameShark", "Pro Action Catplay", etc. - const char *Description; // Whatever? - - bool (*DecodeCheat)(const std::string& cheat_string, MemoryPatch* patch); // *patch should be left as initialized by MemoryPatch::MemoryPatch(), unless this is the - // second(or third or whatever) part of a multipart cheat. 
- // - // Will throw an std::exception(or derivative) on format error. - // - // Will return true if this is part of a multipart cheat. -}; - -MDFN_HIDE extern const std::vector CheatFormatInfo_Empty; - -struct CheatInfoStruct -{ - // - // InstallReadPatch and RemoveReadPatches should be non-NULL(even if only pointing to dummy functions) if the emulator module supports - // read-substitution and read-substitution-with-compare style(IE Game Genie-style) cheats. - // - // See also "SubCheats" global stuff in mempatcher.h. - // - void (*InstallReadPatch)(uint32_t address, uint8_t value, int compare); // Compare is >= 0 when utilized. - void (*RemoveReadPatches)(void); - uint8_t (*MemRead)(uint32_t addr); - void (*MemWrite)(uint32_t addr, uint8_t val); - - const std::vector& CheatFormatInfo; - - bool BigEndian; // UI default for cheat search and new cheats. -}; - -MDFN_HIDE extern const CheatInfoStruct CheatInfo_Empty; - /* EmulateSpecStruct now lives in mednafen/emuspec.h so it can be included from C TUs (the libretro entry-point is converted to C in the same commit that introduced emuspec.h). This include @@ -58,24 +18,6 @@ MDFN_HIDE extern const CheatInfoStruct CheatInfo_Empty; relying on those defaults now zero-init explicitly. 
*/ #include "emuspec.h" -struct GameDB_Entry -{ - std::string GameID; - bool GameIDIsHash = false; - std::string Name; - std::string Setting; - std::string Purpose; -}; - -struct GameDB_Database -{ - std::string ShortName; - std::string FullName; - std::string Description; - - std::vector Entries; -}; - //=========================================== #include "mdfn_gameinfo.h" diff --git a/mednafen/hw_cpu/m68k/gen.cpp b/mednafen/hw_cpu/m68k/gen.cpp deleted file mode 100644 index 0a5112ff..00000000 --- a/mednafen/hw_cpu/m68k/gen.cpp +++ /dev/null @@ -1,1255 +0,0 @@ -/******************************************************************************/ -/* Mednafen - Multi-system Emulator */ -/******************************************************************************/ -/* gen.cpp - Table Generator for Motorola 68000 CPU Emulator -** Copyright (C) 2015-2016 Mednafen Team -** -** This program is free software; you can redistribute it and/or -** modify it under the terms of the GNU General Public License -** as published by the Free Software Foundation; either version 2 -** of the License, or (at your option) any later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software Foundation, Inc., -** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - -// g++ -std=gnu++14 -Wall -O2 -o gen gen.cpp && ./gen > m68k_instr.inc -#include -#include -#include -#include - -#include -#include -#include -#include - -static std::string s(const char* format, ...) 
-{ - static char buf[65536]; - va_list ap; - - va_start(ap, format); - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - - return std::string(buf); -} - -/* - DATA_REG_DIR, - ADDR_REG_DIR, - - ADDR_REG_INDIR, - ADDR_REG_INDIR_POST, - ADDR_REG_INDIR_PRE, - - ADDR_REG_INDIR_DISP, - - ADDR_REG_INDIR_INDX, - - ABS_SHORT, - ABS_LONG, - - PC_DISP, - PC_INDEX, - - IMMEDIATE -*/ -enum -{ - AMA_DATA = 0x0001, - AMA_MEMORY = 0x0002, - AMA_CONTROL = 0x0004, - AMA_ALTERABLE = 0x0008, -}; - -static const char* bsize_to_type(unsigned size) -{ - if(size == 0) - return "std::tuple<>"; - - if(size == 1) - return "uint8_t"; - - if(size == 2) - return "uint16_t"; - - if(size == 4) - return "uint32_t"; - - return NULL; -} - -/* Phase-9d: decode_ea retired into - * decode_ea_src (move_dest=false) and decode_ea_dst - * (move_dest=true) via source-fold. */ - -bool decode_ea_src(unsigned allowed, int size, unsigned instr, const char* ham_name, std::string* ham_out) -{ - - const unsigned mode = (instr >> ( 3)) & 0x7; - const unsigned reg = (instr >> ( 0)) & 0x7; - - if(size == -1) - { - if(mode == 0) - size = 4; - else - size = 1; - } - - if((size != 0 && size != 1 && size != 2 && size != 4) || (size == 0 && allowed != AMA_CONTROL)) - return false; - - static const struct - { - const char* name; - unsigned flags; - } ams[2][0x8] = - { - { - { "DATA_REG_DIR", AMA_DATA | AMA_ALTERABLE }, - { "ADDR_REG_DIR", AMA_ALTERABLE }, - { "ADDR_REG_INDIR", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_POST", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_PRE", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_DISP", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_INDX", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { NULL, 0 }, - }, - { - { "ABS_SHORT", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ABS_LONG", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "PC_DISP", AMA_DATA | 
AMA_MEMORY | AMA_CONTROL }, - { "PC_INDEX", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "IMMEDIATE", AMA_DATA | AMA_MEMORY }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - } - }; - - auto const* const am = &ams[mode == 0x7][(mode == 0x7) ? reg : mode]; - - if((am->flags & allowed) != allowed) - return false; - - if(mode == 0x1 && size == 1) - return false; - - if(mode < 0x7) - { - *ham_out += s("HAM<%s, %s> %s(this, %s)", bsize_to_type(size), am->name, ham_name, ( "instr_b2_b0")); - - return true; - } - else if(am->name) - { - *ham_out += s("HAM<%s, %s> %s(this)", bsize_to_type(size), am->name, ham_name); - - return true; - } - - return false; -} - -bool decode_ea_dst(unsigned allowed, int size, unsigned instr, const char* ham_name, std::string* ham_out) -{ - - const unsigned mode = (instr >> ( 6 )) & 0x7; - const unsigned reg = (instr >> ( 9 )) & 0x7; - - if(size == -1) - { - if(mode == 0) - size = 4; - else - size = 1; - } - - if((size != 0 && size != 1 && size != 2 && size != 4) || (size == 0 && allowed != AMA_CONTROL)) - return false; - - static const struct - { - const char* name; - unsigned flags; - } ams[2][0x8] = - { - { - { "DATA_REG_DIR", AMA_DATA | AMA_ALTERABLE }, - { "ADDR_REG_DIR", AMA_ALTERABLE }, - { "ADDR_REG_INDIR", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_POST", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_PRE", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_DISP", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_INDX", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { NULL, 0 }, - }, - { - { "ABS_SHORT", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ABS_LONG", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "PC_DISP", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "PC_INDEX", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "IMMEDIATE", AMA_DATA | AMA_MEMORY }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - } - }; - - auto const* 
const am = &ams[mode == 0x7][(mode == 0x7) ? reg : mode]; - - if((am->flags & allowed) != allowed) - return false; - - if(mode == 0x1 && size == 1) - return false; - - if(mode < 0x7) - { - *ham_out += s("HAM<%s, %s> %s(this, %s)", bsize_to_type(size), am->name, ham_name, ( "instr_b11_b9" )); - - return true; - } - else if(am->name) - { - *ham_out += s("HAM<%s, %s> %s(this)", bsize_to_type(size), am->name, ham_name); - - return true; - } - - return false; -} - - -static void PrivilegeWrap(std::string *str, bool mfsr = false) -{ - if(mfsr) - *str = s("if(!Revision_E || CheckPrivilege()) { %s }", str->c_str()); - else - *str = s("if(CheckPrivilege()) { %s }", str->c_str()); -} - -static const char* size_names[4] = { "uint8_t", "uint16_t", "uint32_t", NULL }; - -// -// Bit manipulation, MOVEP, immediate -// -static std::string Instr0(const unsigned i) -{ - std::string ret; - - if(i & 0x100) - { - // MOVEP - if((i & 0x38) == 0x08) - { - assert(ret.size() == 0); - const char* sn = (i & 0x40) ? "uint32_t" : "uint16_t"; - - /* Phase-8d: emit named MOVEP variants instead of template form. */ - { - const bool is_l = (i & 0x40) != 0; - const bool to_mem = (i & 0x80) != 0; - const char* fname = is_l - ? (to_mem ? "MOVEP_l_reg_to_mem" : "MOVEP_l_mem_to_reg") - : (to_mem ? 
"MOVEP_w_reg_to_mem" : "MOVEP_w_mem_to_reg"); - ret += s("%s(instr_b2_b0, instr_b11_b9);", fname); - } - } - } - else - { - static const char* op_names[8] = { "OR", "AND", "SUB", "ADD", NULL, "EOR", "CMP", NULL }; - const unsigned opi = (i >> 9) & 0x7; - const unsigned szi = (i >> 6) & 0x3; - - std::string tmp; - - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1 << szi, i, "dst", &tmp)) - { - if(op_names[opi] && size_names[szi]) - { - assert(ret.size() == 0); - ret += s("HAM<%s, IMMEDIATE> src(this); %s; %s(src, dst);", size_names[szi], tmp.c_str(), op_names[opi]); //, size_names[szi]); - } - } - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000000000111100) - { - assert(ret.size() == 0); - ret += s("ORI_%s();", (i & 0x40) ? "SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000001000111100) - { - assert(ret.size() == 0); - ret += s("ANDI_%s();", (i & 0x40) ? "SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000101000111100) - { - assert(ret.size() == 0); - ret += s("EORI_%s();", (i & 0x40) ? "SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - } - - - // bit - { - static const char* op_names[4] = { "BTST", "BCHG", "BCLR", "BSET" }; - const unsigned type = (i >> 6) & 0x3; - const unsigned allowed = AMA_DATA | ((type == 0) ? 
0 : AMA_ALTERABLE); - std::string tmp; - - if(decode_ea_src(allowed, -1, i, "targ", &tmp)) - { - if(i & 0x100) // Dynamic - { - assert(ret.size() == 0); - ret += s("%s; %s(targ, D[instr_b11_b9]);", tmp.c_str(), op_names[type]); - } - else if(((i >> 9) & 0x7) == 0x4 && ((i & 0x3F) != 0x3C)) // Static - { - //printf("%04x -- %s\n", i, ret.c_str()); - assert(ret.size() == 0); - ret += s("unsigned wb = ReadOp(); %s; %s(targ, wb);", tmp.c_str(), op_names[type]); - } - } - } - - return ret; -} - - -// -// MOVEB -// -static std::string Instr1(const unsigned i) -{ - std::string ret; - std::string src_tmp; - std::string dst_tmp; - - if(decode_ea_src(AMA_DATA, 1, i, "src", &src_tmp) && decode_ea_dst(AMA_DATA | AMA_ALTERABLE, 1, i, "dst", &dst_tmp)) - { - ret += s("%s; %s; MOVE(src, dst);", src_tmp.c_str(), dst_tmp.c_str()); - } - - return ret; -} - -// -// MOVEL MOVEW -// -static std::string Instr23(const unsigned i) -{ - std::string ret; - std::string src_tmp; - std::string dst_tmp; - const unsigned size = ((i >> 12) == 0x2) ? 4 : 2; - - if(decode_ea_src(0, size, i, "src", &src_tmp)) - { - if(decode_ea_dst(AMA_DATA | AMA_ALTERABLE, size, i, "dst", &dst_tmp)) - { - ret += s("%s; %s; MOVE(src, dst);", src_tmp.c_str(), dst_tmp.c_str()); - } - else if(((i >> 6) & 0x7) == 0x1) - { - ret += s("%s; MOVEA(src, instr_b11_b9);", src_tmp.c_str()); - } - } - return ret; -} - -/* Phase-9d: form_dar_opm_ea retired into 4 named variants via - * source-fold. Suffixes map to the four distinct call-site - * argument tuples (see callers below). */ - -static std::string form_dar_opm_ea_OR_AND(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 
2 : 1; - is_a = true; - - return ""; - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = AMA_DATA; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? "src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_SUB_ADD(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = 0; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(true && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_EOR(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - return ""; - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_DATA | AMA_ALTERABLE; - } - else - { - allowed = AMA_DATA; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_CMP(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = 0; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - - -static std::string form_destr_rm_srcr(const unsigned i, const char* name, const unsigned szi, const char* mem_am = "ADDR_REG_INDIR_PRE") -{ - const bool rm = (i >> 3) & 1; - std::string ret; - - if(rm) // address register, predecrement addressing mode - ret = s("HAM<%s, %s> src(this, instr_b2_b0); HAM<%s, %s> dst(this, instr_b11_b9); %s(src, dst);", size_names[szi], mem_am, size_names[szi], mem_am, name); - else - ret = s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %s(src, dst);", size_names[szi], size_names[szi], name); - - return ret; -} - -static std::string Instr4(const unsigned instr) -{ - std::string ret; - std::string tmp; - - if(((instr >> 7) & 0x1F) == 0x1D && decode_ea_src(AMA_CONTROL, 0, instr, "targ", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; %s(targ);", tmp.c_str(), (instr & 0x40) ? 
"JMP" : "JSR"); - } - - if(((instr >> 7) & 0x7) == 0x1 && ((instr >> 11) & 0x1) == 0x1) - { - const bool dr = (instr >> 10) & 1; // 0 = reg->mem, 1 = mem->reg - const bool sz = (instr >> 6) & 1; // 0 = word, 1 = long - std::string rls = "const uint16_t reglist = ReadOp()"; - unsigned instr_adj = instr; - - if(!dr) - { - bool pseudo_predec = false; - if(((instr_adj >> 3) & 0x7) == 4) - { - instr_adj = (instr_adj &~ (0x7 << 3)) | (0x2 << 3); - pseudo_predec = true; - } - - if(decode_ea_src(AMA_CONTROL | AMA_ALTERABLE, 2 << sz, instr_adj, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; %s; MOVEM_to_MEM(%s, reglist, dst);", rls.c_str(), tmp.c_str(), pseudo_predec ? "true" : "false"); - } - } - else - { - bool pseudo_postinc = false; - - if(((instr_adj >> 3) & 0x7) == 3) - { - instr_adj = (instr_adj &~ (0x7 << 3)) | (0x2 << 3); - pseudo_postinc = true; - } - - if(decode_ea_src(AMA_CONTROL, 2 << sz, instr_adj, "src", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; %s; MOVEM_to_REGS(%s, src, reglist);", rls.c_str(), tmp.c_str(), pseudo_postinc ? 
"true" : "false"); - } - } - } - - - if((instr & 0xF00) == 0x000) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - // MOVE from SR - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 2, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; MOVE_from_SR(dst);", tmp.c_str()); - PrivilegeWrap(&ret, true); - } - } - else - { - // NEGX - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; NEGX(dst);", tmp.c_str()); - } - } - } - - - - // - // - // - { - const unsigned type = (instr >> 6) & 0x7; - - if(type == 0x6) - { - // CHK - if(decode_ea_src(AMA_DATA, 2, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; HAM dst(this, instr_b11_b9); CHK(src, dst);", tmp.c_str()); - } - } - else if(type == 0x7) - { - // LEA - if(decode_ea_src(AMA_CONTROL, 0, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; LEA(src, instr_b11_b9);", tmp.c_str()); - } - } - } - - - if((instr & 0xF00) == 0x200) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - - } - else - { - // CLR - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; CLR(dst);", tmp.c_str()); - } - } - } - - if((instr & 0xD00) == 0x400) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - if(decode_ea_src(AMA_DATA, 2, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - if(instr & 0x200) // MOVE to SR - { - ret += s("%s; MOVE_to_SR(src);", tmp.c_str()); - PrivilegeWrap(&ret); - } - else // MOVE to CCR - ret += s("%s; MOVE_to_CCR(src);", tmp.c_str()); - } - } - else - { - // NOT - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - if(instr & 0x200) // NOT - ret += s("%s; NOT(dst);", tmp.c_str()); - else // NEG - ret += s("%s; NEG(dst);", tmp.c_str()); - } - } - } - - // NBCD - if((instr & 0xFC0) == 0x800 && 
decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; NBCD(dst);", tmp.c_str()); - } - - // SWAP - if((instr & 0xFF8) == 0x840) - { - assert(ret.size() == 0); - - ret += s("SWAP(instr_b2_b0);"); - } - - // PEA - if((instr & 0xFC0) == 0x840 && decode_ea_src(AMA_CONTROL, 0, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; PEA(src);", tmp.c_str()); - } - - // EXT - if(((instr >> 9) & 0x7) == 0x4 && ((instr >> 3) & 0x7) == 0x0) - { - const unsigned type = (instr >> 6) & 0x7; - - if(type == 0x2 || type == 0x3) - { - assert(ret.size() == 0); - - ret = s("HAM<%s, DATA_REG_DIR> dst(this, instr_b2_b0); EXT(dst);", (type & 0x1) ? "uint32_t" : "uint16_t"); - } - } - - - // MOVEM EA to Regs - // TODO! - if((instr & 0xF80) == 0xC80) - { - //assert(ret.size() == 0); - } - - // MOVEM regs to EA - // TODO! - if((instr & 0xF80) == 0x880) - { - //assert(ret.size() == 0); - } - - if((instr & 0xF00) == 0xA00) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - // TAS - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; TAS(dst);", tmp.c_str()); - } - } - else - { - // TST - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; TST(dst);", tmp.c_str()); - } - } - } - - - // TRAP - if((instr & 0xFF0) == 0xE40) - { - assert(ret.size() == 0); - ret += s("TRAP(instr & 0xF);"); - } - - // MOVE to/from USP - if((instr & 0xFF0) == 0xE60) - { - assert(ret.size() == 0); - ret += s("MOVE_USP(%s, instr_b2_b0);", (instr & 0x8) ? 
"true" : "false"); - PrivilegeWrap(&ret); - } - - // LINK - if((instr & 0xFF8) == 0xE50) - { - assert(ret.size() == 0); - ret += s("LINK(instr_b2_b0);"); - } - - // UNLK - if((instr & 0xFF8) == 0xE58) - { - assert(ret.size() == 0); - ret += s("UNLK(instr_b2_b0);"); - } - - // RTR - // FEDCBA9876543210 - if(instr == 0b0100111001110111) - { - assert(ret.size() == 0); - ret += s("RTR();"); - } - - - // TRAPV - // FEDCBA9876543210 - if(instr == 0b0100111001110110) - { - assert(ret.size() == 0); - ret += s("TRAPV();"); - } - - // RTS - // FEDCBA9876543210 - if(instr == 0b0100111001110101) - { - assert(ret.size() == 0); - ret += s("RTS();"); - } - - // RTE - // FEDCBA9876543210 - if(instr == 0b0100111001110011) - { - assert(ret.size() == 0); - ret += s("RTE();"); - PrivilegeWrap(&ret); - } - - // STOP - // FEDCBA9876543210 - if(instr == 0b0100111001110010) - { - assert(ret.size() == 0); - ret += s("STOP();"); - PrivilegeWrap(&ret); - } - - // NOP - // FEDCBA9876543210 - if(instr == 0b0100111001110001) - { - assert(ret.size() == 0); - ret += s("NOP();"); - } - - // RESET - // FEDCBA9876543210 - if(instr == 0b0100111001110000) - { - assert(ret.size() == 0); - ret += s("RESET();"); - PrivilegeWrap(&ret); - } - - - return ret; -} - -static std::string Instr5(const unsigned i) -{ - std::string ret; - std::string tmp; - const unsigned szi = (i >> 6) & 0x3; - - if(szi == 0x3) - { - if(((i >> 3) & 0x7) == 0x1) // DBcc - { - ret = s("DBcc<0x%02x>(instr_b2_b0);", (i >> 8) & 0xF); - } - else if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, i, "dst", &tmp)) // Scc - { - ret = s("%s; Scc<0x%02x>(dst);", tmp.c_str(), (i >> 8) & 0xF); - } - } - else if(decode_ea_src(AMA_ALTERABLE, (((i >> 3) & 0x7) == 1) ? 4 : (1U << szi), i, "dst", &tmp)) // ADDQ and SUBQ - { - ret = s("HAM<%s, IMMEDIATE> src(this, instr_b11_b9 ? instr_b11_b9 : 8); ", size_names[szi]); - ret += s("%s; %s(src, dst);", tmp.c_str(), (i & 0x100) ? 
"SUB" : "ADD"); - } - - - return ret; -} - -static std::string Instr6(const unsigned i) -{ - return s("Bxx<0x%02x>((int8_t)instr);", (i >> 8) & 0xF); -} - -static std::string Instr7(const unsigned i) -{ - std::string ret; - - // MOVEQ - if(((i >> 8) & 0x1) == 0x0) - { - ret = s("HAM src(this, (int8_t)instr); HAM dst(this, instr_b11_b9); MOVE(src, dst);"); - } - - return ret; -} - -static std::string Instr8(const unsigned i) -{ - std::string ret; - - ret = form_dar_opm_ea_OR_AND(i, "OR"); - - // DIVU/DIVS - if(((i >> 6) & 0x3) == 0x3) - { - std::string tmp; - - if(decode_ea_src(AMA_DATA, 2, i, "src", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; DIV%s(src, instr_b11_b9);", tmp.c_str(), ((i >> 8) & 1) ? "S" : "U"); - } - } - else if(((i >> 4) & 0x1F) == 0x10) // SBCD - { - assert(ret.size() == 0); - - ret = form_destr_rm_srcr(i, "SBCD", 0); - } - return ret; -} - -static std::string Instr9(const unsigned i) -{ - return form_dar_opm_ea_SUB_ADD(i, "SUB"); -} - -static std::string InstrB(const unsigned i) -{ - std::string ret; - - switch((i >> 6) & 0x7) - { - case 0x4: case 0x5: case 0x6: - ret = form_dar_opm_ea_EOR(i, "EOR"); - break; - - case 0x0: case 0x1: case 0x2: case 0x3: case 0x7: - ret = form_dar_opm_ea_CMP(i, "CMP"); - break; - } - - // CMPM - if(((i >> 8) & 0x1) == 0x1 && ((i >> 3) & 0x7) == 0x1) - { - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - assert(ret.size() == 0); - ret = form_destr_rm_srcr(i, "CMP", szi, "ADDR_REG_INDIR_POST"); - } - } - - return ret; -} - -static std::string InstrC(const unsigned i) -{ - std::string ret; - - ret = form_dar_opm_ea_OR_AND(i, "AND"); - - if(((i >> 4) & 0x1F) == 0x10) // ABCD - { - assert(ret.size() == 0); - - ret = form_destr_rm_srcr(i, "ABCD", 0); - } - - // MULU/MULS - if(((i >> 6) & 0x3) == 0x3) - { - std::string tmp; - - if(decode_ea_src(AMA_DATA, 2, i, "src", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; MUL%s(src, instr_b11_b9);", tmp.c_str(), ((i >> 8) & 1) ? 
"S" : "U"); - } - } - - { - const unsigned exgm = (i >> 3) & 0x3F; - switch(exgm) - { - case 0x28: - assert(ret.size() == 0); - ret = s("EXG(&D[instr_b11_b9], &D[instr_b2_b0]); /* EXG Dx, Dy */"); - break; - - case 0x29: - assert(ret.size() == 0); - ret = s("EXG(&A[instr_b11_b9], &A[instr_b2_b0]); /* EXG Ax, Ay */"); - break; - - case 0x31: - assert(ret.size() == 0); - ret = s("EXG(&D[instr_b11_b9], &A[instr_b2_b0]); /* EXG Dx, Ay */"); - break; - } - } - - return ret; -} - -static std::string InstrD(const unsigned i) -{ - return form_dar_opm_ea_SUB_ADD(i, "ADD"); -} - -static std::string InstrE(const unsigned i) -{ - std::string ret; - - const unsigned szi = (i >> 6) & 0x3; - static const char* op_bases[4] = { "AS", "LS", "ROX", "RO" }; - static const char* op_suffixes[2] = { "R", "L" }; - const bool dr = (i >> 8) & 1; // Direction, 0=right, 1=left - - if(szi == 0x3) - { - if(((i >> 11) & 1) == 0) - { - std::string tmp; - - if(decode_ea_src(AMA_MEMORY | AMA_ALTERABLE, 2, i, "targ", &tmp)) - { - const unsigned type = (i >> 9) & 0x3; - - ret += s("%s; %s%s(targ, 1);", tmp.c_str(), op_bases[type], op_suffixes[dr]); - } - } - } - else - { - const unsigned type = (i >> 3) & 0x3; - const bool lr = (i >> 5) & 1; - std::string cnt; - - if(lr) - cnt = s("D[instr_b11_b9]"); - else - cnt = s("instr_b11_b9 ? 
instr_b11_b9 : 8"); - - ret += s("HAM<%s, DATA_REG_DIR> targ(this, instr_b2_b0); %s%s(targ, %s);", size_names[szi], op_bases[type], op_suffixes[dr], cnt.c_str()); - } - - return ret; -} - -static std::string InstrA(const unsigned instr) -{ - return "LINEA();"; -} - -static std::string InstrF(const unsigned instr) -{ - return "LINEF();"; -} - - -int main() -{ - std::map> bm; - - for(unsigned i = 0; i < 65536; i++) - { - std::string body; - - switch(i >> 12) - { - case 0x0: body += Instr0(i); break; - case 0x1: body += Instr1(i); break; - case 0x2: - case 0x3: body += Instr23(i); break; - case 0x4: body += Instr4(i); break; - case 0x5: body += Instr5(i); break; - case 0x6: body += Instr6(i); break; - case 0x7: body += Instr7(i); break; - case 0x8: body += Instr8(i); break; - case 0x9: body += Instr9(i); break; - case 0xA: body += InstrA(i); break; - case 0xB: body += InstrB(i); break; - case 0xC: body += InstrC(i); break; - case 0xD: body += InstrD(i); break; - case 0xE: body += InstrE(i); break; - case 0xF: body += InstrF(i); break; - } - if(body.size() > 0) - bm[body].push_back(i); - } - - for(auto const& bme : bm) - { - for(auto const& ve : bme.second) - { - printf("case 0x%04x:\n", ve); - } - - printf("\t{\n\t %s\n\t}\n\tbreak;\n\n", bme.first.c_str()); - } -} diff --git a/mednafen/hw_cpu/m68k/gen_split.cpp b/mednafen/hw_cpu/m68k/gen_split.cpp deleted file mode 100644 index 3ea75602..00000000 --- a/mednafen/hw_cpu/m68k/gen_split.cpp +++ /dev/null @@ -1,1295 +0,0 @@ -/******************************************************************************/ -/* Mednafen - Multi-system Emulator */ -/******************************************************************************/ -/* gen.cpp - Table Generator for Motorola 68000 CPU Emulator -** Copyright (C) 2015-2016 Mednafen Team -** -** This program is free software; you can redistribute it and/or -** modify it under the terms of the GNU General Public License -** as published by the Free Software Foundation; either version 
2 -** of the License, or (at your option) any later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software Foundation, Inc., -** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - -// g++ -std=gnu++14 -Wall -O2 -o gen_split gen_split.cpp -// ./gen_split m68k_instr_split%d.inc 2 - -#include -#include -#include -#include -#include - -#include -#include -#include - -static std::string s(const char* format, ...) -{ - static char buf[65536]; - va_list ap; - - va_start(ap, format); - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - - return std::string(buf); -} - -/* - DATA_REG_DIR, - ADDR_REG_DIR, - - ADDR_REG_INDIR, - ADDR_REG_INDIR_POST, - ADDR_REG_INDIR_PRE, - - ADDR_REG_INDIR_DISP, - - ADDR_REG_INDIR_INDX, - - ABS_SHORT, - ABS_LONG, - - PC_DISP, - PC_INDEX, - - IMMEDIATE -*/ -enum -{ - AMA_DATA = 0x0001, - AMA_MEMORY = 0x0002, - AMA_CONTROL = 0x0004, - AMA_ALTERABLE = 0x0008, -}; - -static const char* bsize_to_type(unsigned size) -{ - if(size == 0) - return "std::tuple<>"; - - if(size == 1) - return "uint8_t"; - - if(size == 2) - return "uint16_t"; - - if(size == 4) - return "uint32_t"; - - return NULL; -} - -/* Phase-9d: decode_ea retired into - * decode_ea_src (move_dest=false) and decode_ea_dst - * (move_dest=true) via source-fold. 
*/ - -bool decode_ea_src(unsigned allowed, int size, unsigned instr, const char* ham_name, std::string* ham_out) -{ - - const unsigned mode = (instr >> ( 3)) & 0x7; - const unsigned reg = (instr >> ( 0)) & 0x7; - - if(size == -1) - { - if(mode == 0) - size = 4; - else - size = 1; - } - - if((size != 0 && size != 1 && size != 2 && size != 4) || (size == 0 && allowed != AMA_CONTROL)) - return false; - - static const struct - { - const char* name; - unsigned flags; - } ams[2][0x8] = - { - { - { "DATA_REG_DIR", AMA_DATA | AMA_ALTERABLE }, - { "ADDR_REG_DIR", AMA_ALTERABLE }, - { "ADDR_REG_INDIR", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_POST", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_PRE", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_DISP", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_INDX", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { NULL, 0 }, - }, - { - { "ABS_SHORT", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ABS_LONG", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "PC_DISP", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "PC_INDEX", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "IMMEDIATE", AMA_DATA | AMA_MEMORY }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - } - }; - - auto const* const am = &ams[mode == 0x7][(mode == 0x7) ? 
reg : mode]; - - if((am->flags & allowed) != allowed) - return false; - - if(mode == 0x1 && size == 1) - return false; - - if(mode < 0x7) - { - *ham_out += s("HAM<%s, %s> %s(this, %s)", bsize_to_type(size), am->name, ham_name, ( "instr_b2_b0")); - - return true; - } - else if(am->name) - { - *ham_out += s("HAM<%s, %s> %s(this)", bsize_to_type(size), am->name, ham_name); - - return true; - } - - return false; -} - -bool decode_ea_dst(unsigned allowed, int size, unsigned instr, const char* ham_name, std::string* ham_out) -{ - - const unsigned mode = (instr >> ( 6 )) & 0x7; - const unsigned reg = (instr >> ( 9 )) & 0x7; - - if(size == -1) - { - if(mode == 0) - size = 4; - else - size = 1; - } - - if((size != 0 && size != 1 && size != 2 && size != 4) || (size == 0 && allowed != AMA_CONTROL)) - return false; - - static const struct - { - const char* name; - unsigned flags; - } ams[2][0x8] = - { - { - { "DATA_REG_DIR", AMA_DATA | AMA_ALTERABLE }, - { "ADDR_REG_DIR", AMA_ALTERABLE }, - { "ADDR_REG_INDIR", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_POST", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_PRE", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE }, - { "ADDR_REG_INDIR_DISP", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ADDR_REG_INDIR_INDX", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { NULL, 0 }, - }, - { - { "ABS_SHORT", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "ABS_LONG", AMA_DATA | AMA_MEMORY | AMA_ALTERABLE | AMA_CONTROL }, - { "PC_DISP", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "PC_INDEX", AMA_DATA | AMA_MEMORY | AMA_CONTROL }, - { "IMMEDIATE", AMA_DATA | AMA_MEMORY }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - } - }; - - auto const* const am = &ams[mode == 0x7][(mode == 0x7) ? 
reg : mode]; - - if((am->flags & allowed) != allowed) - return false; - - if(mode == 0x1 && size == 1) - return false; - - if(mode < 0x7) - { - *ham_out += s("HAM<%s, %s> %s(this, %s)", bsize_to_type(size), am->name, ham_name, ( "instr_b11_b9" )); - - return true; - } - else if(am->name) - { - *ham_out += s("HAM<%s, %s> %s(this)", bsize_to_type(size), am->name, ham_name); - - return true; - } - - return false; -} - - -static void PrivilegeWrap(std::string *str, bool mfsr = false) -{ - if(mfsr) - *str = s("if(!Revision_E || CheckPrivilege()) { %s }", str->c_str()); - else - *str = s("if(CheckPrivilege()) { %s }", str->c_str()); -} - -static const char* size_names[4] = { "uint8_t", "uint16_t", "uint32_t", NULL }; - -// -// Bit manipulation, MOVEP, immediate -// -static std::string Instr0(const unsigned i) -{ - std::string ret; - - if(i & 0x100) - { - // MOVEP - if((i & 0x38) == 0x08) - { - assert(ret.size() == 0); - const char* sn = (i & 0x40) ? "uint32_t" : "uint16_t"; - - /* Phase-8d: emit named MOVEP variants instead of template form. */ - { - const bool is_l = (i & 0x40) != 0; - const bool to_mem = (i & 0x80) != 0; - const char* fname = is_l - ? (to_mem ? "MOVEP_l_reg_to_mem" : "MOVEP_l_mem_to_reg") - : (to_mem ? "MOVEP_w_reg_to_mem" : "MOVEP_w_mem_to_reg"); - ret += s("%s(instr_b2_b0, instr_b11_b9);", fname); - } - } - } - else - { - static const char* op_names[8] = { "OR", "AND", "SUB", "ADD", NULL, "EOR", "CMP", NULL }; - const unsigned opi = (i >> 9) & 0x7; - const unsigned szi = (i >> 6) & 0x3; - - std::string tmp; - - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1 << szi, i, "dst", &tmp)) - { - if(op_names[opi] && size_names[szi]) - { - assert(ret.size() == 0); - ret += s("HAM<%s, IMMEDIATE> src(this); %s; %s(src, dst);", size_names[szi], tmp.c_str(), op_names[opi]); //, size_names[szi]); - } - } - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000000000111100) - { - assert(ret.size() == 0); - ret += s("ORI_%s();", (i & 0x40) ? 
"SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000001000111100) - { - assert(ret.size() == 0); - ret += s("ANDI_%s();", (i & 0x40) ? "SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - - // FEDCBA9876543210 FEDCBA9876543210 - if((i & 0b1111111110111111) == 0b0000101000111100) - { - assert(ret.size() == 0); - ret += s("EORI_%s();", (i & 0x40) ? "SR" : "CCR" ); - if(i & 0x40) - PrivilegeWrap(&ret); - } - } - - - // bit - { - static const char* op_names[4] = { "BTST", "BCHG", "BCLR", "BSET" }; - const unsigned type = (i >> 6) & 0x3; - const unsigned allowed = AMA_DATA | ((type == 0) ? 0 : AMA_ALTERABLE); - std::string tmp; - - if(decode_ea_src(allowed, -1, i, "targ", &tmp)) - { - if(i & 0x100) // Dynamic - { - assert(ret.size() == 0); - ret += s("%s; %s(targ, D[instr_b11_b9]);", tmp.c_str(), op_names[type]); - } - else if(((i >> 9) & 0x7) == 0x4 && ((i & 0x3F) != 0x3C)) // Static - { - //printf("%04x -- %s\n", i, ret.c_str()); - assert(ret.size() == 0); - ret += s("unsigned wb = ReadOp(); %s; %s(targ, wb);", tmp.c_str(), op_names[type]); - } - } - } - - return ret; -} - - -// -// MOVEB -// -static std::string Instr1(const unsigned i) -{ - std::string ret; - std::string src_tmp; - std::string dst_tmp; - - if(decode_ea_src(AMA_DATA, 1, i, "src", &src_tmp) && decode_ea_dst(AMA_DATA | AMA_ALTERABLE, 1, i, "dst", &dst_tmp)) - { - ret += s("%s; %s; MOVE(src, dst);", src_tmp.c_str(), dst_tmp.c_str()); - } - - return ret; -} - -// -// MOVEL MOVEW -// -static std::string Instr23(const unsigned i) -{ - std::string ret; - std::string src_tmp; - std::string dst_tmp; - const unsigned size = ((i >> 12) == 0x2) ? 
4 : 2; - - if(decode_ea_src(0, size, i, "src", &src_tmp)) - { - if(decode_ea_dst(AMA_DATA | AMA_ALTERABLE, size, i, "dst", &dst_tmp)) - { - ret += s("%s; %s; MOVE(src, dst);", src_tmp.c_str(), dst_tmp.c_str()); - } - else if(((i >> 6) & 0x7) == 0x1) - { - ret += s("%s; MOVEA(src, instr_b11_b9);", src_tmp.c_str()); - } - } - return ret; -} - -/* Phase-9d: form_dar_opm_ea retired into 4 named variants via - * source-fold. Suffixes map to the four distinct call-site - * argument tuples (see callers below). */ - -static std::string form_dar_opm_ea_OR_AND(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - return ""; - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = AMA_DATA; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_SUB_ADD(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = 0; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(true && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_EOR(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - return ""; - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_DATA | AMA_ALTERABLE; - } - else - { - allowed = AMA_DATA; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - -static std::string form_dar_opm_ea_CMP(const unsigned i, const char* name) -{ - - std::string ret; - const unsigned opm = (i >> 6) & 0x7; - unsigned szi; - bool to_mem; - bool is_a = false; - - if(opm == 0x3 || opm == 0x7) - { - to_mem = false; - szi = (opm & 0x4) ? 2 : 1; - is_a = true; - - } - else - { - to_mem = (bool)(opm & 0x4); - szi = opm & 0x3; - } - - std::string tmp_ea, tmp_reg; - unsigned allowed; - - if(to_mem) - { - allowed = AMA_MEMORY | AMA_ALTERABLE; - } - else - { - allowed = 0; - } - - if(decode_ea_src(allowed, 1U << szi, i, to_mem ? "dst" : "src", &tmp_ea)) - { - tmp_ea += "; "; - tmp_reg = s("HAM<%s, %s> %s\(this, instr_b11_b9); ", is_a ? "uint32_t" : size_names[szi], is_a ? "ADDR_REG_DIR" : "DATA_REG_DIR", to_mem ? 
"src" : "dst"); - - if(to_mem) - ret += tmp_reg + tmp_ea; - else - ret += tmp_ea + tmp_reg; - - ret += s("%s(src, dst);", name); - } - else if(false && ((i >> 8) & 0x1) == 0x1 && ((i >> 4) & 0x3) == 0x0) - { - const bool rm = (i >> 3) & 1; - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - if(rm) // address register, predecrement addressing mode - { - ret += s("HAM<%s, ADDR_REG_INDIR_PRE> src(this, instr_b2_b0); HAM<%s, ADDR_REG_INDIR_PRE> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - else - { - ret += s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %sX(src, dst);", size_names[szi], size_names[szi], name); - } - } - } - return ret; -} - - -static std::string form_destr_rm_srcr(const unsigned i, const char* name, const unsigned szi, const char* mem_am = "ADDR_REG_INDIR_PRE") -{ - const bool rm = (i >> 3) & 1; - std::string ret; - - if(rm) // address register, predecrement addressing mode - ret = s("HAM<%s, %s> src(this, instr_b2_b0); HAM<%s, %s> dst(this, instr_b11_b9); %s(src, dst);", size_names[szi], mem_am, size_names[szi], mem_am, name); - else - ret = s("HAM<%s, DATA_REG_DIR> src(this, instr_b2_b0); HAM<%s, DATA_REG_DIR> dst(this, instr_b11_b9); %s(src, dst);", size_names[szi], size_names[szi], name); - - return ret; -} - -static std::string Instr4(const unsigned instr) -{ - std::string ret; - std::string tmp; - - if(((instr >> 7) & 0x1F) == 0x1D && decode_ea_src(AMA_CONTROL, 0, instr, "targ", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; %s(targ);", tmp.c_str(), (instr & 0x40) ? 
"JMP" : "JSR"); - } - - if(((instr >> 7) & 0x7) == 0x1 && ((instr >> 11) & 0x1) == 0x1) - { - const bool dr = (instr >> 10) & 1; // 0 = reg->mem, 1 = mem->reg - const bool sz = (instr >> 6) & 1; // 0 = word, 1 = long - std::string rls = "const uint16_t reglist = ReadOp()"; - unsigned instr_adj = instr; - - if(!dr) - { - bool pseudo_predec = false; - if(((instr_adj >> 3) & 0x7) == 4) - { - instr_adj = (instr_adj &~ (0x7 << 3)) | (0x2 << 3); - pseudo_predec = true; - } - - if(decode_ea_src(AMA_CONTROL | AMA_ALTERABLE, 2 << sz, instr_adj, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; %s; MOVEM_to_MEM(%s, reglist, dst);", rls.c_str(), tmp.c_str(), pseudo_predec ? "true" : "false"); - } - } - else - { - bool pseudo_postinc = false; - - if(((instr_adj >> 3) & 0x7) == 3) - { - instr_adj = (instr_adj &~ (0x7 << 3)) | (0x2 << 3); - pseudo_postinc = true; - } - - if(decode_ea_src(AMA_CONTROL, 2 << sz, instr_adj, "src", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; %s; MOVEM_to_REGS(%s, src, reglist);", rls.c_str(), tmp.c_str(), pseudo_postinc ? 
"true" : "false"); - } - } - } - - - if((instr & 0xF00) == 0x000) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - // MOVE from SR - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 2, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; MOVE_from_SR(dst);", tmp.c_str()); - PrivilegeWrap(&ret, true); - } - } - else - { - // NEGX - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; NEGX(dst);", tmp.c_str()); - } - } - } - - - - // - // - // - { - const unsigned type = (instr >> 6) & 0x7; - - if(type == 0x6) - { - // CHK - if(decode_ea_src(AMA_DATA, 2, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; HAM dst(this, instr_b11_b9); CHK(src, dst);", tmp.c_str()); - } - } - else if(type == 0x7) - { - // LEA - if(decode_ea_src(AMA_CONTROL, 0, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; LEA(src, instr_b11_b9);", tmp.c_str()); - } - } - } - - - if((instr & 0xF00) == 0x200) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - - } - else - { - // CLR - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; CLR(dst);", tmp.c_str()); - } - } - } - - if((instr & 0xD00) == 0x400) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - if(decode_ea_src(AMA_DATA, 2, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - if(instr & 0x200) // MOVE to SR - { - ret += s("%s; MOVE_to_SR(src);", tmp.c_str()); - PrivilegeWrap(&ret); - } - else // MOVE to CCR - ret += s("%s; MOVE_to_CCR(src);", tmp.c_str()); - } - } - else - { - // NOT - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - if(instr & 0x200) // NOT - ret += s("%s; NOT(dst);", tmp.c_str()); - else // NEG - ret += s("%s; NEG(dst);", tmp.c_str()); - } - } - } - - // NBCD - if((instr & 0xFC0) == 0x800 && 
decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; NBCD(dst);", tmp.c_str()); - } - - // SWAP - if((instr & 0xFF8) == 0x840) - { - assert(ret.size() == 0); - - ret += s("SWAP(instr_b2_b0);"); - } - - // PEA - if((instr & 0xFC0) == 0x840 && decode_ea_src(AMA_CONTROL, 0, instr, "src", &tmp)) - { - assert(ret.size() == 0); - - ret += s("%s; PEA(src);", tmp.c_str()); - } - - // EXT - if(((instr >> 9) & 0x7) == 0x4 && ((instr >> 3) & 0x7) == 0x0) - { - const unsigned type = (instr >> 6) & 0x7; - - if(type == 0x2 || type == 0x3) - { - assert(ret.size() == 0); - - ret = s("HAM<%s, DATA_REG_DIR> dst(this, instr_b2_b0); EXT(dst);", (type & 0x1) ? "uint32_t" : "uint16_t"); - } - } - - - // MOVEM EA to Regs - // TODO! - if((instr & 0xF80) == 0xC80) - { - //assert(ret.size() == 0); - } - - // MOVEM regs to EA - // TODO! - if((instr & 0xF80) == 0x880) - { - //assert(ret.size() == 0); - } - - if((instr & 0xF00) == 0xA00) - { - const unsigned szi = (instr >> 6) & 0x3; - - if(szi == 0x3) - { - // TAS - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; TAS(dst);", tmp.c_str()); - } - } - else - { - // TST - if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1U << szi, instr, "dst", &tmp)) - { - assert(ret.size() == 0); - ret += s("%s; TST(dst);", tmp.c_str()); - } - } - } - - - // TRAP - if((instr & 0xFF0) == 0xE40) - { - assert(ret.size() == 0); - ret += s("TRAP(instr & 0xF);"); - } - - // MOVE to/from USP - if((instr & 0xFF0) == 0xE60) - { - assert(ret.size() == 0); - ret += s("MOVE_USP(%s, instr_b2_b0);", (instr & 0x8) ? 
"true" : "false"); - PrivilegeWrap(&ret); - } - - // LINK - if((instr & 0xFF8) == 0xE50) - { - assert(ret.size() == 0); - ret += s("LINK(instr_b2_b0);"); - } - - // UNLK - if((instr & 0xFF8) == 0xE58) - { - assert(ret.size() == 0); - ret += s("UNLK(instr_b2_b0);"); - } - - // RTR - // FEDCBA9876543210 - if(instr == 0b0100111001110111) - { - assert(ret.size() == 0); - ret += s("RTR();"); - } - - - // TRAPV - // FEDCBA9876543210 - if(instr == 0b0100111001110110) - { - assert(ret.size() == 0); - ret += s("TRAPV();"); - } - - // RTS - // FEDCBA9876543210 - if(instr == 0b0100111001110101) - { - assert(ret.size() == 0); - ret += s("RTS();"); - } - - // RTE - // FEDCBA9876543210 - if(instr == 0b0100111001110011) - { - assert(ret.size() == 0); - ret += s("RTE();"); - PrivilegeWrap(&ret); - } - - // STOP - // FEDCBA9876543210 - if(instr == 0b0100111001110010) - { - assert(ret.size() == 0); - ret += s("STOP();"); - PrivilegeWrap(&ret); - } - - // NOP - // FEDCBA9876543210 - if(instr == 0b0100111001110001) - { - assert(ret.size() == 0); - ret += s("NOP();"); - } - - // RESET - // FEDCBA9876543210 - if(instr == 0b0100111001110000) - { - assert(ret.size() == 0); - ret += s("RESET();"); - PrivilegeWrap(&ret); - } - - - return ret; -} - -static std::string Instr5(const unsigned i) -{ - std::string ret; - std::string tmp; - const unsigned szi = (i >> 6) & 0x3; - - if(szi == 0x3) - { - if(((i >> 3) & 0x7) == 0x1) // DBcc - { - ret = s("DBcc<0x%02x>(instr_b2_b0);", (i >> 8) & 0xF); - } - else if(decode_ea_src(AMA_DATA | AMA_ALTERABLE, 1, i, "dst", &tmp)) // Scc - { - ret = s("%s; Scc<0x%02x>(dst);", tmp.c_str(), (i >> 8) & 0xF); - } - } - else if(decode_ea_src(AMA_ALTERABLE, (((i >> 3) & 0x7) == 1) ? 4 : (1U << szi), i, "dst", &tmp)) // ADDQ and SUBQ - { - ret = s("HAM<%s, IMMEDIATE> src(this, instr_b11_b9 ? instr_b11_b9 : 8); ", size_names[szi]); - ret += s("%s; %s(src, dst);", tmp.c_str(), (i & 0x100) ? 
"SUB" : "ADD"); - } - - - return ret; -} - -static std::string Instr6(const unsigned i) -{ - return s("Bxx<0x%02x>((int8_t)instr);", (i >> 8) & 0xF); -} - -static std::string Instr7(const unsigned i) -{ - std::string ret; - - // MOVEQ - if(((i >> 8) & 0x1) == 0x0) - { - ret = s("HAM src(this, (int8_t)instr); HAM dst(this, instr_b11_b9); MOVE(src, dst);"); - } - - return ret; -} - -static std::string Instr8(const unsigned i) -{ - std::string ret; - - ret = form_dar_opm_ea_OR_AND(i, "OR"); - - // DIVU/DIVS - if(((i >> 6) & 0x3) == 0x3) - { - std::string tmp; - - if(decode_ea_src(AMA_DATA, 2, i, "src", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; DIV%s(src, instr_b11_b9);", tmp.c_str(), ((i >> 8) & 1) ? "S" : "U"); - } - } - else if(((i >> 4) & 0x1F) == 0x10) // SBCD - { - assert(ret.size() == 0); - - ret = form_destr_rm_srcr(i, "SBCD", 0); - } - return ret; -} - -static std::string Instr9(const unsigned i) -{ - return form_dar_opm_ea_SUB_ADD(i, "SUB"); -} - -static std::string InstrB(const unsigned i) -{ - std::string ret; - - switch((i >> 6) & 0x7) - { - case 0x4: case 0x5: case 0x6: - ret = form_dar_opm_ea_EOR(i, "EOR"); - break; - - case 0x0: case 0x1: case 0x2: case 0x3: case 0x7: - ret = form_dar_opm_ea_CMP(i, "CMP"); - break; - } - - // CMPM - if(((i >> 8) & 0x1) == 0x1 && ((i >> 3) & 0x7) == 0x1) - { - const unsigned szi = (i >> 6) & 0x3; - - if(szi != 0x3) - { - assert(ret.size() == 0); - ret = form_destr_rm_srcr(i, "CMP", szi, "ADDR_REG_INDIR_POST"); - } - } - - return ret; -} - -static std::string InstrC(const unsigned i) -{ - std::string ret; - - ret = form_dar_opm_ea_OR_AND(i, "AND"); - - if(((i >> 4) & 0x1F) == 0x10) // ABCD - { - assert(ret.size() == 0); - - ret = form_destr_rm_srcr(i, "ABCD", 0); - } - - // MULU/MULS - if(((i >> 6) & 0x3) == 0x3) - { - std::string tmp; - - if(decode_ea_src(AMA_DATA, 2, i, "src", &tmp)) - { - assert(ret.size() == 0); - - ret = s("%s; MUL%s(src, instr_b11_b9);", tmp.c_str(), ((i >> 8) & 1) ? 
"S" : "U"); - } - } - - { - const unsigned exgm = (i >> 3) & 0x3F; - switch(exgm) - { - case 0x28: - assert(ret.size() == 0); - ret = s("EXG(&D[instr_b11_b9], &D[instr_b2_b0]); /* EXG Dx, Dy */"); - break; - - case 0x29: - assert(ret.size() == 0); - ret = s("EXG(&A[instr_b11_b9], &A[instr_b2_b0]); /* EXG Ax, Ay */"); - break; - - case 0x31: - assert(ret.size() == 0); - ret = s("EXG(&D[instr_b11_b9], &A[instr_b2_b0]); /* EXG Dx, Ay */"); - break; - } - } - - return ret; -} - -static std::string InstrD(const unsigned i) -{ - return form_dar_opm_ea_SUB_ADD(i, "ADD"); -} - -static std::string InstrE(const unsigned i) -{ - std::string ret; - - const unsigned szi = (i >> 6) & 0x3; - static const char* op_bases[4] = { "AS", "LS", "ROX", "RO" }; - static const char* op_suffixes[2] = { "R", "L" }; - const bool dr = (i >> 8) & 1; // Direction, 0=right, 1=left - - if(szi == 0x3) - { - if(((i >> 11) & 1) == 0) - { - std::string tmp; - - if(decode_ea_src(AMA_MEMORY | AMA_ALTERABLE, 2, i, "targ", &tmp)) - { - const unsigned type = (i >> 9) & 0x3; - - ret += s("%s; %s%s(targ, 1);", tmp.c_str(), op_bases[type], op_suffixes[dr]); - } - } - } - else - { - const unsigned type = (i >> 3) & 0x3; - const bool lr = (i >> 5) & 1; - std::string cnt; - - if(lr) - cnt = s("D[instr_b11_b9]"); - else - cnt = s("instr_b11_b9 ? 
instr_b11_b9 : 8"); - - ret += s("HAM<%s, DATA_REG_DIR> targ(this, instr_b2_b0); %s%s(targ, %s);", size_names[szi], op_bases[type], op_suffixes[dr], cnt.c_str()); - } - - return ret; -} - -static std::string InstrA(const unsigned instr) -{ - return "LINEA();"; -} - -static std::string InstrF(const unsigned instr) -{ - return "LINEF();"; -} - - -int main(int argc, char *argv[]) -{ - if(argc < 2 || argc > 3) - { - printf("Usage: %s [split count]\n", argv[0]); - return 1; - } - char *output_pattern = argv[1]; - if(strlen(output_pattern) == 0) - { - printf("Error: need a string for output filename pattern!\n"); - return 1; - } - int split_count = 1; - if(argc == 3) - split_count = std::atoi(argv[2]); - if(split_count < 1 || split_count > 16) - { - printf("Error: invalid split count! (%d)\n", split_count); - return 1; - } - unsigned sections_per_file = std::ceil(16.0 / split_count); - - std::vector>> bm[16]; - - for(unsigned i = 0; i < 65536; i++) - { - std::string body; - unsigned section = i >> 12; - - switch(section) - { - case 0x0: body = Instr0(i); break; - case 0x1: body = Instr1(i); break; - case 0x2: - case 0x3: body = Instr23(i); break; - case 0x4: body = Instr4(i); break; - case 0x5: body = Instr5(i); break; - case 0x6: body = Instr6(i); break; - case 0x7: body = Instr7(i); break; - case 0x8: body = Instr8(i); break; - case 0x9: body = Instr9(i); break; - case 0xA: body = InstrA(i); break; - case 0xB: body = InstrB(i); break; - case 0xC: body = InstrC(i); break; - case 0xD: body = InstrD(i); break; - case 0xE: body = InstrE(i); break; - case 0xF: body = InstrF(i); break; - } - if(body.size() > 0) - { - if(bm[section].size() && bm[section].back().first == body) - bm[section].back().second.push_back(i); - else - bm[section].push_back({body, {i}}); - } - } - - FILE *f = NULL; - for(unsigned i = 0; i < 16; i++) - { - if(i % sections_per_file == 0) - { - if(f) - fclose(f); - f = fopen(s(output_pattern, i / sections_per_file).c_str(), "w"); - } - for(auto const& bme 
: bm[i]) - { - for(auto const& ve : bme.second) - { - fprintf(f, "case 0x%04x:\n", ve % (0x1000 * sections_per_file)); - } - - fprintf(f, "\t{\n\t %s\n\t}\n\tbreak;\n\n", bme.first.c_str()); - } - } - fclose(f); -} diff --git a/mednafen/hw_cpu/m68k/m68k.cpp b/mednafen/hw_cpu/m68k/m68k.cpp index ddbbf234..7bfd1fbd 100644 --- a/mednafen/hw_cpu/m68k/m68k.cpp +++ b/mednafen/hw_cpu/m68k/m68k.cpp @@ -44,7 +44,7 @@ CMPM (a7)+,(a7)+ */ -#include +#include "../../mednafen.h" #include "m68k.h" #include @@ -57,23 +57,78 @@ static MDFN_FASTCALL void Dummy_BusRESET(bool state) { } -M68K::M68K(const bool rev_e) : Revision_E(rev_e), - BusReadInstr(nullptr), BusRead8(nullptr), BusRead16(nullptr), - BusWrite8(nullptr), BusWrite16(nullptr), - BusRMW(nullptr), - BusIntAck(nullptr), - BusRESET(Dummy_BusRESET) +/* All M68K_* free-function wrappers exposed by m68k.h live in this + * `extern "C" { ... }` block so the symbol names are C-mangled + * (i.e. unmangled). Required because m68k.h's declarations are + * also wrapped in `extern "C"` for C-consumer compatibility -- + * the definition and declaration linkage must match or the linker + * gets two different mangled symbols and fails to resolve. + * + * The bodies are pure thunks: each forwards to the matching + * struct M68K member method. Bodies are in this TU (m68k.cpp) + * because they reach Dummy_BusRESET / member methods / sources + * not exposed in the public header. 
*/ +extern "C" { + +void M68K_Construct(M68K* z, bool rev_e) { - timestamp = 0; - XPending = 0; - IPL = 0; - Reset(true); + z->Revision_E = rev_e; + + z->BusReadInstr = NULL; + z->BusRead8 = NULL; + z->BusRead16 = NULL; + z->BusWrite8 = NULL; + z->BusWrite16 = NULL; + z->BusRMW = NULL; + z->BusIntAck = NULL; + z->BusRESET = Dummy_BusRESET; + + z->timestamp = 0; + z->XPending = 0; + z->IPL = 0; + + z->Reset(true); } -M68K::~M68K() +void M68K_SetIPL (M68K* z, uint8_t ipl_new) { + if(z->IPL < 0x7 && ipl_new == 0x7) + z->XPending |= M68K::XPENDING_MASK_NMI; + else if(ipl_new < 0x7) + z->XPending &= ~M68K::XPENDING_MASK_NMI; + z->IPL = ipl_new; + z->RecalcInt(); +} +void M68K_SignalDTACKHalted (M68K* z, uint32_t addr) { z->SignalDTACKHalted(addr); } +void M68K_SignalAddressError (M68K* z, uint32_t addr, uint8_t type) { z->SignalAddressError(addr, type); } +void M68K_Reset (M68K* z, bool pwr) { z->Reset(pwr); } +void M68K_Run (M68K* z, int32_t until) { z->Run(until); } +void M68K_SetExtHalted (M68K* z, bool state) +{ + z->XPending &= ~M68K::XPENDING_MASK_EXTHALTED; + if(state) + z->XPending |= M68K::XPENDING_MASK_EXTHALTED; } +void M68K_StateAction (M68K* z, StateMem* sm, const unsigned load, + const bool data_only, const char* sname) + { z->StateAction(sm, load, data_only, sname); } +uint32_t M68K_GetRegister (M68K* z, const unsigned id, char* const special, const uint32_t special_len) + { return z->GetRegister(id, special, special_len); } +void M68K_SetRegister (M68K* z, const unsigned id, const uint32_t value) + { z->SetRegister(id, value); } + +} /* extern "C" */ + +/* Phase-9 cleanup: M68K::M68K(const bool) and M68K::~M68K() retired. + * Zero remaining callers after sound_glue.cpp -> sound_glue.c + * (fd5bf98) switched from `static M68K SoundCPU(true);` to a + * zero-initialised SoundCPU plus an explicit M68K_Construct call + * in SoundGlue_Init(). 
The ctor body matched M68K_Construct's + * body 1:1 (the prep commit 5cafd34's free-function counterpart + * for the same work). The dtor body was empty. M68K is a pure- + * data struct now -- no class methods need calling at end-of- + * scope, no class methods need calling at construction. */ void M68K::StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname) { @@ -103,24 +158,6 @@ void M68K::StateAction(StateMem* sm, const unsigned load, const bool data_only, XPending &= XPENDING_MASK__VALID; } -void M68K::SetIPL(uint8_t ipl_new) -{ - if(IPL < 0x7 && ipl_new == 0x7) - XPending |= XPENDING_MASK_NMI; - else if(ipl_new < 0x7) - XPending &= ~XPENDING_MASK_NMI; - - IPL = ipl_new; - RecalcInt(); -} - -void M68K::SetExtHalted(bool state) -{ - XPending &= ~XPENDING_MASK_EXTHALTED; - if(state) - XPending |= XPENDING_MASK_EXTHALTED; -} - // // // diff --git a/mednafen/hw_cpu/m68k/m68k.h b/mednafen/hw_cpu/m68k/m68k.h index 9d5e00a4..4724efb1 100644 --- a/mednafen/hw_cpu/m68k/m68k.h +++ b/mednafen/hw_cpu/m68k/m68k.h @@ -22,7 +22,25 @@ #ifndef __MDFN_M68K_H #define __MDFN_M68K_H -#include +#include "../../mednafen.h" + +/* M68K_BUS_INT_ACK_AUTO -- BusIntAck callback can return this to + * tell M68K to use automatic interrupt-acknowledge vectoring (auto- + * vector mode) instead of supplying an explicit vector number. + * File-scope so consumers can spell it without the `M68K::` class- + * scope qualifier (needed once sound_glue.cpp becomes sound_glue.c + * -- C has no class-scope qualifier syntax). Value matches the + * former class-scoped `M68K::BUS_INT_ACK_AUTO` exactly. */ +enum { M68K_BUS_INT_ACK_AUTO = -1 }; + +/* C-compat typedef: in C the struct tag is not auto-aliased to a + * type name, so the bare `M68K*` spellings used in the data- + * member BusRMW function-pointer signature (inside this struct) + * and in the M68K_* free-function declarations (after this struct) + * fail to parse from a C TU. 
Forward-declare the typedef up + * front; same pattern scsp.h uses for SS_SCSP_Slot / SS_SCSP_Timer + * / SS_SCSP / etc. */ +typedef struct M68K M68K; /* Phase-9c: class -> struct. See Phase-9a comment in scsp.h * for rationale. M68K already had `//private:` (commented out) @@ -31,15 +49,29 @@ struct M68K { - M68K(const bool rev_e = false) MDFN_COLD; - ~M68K() MDFN_COLD; +#ifdef __cplusplus + /* C++-only: class methods reachable on this struct. C + * consumers see this header as a plain data struct (same + * layout, same member offsets). All bodies live in + * m68k.cpp / m68k_instr.inc / m68k_instr_split{0,1}.cpp; + * C consumers reach them via the `extern "C"` M68K_* free + * functions declared at the bottom of this header. */ + + /* Phase-9: M68K::M68K(rev_e) and M68K::~M68K() retired. Zero + * callers after sound_glue.cpp -> sound_glue.c switched to + * M68K_Construct. M68K is pure-data now; instances are + * zero-initialised at file scope and finalised with an + * explicit M68K_Construct(&inst, rev_e) call. */ void Run(int32_t run_until_time); void Reset(bool powering_up) MDFN_COLD; - void SetIPL(uint8_t ipl_new); - void SetExtHalted(bool state); + /* Phase-9d-1: SetIPL and SetExtHalted retired from the class. + * Bodies moved inline into the M68K_SetIPL / M68K_SetExtHalted + * extern "C" wrappers in m68k.cpp -- they were already 1-line + * forwarders to z->SetIPL(...) / z->SetExtHalted(...) and the + * 8 / 4 line bodies don't need the dispatch round-trip. 
*/ // @@ -64,6 +96,8 @@ struct M68K void StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname); +#endif /* __cplusplus */ + // // // @@ -110,8 +144,16 @@ struct M68K XPENDING_MASK__VALID = XPENDING_MASK_INT | XPENDING_MASK_NMI | XPENDING_MASK_RESET | XPENDING_MASK_ADDRESS | XPENDING_MASK_BUS | XPENDING_MASK_STOPPED | XPENDING_MASK_ERRORHALTED | XPENDING_MASK_DTACKHALTED | XPENDING_MASK_EXTHALTED }; - const bool Revision_E; + /* Set by M68K_Construct / M68K::M68K from the `rev_e` parameter + * and never written again. Was `const bool` -- contractual + * single-init via the ctor's member-initializer list. Dropped + * the const so the free-function M68K_Construct can assign to + * it (C-style construction has no member-initializer-list + * syntax). Set-once-at-construction is now preserved by + * convention, not by compiler-enforced const-correctness. */ + bool Revision_E; +#ifdef __cplusplus //private: void RecalcInt(void); @@ -151,6 +193,8 @@ struct M68K void RunSplit1(uint16_t instr, const unsigned instr_b11_b9, const unsigned instr_b2_b0); #endif +#endif /* __cplusplus */ + enum AddressMode { DATA_REG_DIR, @@ -173,6 +217,7 @@ struct M68K IMMEDIATE }; +#ifdef __cplusplus // // MOVE byte and word: instructions, 2 cycle penalty for source predecrement only // 2 cycle penalty for (d8, An, Xn) for both source and dest ams @@ -216,6 +261,7 @@ struct M68K void SetSR(uint16_t val); bool GetSVisor(void); +#endif /* __cplusplus */ // // @@ -259,6 +305,7 @@ struct M68K EXCEPTION_TRAP }; +#ifdef __cplusplus void NO_INLINE Exception(unsigned which, unsigned vecnum); template @@ -492,13 +539,13 @@ struct M68K void STOP(void); bool CheckPrivilege(void); +#endif /* __cplusplus */ // // // // // // These externally-provided functions should add >= 4 to M68K::timestamp per call: - enum { BUS_INT_ACK_AUTO = -1 }; uint16_t (MDFN_FASTCALL *BusReadInstr)(uint32_t A); uint8_t (MDFN_FASTCALL *BusRead8)(uint32_t A); @@ -544,30 +591,55 @@ struct M68K 
GSREG_USP }; +#ifdef __cplusplus uint32_t GetRegister(unsigned which, char* special = nullptr, const uint32_t special_len = 0); void SetRegister(unsigned which, uint32_t value); +#endif /* __cplusplus */ }; -/* Phase-9 step 3: free-function wrappers around M68K members used by - * sound_glue.cpp. Pure inline forwarders; codegen folds to direct - * member access under -O2. Member function bodies remain in - * m68k_private.h / m68k.cpp for now and will be converted to true - * free functions in a later phase (gated on retirement of the - * HAM cascade). */ -static FORCE_INLINE void M68K_SetIPL (M68K* z, uint8_t ipl_new) { z->SetIPL(ipl_new); } -static FORCE_INLINE void M68K_SignalDTACKHalted (M68K* z, uint32_t addr) { z->SignalDTACKHalted(addr); } -static FORCE_INLINE void M68K_SignalAddressError (M68K* z, uint32_t addr, uint8_t type) { z->SignalAddressError(addr, type); } - -static FORCE_INLINE void M68K_Reset (M68K* z, bool pwr) { z->Reset(pwr); } -static FORCE_INLINE void M68K_Run (M68K* z, int32_t until) { z->Run(until); } -static FORCE_INLINE void M68K_SetExtHalted (M68K* z, bool state) { z->SetExtHalted(state); } -static FORCE_INLINE void M68K_StateAction (M68K* z, StateMem* sm, const unsigned load, - const bool data_only, const char* sname) - { z->StateAction(sm, load, data_only, sname); } -static FORCE_INLINE uint32_t M68K_GetRegister (M68K* z, const unsigned id, char* const special, const uint32_t special_len) - { return z->GetRegister(id, special, special_len); } -static FORCE_INLINE void M68K_SetRegister (M68K* z, const unsigned id, const uint32_t value) - { z->SetRegister(id, value); } +/* M68K_* free-function API exposed to consumers of m68k.h. + * + * All declarations live inside an `extern "C" { ... }` block (gated + * by __cplusplus so plain C consumers can include this header + * directly) -- the matching definitions in m68k.cpp also use + * `extern "C"` linkage. 
This makes the wrappers callable from + * both C++ and C TUs, with one well-defined ABI symbol per name. + * + * Trade-off vs the previous `static FORCE_INLINE` header-side + * definitions: we lose call-site inlining of the thunk body + * (each wrapper became a real function call to a 1-2 instruction + * out-of-line body in m68k.cpp), but gain a C-callable surface + * that sound_glue.cpp -> sound_glue.c needs. None of these + * wrappers are on the M68K::Run inner loop -- they're called + * from external orchestration code (IRQ change, savestate, + * reset, scheduler step, debugger register read/write) -- so + * the per-call function-call overhead is negligible in profile + * terms. Phase-9 step 3's original comment about codegen + * folding under -O2 stops applying here; cross-TU inlining is + * now LTO-dependent. + */ +#ifdef __cplusplus +extern "C" { +#endif + +void M68K_Construct (M68K* z, bool rev_e) MDFN_COLD; + +void M68K_SetIPL (M68K* z, uint8_t ipl_new); +void M68K_SignalDTACKHalted (M68K* z, uint32_t addr); +void M68K_SignalAddressError (M68K* z, uint32_t addr, uint8_t type); + +void M68K_Reset (M68K* z, bool pwr) MDFN_COLD; +void M68K_Run (M68K* z, int32_t until); +void M68K_SetExtHalted (M68K* z, bool state); +void M68K_StateAction (M68K* z, StateMem* sm, const unsigned load, + const bool data_only, const char* sname); +uint32_t M68K_GetRegister (M68K* z, const unsigned id, char* const special, + const uint32_t special_len); +void M68K_SetRegister (M68K* z, const unsigned id, const uint32_t value); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #endif diff --git a/mednafen/hw_cpu/m68k/m68k_instr_split0.cpp b/mednafen/hw_cpu/m68k/m68k_instr_split0.cpp index fe4e8660..eaacf535 100644 --- a/mednafen/hw_cpu/m68k/m68k_instr_split0.cpp +++ b/mednafen/hw_cpu/m68k/m68k_instr_split0.cpp @@ -1,4 +1,6 @@ -#include +#include + +#include "../../mednafen.h" #include "m68k.h" #include "m68k_private.h" diff --git a/mednafen/hw_cpu/m68k/m68k_instr_split1.cpp 
b/mednafen/hw_cpu/m68k/m68k_instr_split1.cpp index 97abf293..3e4f1bdf 100644 --- a/mednafen/hw_cpu/m68k/m68k_instr_split1.cpp +++ b/mednafen/hw_cpu/m68k/m68k_instr_split1.cpp @@ -1,4 +1,4 @@ -#include +#include "../../mednafen.h" #include "m68k.h" #include "m68k_private.h" diff --git a/mednafen/hw_cpu/m68k/m68k_private.h b/mednafen/hw_cpu/m68k/m68k_private.h index 15c6264b..6d14750c 100644 --- a/mednafen/hw_cpu/m68k/m68k_private.h +++ b/mednafen/hw_cpu/m68k/m68k_private.h @@ -1,7 +1,7 @@ #ifndef __MDFN_M68K_PRIVATE_H #define __MDFN_M68K_PRIVATE_H -#include +#include "../../mednafen.h" #include "m68k.h" INLINE void M68K::RecalcInt(void) diff --git a/mednafen/math_ops.h b/mednafen/math_ops.h index 59170462..e9133bfd 100644 --- a/mednafen/math_ops.h +++ b/mednafen/math_ops.h @@ -32,32 +32,9 @@ #include #endif -static INLINE unsigned MDFN_lzcount16_0UD(uint16_t v) -{ - #if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) - return 15 ^ 31 ^ __builtin_clz(v); - #elif defined(_MSC_VER) - unsigned long idx; - - _BitScanReverse(&idx, v); - - return 15 ^ idx; - #else - unsigned ret = 0; - unsigned tmp; - - tmp = !(v & 0xFF00) << 3; v <<= tmp; ret += tmp; - tmp = !(v & 0xF000) << 2; v <<= tmp; ret += tmp; - tmp = !(v & 0xC000) << 1; v <<= tmp; ret += tmp; - tmp = !(v & 0x8000) << 0; ret += tmp; - - return(ret); - #endif -} - static INLINE unsigned MDFN_lzcount32_0UD(uint32_t v) { - #if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) +#if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) return __builtin_clz(v); #elif defined(_MSC_VER) unsigned long idx; @@ -65,30 +42,28 @@ static INLINE unsigned MDFN_lzcount32_0UD(uint32_t v) _BitScanReverse(&idx, v); return 31 ^ idx; - #else +#else unsigned ret = 0; - unsigned tmp; - - tmp = !(v & 0xFFFF0000) << 4; v <<= tmp; ret += tmp; + unsigned tmp = !(v & 0xFFFF0000) << 4; v <<= tmp; ret += tmp; tmp = !(v & 0xFF000000) << 3; 
v <<= tmp; ret += tmp; tmp = !(v & 0xF0000000) << 2; v <<= tmp; ret += tmp; tmp = !(v & 0xC0000000) << 1; v <<= tmp; ret += tmp; tmp = !(v & 0x80000000) << 0; ret += tmp; return(ret); - #endif +#endif } static INLINE unsigned MDFN_lzcount64_0UD(uint64_t v) { - #if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) +#if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) return __builtin_clzll(v); - #elif defined(_MSC_VER) - #if defined(_WIN64) +#elif defined(_MSC_VER) +#if defined(_WIN64) unsigned long idx; _BitScanReverse64(&idx, v); return 63 ^ idx; - #else +#else unsigned long idx0; unsigned long idx1; @@ -100,20 +75,17 @@ static INLINE unsigned MDFN_lzcount64_0UD(uint64_t v) idx0 += 32; return 63 ^ idx0; - #endif - #else +#endif +#else unsigned ret = 0; - unsigned tmp; - - tmp = !(v & 0xFFFFFFFF00000000ULL) << 5; v <<= tmp; ret += tmp; + unsigned tmp = !(v & 0xFFFFFFFF00000000ULL) << 5; v <<= tmp; ret += tmp; tmp = !(v & 0xFFFF000000000000ULL) << 4; v <<= tmp; ret += tmp; tmp = !(v & 0xFF00000000000000ULL) << 3; v <<= tmp; ret += tmp; tmp = !(v & 0xF000000000000000ULL) << 2; v <<= tmp; ret += tmp; tmp = !(v & 0xC000000000000000ULL) << 1; v <<= tmp; ret += tmp; tmp = !(v & 0x8000000000000000ULL) << 0; ret += tmp; - return(ret); - #endif +#endif } static INLINE unsigned MDFN_tzcount16_0UD(uint16_t v) @@ -139,74 +111,13 @@ static INLINE unsigned MDFN_tzcount16_0UD(uint16_t v) #endif } -static INLINE unsigned MDFN_tzcount32_0UD(uint32_t v) -{ - #if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) - return __builtin_ctz(v); - #elif defined(_MSC_VER) - unsigned long idx; - - _BitScanForward(&idx, v); - - return idx; - #else - unsigned ret = 0; - unsigned tmp; - - tmp = !((uint16_t)v) << 4; v >>= tmp; ret += tmp; - tmp = !( (uint8_t)v) << 3; v >>= tmp; ret += tmp; - tmp = !(v & 0x000F) << 2; v >>= tmp; ret += tmp; - tmp = !(v & 0x0003) << 1; v >>= tmp; ret += 
tmp; - tmp = !(v & 0x0001) << 0; ret += tmp; - - return ret; - #endif -} - -static INLINE unsigned MDFN_tzcount64_0UD(uint64_t v) -{ - #if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) - return __builtin_ctzll(v); - #elif defined(_MSC_VER) - #if defined(_WIN64) - unsigned long idx; - _BitScanForward64(&idx, v); - return idx; - #else - unsigned long idx0, idx1; - - _BitScanForward(&idx1, v >> 32); - idx1 += 32; - if(!_BitScanForward(&idx0, v)) - idx0 = idx1; - - return idx0; - #endif - #else - unsigned ret = 0; - unsigned tmp; - - tmp = !((uint32_t)v) << 5; v >>= tmp; ret += tmp; - tmp = !((uint16_t)v) << 4; v >>= tmp; ret += tmp; - tmp = !( (uint8_t)v) << 3; v >>= tmp; ret += tmp; - tmp = !(v & 0x000F) << 2; v >>= tmp; ret += tmp; - tmp = !(v & 0x0003) << 1; v >>= tmp; ret += tmp; - tmp = !(v & 0x0001) << 0; ret += tmp; - - return ret; - #endif -} - // // Result is defined for all possible inputs(including 0). // -static INLINE unsigned MDFN_lzcount16(uint16_t v) { return !v ? 16 : MDFN_lzcount16_0UD(v); } static INLINE unsigned MDFN_lzcount32(uint32_t v) { return !v ? 32 : MDFN_lzcount32_0UD(v); } static INLINE unsigned MDFN_lzcount64(uint64_t v) { return !v ? 64 : MDFN_lzcount64_0UD(v); } static INLINE unsigned MDFN_tzcount16(uint16_t v) { return !v ? 16 : MDFN_tzcount16_0UD(v); } -static INLINE unsigned MDFN_tzcount32(uint32_t v) { return !v ? 32 : MDFN_tzcount32_0UD(v); } -static INLINE unsigned MDFN_tzcount64(uint64_t v) { return !v ? 64 : MDFN_tzcount64_0UD(v); } // 0-undefined-input log2. Single 64-bit form; 32-bit callers promote // cleanly. (Was a set of C++ overloads; only round_up_pow2 ever called @@ -235,20 +146,4 @@ static INLINE uint64_t round_up_pow2(uint64_t v) { uint64_t tmp = (uint64_t)1 << // convert those faster with typecasts... 
#define sign_x_to_s32(_bits, _value) (((int32_t)((uint32_t)(_value) << (32 - _bits))) >> (32 - _bits)) -static INLINE int32_t clamp_to_u8(int32_t i) -{ - if(i & 0xFFFFFF00) - i = (((~i) >> 30) & 0xFF); - - return(i); -} - -static INLINE int32_t clamp_to_u16(int32_t i) -{ - if(i & 0xFFFF0000) - i = (((~i) >> 31) & 0xFFFF); - - return(i); -} - #endif diff --git a/mednafen/mednafen-driver.h b/mednafen/mednafen-driver.h deleted file mode 100644 index 897036cc..00000000 --- a/mednafen/mednafen-driver.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __MDFN_MEDNAFEN_DRIVER_H -#define __MDFN_MEDNAFEN_DRIVER_H - -#include -#include -#include - -#include "settings-common.h" - -extern std::vectorMDFNSystems; - -// Call this function as early as possible, even before MDFNI_Initialize() -bool MDFNI_InitializeModule(void); - -/* Sets the base directory(save states, snapshots, etc. are saved in directories - below this directory. */ -void MDFNI_SetBaseDirectory(const char *dir); - -/* Closes currently loaded game */ -void MDFNI_CloseGame(void); - -#endif diff --git a/mednafen/mednafen-types.h b/mednafen/mednafen-types.h index 9b308330..2df389af 100644 --- a/mednafen/mednafen-types.h +++ b/mednafen/mednafen-types.h @@ -90,11 +90,24 @@ // noclone and emits -Wunknown-attributes at every use site. Keep // noclone on real GCC and elide it on clang; the other attributes // (hot, cold, always_inline, visibility) are supported by both. +// +// MDFN_UNREACHABLE tells the optimizer that a code path is dead +// (e.g. the `default:` arm of a switch over a known-enumerated +// integer). GCC and clang expose `__builtin_unreachable()`; MSVC +// has had the equivalent `__assume(0)` since VS 2005, which long +// predates the MSVC C89 target this codebase still wants to +// compile under. The fallback path expands to nothing, which is +// always safe -- the worst case is a bounds-check the optimizer +// could otherwise have elided. 
Useful for dense switch dispatches +// where every value is accounted for, so the compiler can drop the +// jump-table bounds check (one indirect jump beats a compare + +// branch + indirect jump). #if defined(__GNUC__) && !defined(__clang__) #define MDFN_HOT __attribute__((hot)) #define MDFN_COLD __attribute__((cold)) #define NO_CLONE __attribute__((noclone)) #define MDFN_FORCE_INLINE __attribute__((always_inline)) inline + #define MDFN_UNREACHABLE __builtin_unreachable() #if defined(_WIN32) || defined(__CYGWIN__) #define MDFN_HIDE #else @@ -105,6 +118,7 @@ #define MDFN_COLD __attribute__((cold)) #define NO_CLONE #define MDFN_FORCE_INLINE __attribute__((always_inline)) inline + #define MDFN_UNREACHABLE __builtin_unreachable() #if defined(_WIN32) || defined(__CYGWIN__) #define MDFN_HIDE #else @@ -116,12 +130,14 @@ #define MDFN_HIDE #define NO_CLONE #define MDFN_FORCE_INLINE __forceinline + #define MDFN_UNREACHABLE __assume(0) #else #define MDFN_HOT #define MDFN_COLD #define MDFN_HIDE #define NO_CLONE #define MDFN_FORCE_INLINE inline + #define MDFN_UNREACHABLE /* nothing -- compiler keeps any bounds checks */ #endif #ifdef __cplusplus @@ -144,15 +160,30 @@ template typename std::remove_all_extents::type* MDAP(T* v) { ret #elif !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define MDFN_STATIC_ASSERT(c_, msg_) _Static_assert((c_), msg_) #else - #ifdef __COUNTER__ - #define MDFN_STATIC_ASSERT_ID_ __COUNTER__ - #else - #define MDFN_STATIC_ASSERT_ID_ __LINE__ - #endif + /* C89 / MSVC-89 / pre-C++11 fallback. + * + * Uniqueness via __LINE__ -- specifically NOT __COUNTER__. Several + * sites in this codebase encode counter checkpoints in their + * assertion conditions (e.g. + * `MDFN_STATIC_ASSERT(__COUNTER__ == 5000, "...")` + * `MDFN_STATIC_ASSERT(__COUNTER__ == 5000 + 393 + 512 + 1, "...")` + * in sh7095_ops.inc and sh7095.inc). 
These rely on __COUNTER__ + * being incremented exactly once per textual occurrence of + * __COUNTER__ in the source. + * + * If MDFN_STATIC_ASSERT itself expanded __COUNTER__ for its typedef + * name, every assertion would silently consume one extra counter + * value past what its own condition expanded -- so checkpoints + * placed N asserts after the previous checkpoint would drift by N. + * + * __LINE__ does not have this problem (it's a property of the + * source location, not a side-effect-bearing macro), and there + * is no instance of two MDFN_STATIC_ASSERT() invocations on the + * same source line anywhere in the codebase. */ #define MDFN_STATIC_ASSERT_CAT2_(a_, b_) a_##b_ #define MDFN_STATIC_ASSERT_CAT_(a_, b_) MDFN_STATIC_ASSERT_CAT2_(a_, b_) #define MDFN_STATIC_ASSERT(c_, msg_) \ - typedef char MDFN_STATIC_ASSERT_CAT_(_mdfn_static_assert_, MDFN_STATIC_ASSERT_ID_) \ + typedef char MDFN_STATIC_ASSERT_CAT_(_mdfn_static_assert_, __LINE__) \ [(c_) ? 1 : -1] MDFN_NOWARN_UNUSED #endif diff --git a/mednafen/mednafen.h b/mednafen/mednafen.h index f13f7680..b9ee7153 100644 --- a/mednafen/mednafen.h +++ b/mednafen/mednafen.h @@ -6,11 +6,6 @@ #include #include -#define trio_sprintf sprintf /*compatibility with mednafen on libretro*/ -#define trio_snprintf snprintf /*compatibility with mednafen on libretro*/ - -#define _(String) (String) - #include "math_ops.h" #include "git.h" @@ -22,9 +17,4 @@ extern MDFNGI *MDFNGameInfo; #include "settings.h" -void MDFN_DispMessage(const char *format, ...); - -#include "mednafen-driver.h" - - #endif diff --git a/mednafen/mempatcher-driver.h b/mednafen/mempatcher-driver.h deleted file mode 100644 index 82825bd4..00000000 --- a/mednafen/mempatcher-driver.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef __MDFN_MEMPATCHER_DRIVER_H -#define __MDFN_MEMPATCHER_DRIVER_H - -struct MemoryPatch -{ - MemoryPatch(); - ~MemoryPatch(); - std::string name; - std::string conditions; - uint32_t addr; - uint64_t val; - uint64_t compare; - uint32_t 
mltpl_count; - uint32_t mltpl_addr_inc; - uint64_t mltpl_val_inc; - uint32_t copy_src_addr; - uint32_t copy_src_addr_inc; - unsigned length; - bool bigendian; - bool status; // (in)active - unsigned icount; - char type; /* 'R' for replace, 'S' for substitute(GG), 'C' for substitute with compare */ - /* 'T' for copy/transfer data, 'A' for add(variant of type R) */ - //enum { TypeReplace, TypeSubst, TypeCompSubst }; - //int type; -}; - -int MDFNI_DecodePAR(const char *code, uint32_t *a, uint8_t *v, uint8_t *c, char *type); -int MDFNI_DecodeGG(const char *str, uint32_t *a, uint8_t *v, uint8_t *c, char *type); -int MDFNI_AddCheat(const char *name, uint32_t addr, uint64_t val, uint64_t compare, char type, unsigned int length, bool bigendian); -int MDFNI_DelCheat(uint32_t which); -int MDFNI_ToggleCheat(uint32_t which); - -int32_t MDFNI_CheatSearchGetCount(void); -void MDFNI_CheatSearchGetRange(uint32_t first, uint32_t last, int (*callb)(uint32_t a, uint8_t last, uint8_t current)); -void MDFNI_CheatSearchGet(int (*callb)(uint32_t a, uint64_t last, uint64_t current, void *data), void *data); -void MDFNI_CheatSearchBegin(void); -void MDFNI_CheatSearchEnd(int type, uint64_t v1, uint64_t v2, unsigned int bytelen, bool bigendian); -void MDFNI_ListCheats(int (*callb)(char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian, void *data), void *data); - -int MDFNI_GetCheat(uint32_t which, char **name, uint32_t *a, uint64_t *v, uint64_t *compare, int *s, char *type, unsigned int *length, bool *bigendian); -int MDFNI_SetCheat(uint32_t which, const char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian); - -void MDFNI_CheatSearchShowExcluded(void); -void MDFNI_CheatSearchSetCurrentAsOriginal(void); - -#endif diff --git a/mednafen/mempatcher.c b/mednafen/mempatcher.c index 36a41d9a..076fa9a7 100644 --- a/mednafen/mempatcher.c +++ b/mednafen/mempatcher.c @@ -28,10 +28,8 @@ #include 
#include "settings.h" -#include "settings-common.h" #include "mempatcher.h" - extern retro_log_printf_t log_cb; static uint8_t **RAMPtrs = NULL; @@ -196,7 +194,6 @@ void MDFNMP_Kill(void) } } - void MDFNMP_AddRAM(uint32_t size, uint32_t A, uint8_t *RAM) { uint32_t AB = A / PageSize; @@ -244,36 +241,6 @@ void MDFNMP_RemoveReadPatches(void) #endif } -/* This function doesn't allocate any memory for "name" */ -static int AddCheatEntry(char *name, char *conditions, uint32_t addr, uint64_t val, uint64_t compare, int status, char type, unsigned int length, bool bigendian) -{ - CHEATF temp; - - memset(&temp, 0, sizeof(CHEATF)); - - temp.name=name; - temp.conditions = conditions; - temp.addr=addr; - temp.val=val; - temp.status=status; - temp.compare=compare; - temp.length = length; - temp.bigendian = bigendian; - temp.type=type; - - if(cheats_count >= cheats_cap) - { - size_t newcap = cheats_cap ? cheats_cap * 2 : 8; - CHEATF *np = (CHEATF *)realloc(cheats, newcap * sizeof(CHEATF)); - if(!np) - return(0); - cheats = np; - cheats_cap = newcap; - } - cheats[cheats_count++] = temp; - return(1); -} - void MDFN_LoadGameCheats(void) { RebuildSubCheats(); @@ -294,46 +261,6 @@ void MDFN_FlushGameCheats(void) RebuildSubCheats(); } -int MDFNI_AddCheat(const char *name, uint32_t addr, uint64_t val, uint64_t compare, char type, unsigned int length, bool bigendian) -{ - char *t; - - if(!(t = strdup(name))) - return(0); - - if(!AddCheatEntry(t, NULL, addr,val,compare,1,type, length, bigendian)) - { - free(t); - return(0); - } - - savecheats = 1; - - MDFNMP_RemoveReadPatches(); - RebuildSubCheats(); - MDFNMP_InstallReadPatches(); - - return(1); -} - -int MDFNI_DelCheat(uint32_t which) -{ - free(cheats[which].name); - /* erase element 'which': shift the tail down one slot. 
*/ - if((size_t)which + 1 < cheats_count) - memmove(&cheats[which], &cheats[which + 1], - (cheats_count - which - 1) * sizeof(CHEATF)); - cheats_count--; - - savecheats=1; - - MDFNMP_RemoveReadPatches(); - RebuildSubCheats(); - MDFNMP_InstallReadPatches(); - - return(1); -} - /* Condition format(ws = white space): @@ -509,249 +436,6 @@ void MDFNMP_ApplyPeriodicCheats(void) } } - -void MDFNI_ListCheats(int (*callb)(char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian, void *data), void *data) -{ - size_t ci; - - for(ci = 0; ci < cheats_count; ci++) - { - CHEATF *chit = &cheats[ci]; - if(!callb(chit->name, chit->addr, chit->val, chit->compare, chit->status, chit->type, chit->length, chit->bigendian, data)) break; - } -} - -int MDFNI_GetCheat(uint32_t which, char **name, uint32_t *a, uint64_t *v, uint64_t *compare, int *s, char *type, unsigned int *length, bool *bigendian) -{ - CHEATF *next = &cheats[which]; - - if(name) - *name=next->name; - if(a) - *a=next->addr; - if(v) - *v=next->val; - if(s) - *s=next->status; - if(compare) - *compare=next->compare; - if(type) - *type=next->type; - if(length) - *length = next->length; - if(bigendian) - *bigendian = next->bigendian; - return(1); -} - -static uint8_t CharToNibble(char thechar) -{ - const char lut[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - int x; - - thechar = toupper(thechar); - - for(x = 0; x < 16; x++) - if(lut[x] == thechar) - return(x); - - return(0xFF); -} - -bool MDFNI_DecodeGBGG(const char *instr, uint32_t *a, uint8_t *v, uint8_t *c, char *type) -{ - char str[10]; - int len; - int x; - uint32_t tmp_address; - uint8_t tmp_value; - uint8_t tmp_compare = 0; - - for(x = 0; x < 9; x++) - { - while(*instr && CharToNibble(*instr) == 255) - instr++; - if(!(str[x] = *instr)) break; - instr++; - } - str[9] = 0; - - len = strlen(str); - - if(len != 9 && len != 6) - return(0); - - tmp_address = (CharToNibble(str[5]) << 
12) | (CharToNibble(str[2]) << 8) | (CharToNibble(str[3]) << 4) | (CharToNibble(str[4]) << 0); - tmp_address ^= 0xF000; - tmp_value = (CharToNibble(str[0]) << 4) | (CharToNibble(str[1]) << 0); - - if(len == 9) - { - tmp_compare = (CharToNibble(str[6]) << 4) | (CharToNibble(str[8]) << 0); - tmp_compare = (tmp_compare >> 2) | ((tmp_compare << 6) & 0xC0); - tmp_compare ^= 0xBA; - } - - *a = tmp_address; - *v = tmp_value; - - if(len == 9) - { - *c = tmp_compare; - *type = 'C'; - } - else - { - *c = 0; - *type = 'S'; - } - - return(1); -} - -static int GGtobin(char c) -{ - static char lets[16]={'A','P','Z','L','G','I','T','Y','E','O','X','U','K','S','V','N'}; - int x; - - for(x=0;x<16;x++) - if(lets[x] == toupper(c)) return(x); - return(0); -} - -/* Returns 1 on success, 0 on failure. Sets *a,*v,*c. */ -int MDFNI_DecodeGG(const char *str, uint32_t *a, uint8_t *v, uint8_t *c, char *type) -{ - uint16_t A; - uint8_t V,C; - uint8_t t; - int s; - - A=0x8000; - V=0; - C=0; - - s=strlen(str); - if(s!=6 && s!=8) return(0); - - t=GGtobin(*str++); - V|=(t&0x07); - V|=(t&0x08)<<4; - - t=GGtobin(*str++); - V|=(t&0x07)<<4; - A|=(t&0x08)<<4; - - t=GGtobin(*str++); - A|=(t&0x07)<<4; - //if(t&0x08) return(0); /* 8-character code?! 
*/ - - t=GGtobin(*str++); - A|=(t&0x07)<<12; - A|=(t&0x08); - - t=GGtobin(*str++); - A|=(t&0x07); - A|=(t&0x08)<<8; - - if(s==6) - { - t=GGtobin(*str++); - A|=(t&0x07)<<8; - V|=(t&0x08); - - *a=A; - *v=V; - *type = 'S'; - *c = 0; - } - else - { - t=GGtobin(*str++); - A|=(t&0x07)<<8; - C|=(t&0x08); - - t=GGtobin(*str++); - C|=(t&0x07); - C|=(t&0x08)<<4; - - t=GGtobin(*str++); - C|=(t&0x07)<<4; - V|=(t&0x08); - *a=A; - *v=V; - *c=C; - *type = 'C'; - } - - return(1); -} - -int MDFNI_DecodePAR(const char *str, uint32_t *a, uint8_t *v, uint8_t *c, char *type) -{ - int boo[4]; - if(strlen(str)!=8) return(0); - - sscanf(str,"%02x%02x%02x%02x",boo,boo+1,boo+2,boo+3); - - *c = 0; - - if(1) - { - *a=(boo[3]<<8)|(boo[2]+0x7F); - *v=0; - } - else - { - *v=boo[3]; - *a=boo[2]|(boo[1]<<8); - } - - *type = 'S'; - return(1); -} - -/* name can be NULL if the name isn't going to be changed. */ -int MDFNI_SetCheat(uint32_t which, const char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian) -{ - CHEATF *next = &cheats[which]; - - if(name) - { - char *t; - - if((t=(char *)realloc(next->name,strlen(name+1)))) - { - next->name=t; - strcpy(next->name,name); - } - else - return(0); - } - next->addr=a; - next->val=v; - next->status=s; - next->compare=compare; - next->type=type; - next->length = length; - next->bigendian = bigendian; - - RebuildSubCheats(); - savecheats=1; - - return(1); -} - -/* Convenience function. 
*/ -int MDFNI_ToggleCheat(uint32_t which) -{ - cheats[which].status = !cheats[which].status; - savecheats = 1; - RebuildSubCheats(); - - return(cheats[which].status); -} - static void SettingChanged(const char *name) { MDFNMP_RemoveReadPatches(); diff --git a/mednafen/mempatcher.h b/mednafen/mempatcher.h index f307350d..31e7eeff 100644 --- a/mednafen/mempatcher.h +++ b/mednafen/mempatcher.h @@ -31,19 +31,6 @@ void MDFNMP_RegSearchable(uint32_t addr, uint32_t size); void MDFN_LoadGameCheats(void); void MDFN_FlushGameCheats(void); -/* Cheat code decoders and the cheat-list interface. Defined in - * mempatcher.c; currently no in-tree caller, but kept declared so the - * definitions are prototyped and remain reachable. */ -int MDFNI_DecodePAR(const char *str, uint32_t *a, uint8_t *v, uint8_t *c, char *type); -int MDFNI_DecodeGG(const char *str, uint32_t *a, uint8_t *v, uint8_t *c, char *type); -bool MDFNI_DecodeGBGG(const char *instr, uint32_t *a, uint8_t *v, uint8_t *c, char *type); -int MDFNI_AddCheat(const char *name, uint32_t addr, uint64_t val, uint64_t compare, char type, unsigned int length, bool bigendian); -int MDFNI_DelCheat(uint32_t which); -int MDFNI_ToggleCheat(uint32_t which); -int MDFNI_GetCheat(uint32_t which, char **name, uint32_t *a, uint64_t *v, uint64_t *compare, int *s, char *type, unsigned int *length, bool *bigendian); -int MDFNI_SetCheat(uint32_t which, const char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian); -void MDFNI_ListCheats(int (*callb)(char *name, uint32_t a, uint64_t v, uint64_t compare, int s, char type, unsigned int length, bool bigendian, void *data), void *data); - extern MDFNSetting MDFNMP_Settings[]; #ifdef __cplusplus diff --git a/mednafen/settings-common.h b/mednafen/settings-common.h index 9f5e4996..52d02dd1 100644 --- a/mednafen/settings-common.h +++ b/mednafen/settings-common.h @@ -2,7 +2,7 @@ #define _MDFN_SETTINGS_COMMON_H #include -#include +#include typedef enum 
{ @@ -17,20 +17,6 @@ typedef enum #define MDFNSF_NOFLAGS 0 -#define MDFNSF_CAT_INPUT (1 << 8) -#define MDFNSF_CAT_SOUND (1 << 9) -#define MDFNSF_CAT_VIDEO (1 << 10) - -#define MDFNSF_EMU_STATE (1 << 17) -#define MDFNSF_UNTRUSTED_SAFE (1 << 18) - -#define MDFNSF_SUPPRESS_DOC (1 << 19) -#define MDFNSF_COMMON_TEMPLATE (1 << 20) -#define MDFNSF_NONPERSISTENT (1 << 21) // Don't save setting in settings file. - -#define MDFNSF_REQUIRES_RELOAD (1 << 24) -#define MDFNSF_REQUIRES_RESTART (1 << 25) - typedef struct { const char *string; diff --git a/mednafen/settings-driver.h b/mednafen/settings-driver.h deleted file mode 100644 index 5a3ad28d..00000000 --- a/mednafen/settings-driver.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _MDFN_SETTINGS_DRIVER_H -#define _MDFN_SETTINGS_DRIVER_H - -#include - -#include "settings-common.h" - -#endif diff --git a/mednafen/settings.h b/mednafen/settings.h index 97e4b0c2..cdc6e588 100644 --- a/mednafen/settings.h +++ b/mednafen/settings.h @@ -2,7 +2,7 @@ #define MDFN_SETTINGS_H #include -#include +#include #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/a64emit.c b/mednafen/ss/a64emit.c new file mode 100644 index 00000000..fc6b0378 --- /dev/null +++ b/mednafen/ss/a64emit.c @@ -0,0 +1,1189 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* a64emit.c - minimal AArch64 instruction emitter (C, integer-only) +** Copyright (C) 2026 pstef +*/ + +/* + * Targets Linux/aarch64 with GNU C extensions (inline asm for cache + * maintenance). Apple/Windows AArch64 are not handled here; on a + * non-AArch64 host or on a non-Linux AArch64 host every entry point + * compiles to either a no-op or NULL. 
+ */ + +#include "a64emit.h" + +#include +#include +#include +#include +#include + +/* ==================================================================== + * Section 1 -- non-aarch64 host: stub out every entry point. + * ==================================================================== */ +#if !(defined(__aarch64__) || defined(__arm64__)) || !defined(__linux__) + +struct a64_codegen { int dummy; }; + +a64_codegen* a64_codegen_create(size_t bytes) { (void)bytes; return NULL; } +void a64_codegen_destroy(a64_codegen* cg) { (void)cg; } +void* a64_codegen_base (const a64_codegen* cg) { (void)cg; return NULL; } +void* a64_codegen_wptr (const a64_codegen* cg) { (void)cg; return NULL; } +size_t a64_codegen_offset (const a64_codegen* cg) { (void)cg; return 0; } +size_t a64_codegen_capacity(const a64_codegen* cg) { (void)cg; return 0; } +size_t a64_codegen_remaining(const a64_codegen* cg) { (void)cg; return 0; } +void a64_codegen_set_wptr(a64_codegen* cg, void* p) { (void)cg; (void)p; } +void* a64_codegen_save (const a64_codegen* cg) { (void)cg; return NULL; } +void a64_codegen_restore(a64_codegen* cg, void* p) { (void)cg; (void)p; } +void a64_codegen_invalidate(a64_codegen* cg, void* p, size_t b) { (void)cg; (void)p; (void)b; } + +void a64_label_reset(a64_label* l) { if(l) memset(l, 0, sizeof *l); } +void a64_label_bind (a64_codegen* cg, a64_label* l) { (void)cg; (void)l; } + +int a64_can_encode_logical_imm32(uint32_t imm) { (void)imm; return 0; } +int a64_can_encode_logical_imm64(uint64_t imm) { (void)imm; return 0; } +int a64_can_encode_addsub_imm (uint32_t imm) { (void)imm; return 0; } + +void a64_ldr_x_pool (a64_codegen* cg, unsigned xd, uint64_t v) +{ (void)cg; (void)xd; (void)v; } +void a64_pool_flush (a64_codegen* cg) { (void)cg; } +void a64_pool_reset (a64_codegen* cg) { (void)cg; } +unsigned a64_pool_pending(const a64_codegen* cg) { (void)cg; return 0; } + +int a64_patch_b (void* s, const void* t) { (void)s; (void)t; return 0; } +int a64_patch_b_cond (void* s, 
const void* t) { (void)s; (void)t; return 0; } +int a64_patch_cbz (void* s, const void* t) { (void)s; (void)t; return 0; } +int a64_patch_tbz (void* s, const void* t) { (void)s; (void)t; return 0; } + +/* Every emitter is a no-op on the stub path; argument lists kept so + * callers compile. This is a stand-in matching the consumer convention + * that emit_* is silently inert when WANT_JIT or the host arch is wrong. */ +#define A64_STUB1(name, a1) void name(a64_codegen* cg, a1) { (void)cg; } +#define A64_STUB2(name, a1, a2) void name(a64_codegen* cg, a1, a2) { (void)cg; } +#define A64_STUB3(name, a1, a2, a3) void name(a64_codegen* cg, a1, a2, a3) { (void)cg; } +#define A64_STUB4(name, a1, a2, a3, a4) void name(a64_codegen* cg, a1, a2, a3, a4) { (void)cg; } +#define A64_STUB1_INT(name, a1) int name(a64_codegen* cg, a1) { (void)cg; return 0; } +#define A64_STUB3_INT(name, a1, a2, a3) int name(a64_codegen* cg, a1, a2, a3) { (void)cg; return 0; } + +A64_STUB2(a64_mov_w_imm, unsigned wd, uint32_t imm) +A64_STUB2(a64_mov_x_imm, unsigned xd, uint64_t imm) +A64_STUB2(a64_mov_w_reg, unsigned wd, unsigned wm) +A64_STUB2(a64_mov_x_reg, unsigned xd, unsigned xm) +A64_STUB1(a64_mov_x_sp, unsigned xd) +A64_STUB2(a64_movp2r, unsigned xd, const void* ptr) +A64_STUB2(a64_movp2r_pool, unsigned xd, const void* ptr) +A64_STUB3(a64_add_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3(a64_sub_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3(a64_add_x_imm, unsigned xd, unsigned xn, uint32_t imm) +A64_STUB3(a64_sub_x_imm, unsigned xd, unsigned xn, uint32_t imm) +A64_STUB3_INT(a64_try_add_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3_INT(a64_try_sub_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3_INT(a64_try_add_x_imm, unsigned xd, unsigned xn, uint32_t imm) +A64_STUB3_INT(a64_try_sub_x_imm, unsigned xd, unsigned xn, uint32_t imm) +A64_STUB3(a64_add_w_reg, unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_sub_w_reg, unsigned wd, unsigned wn, 
unsigned wm) +A64_STUB3(a64_add_x_reg, unsigned xd, unsigned xn, unsigned xm) +A64_STUB3(a64_sub_x_reg, unsigned xd, unsigned xn, unsigned xm) +A64_STUB3(a64_adds_w_imm,unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3(a64_subs_w_imm,unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3(a64_adds_w_reg,unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_subs_w_reg,unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_ands_w_reg,unsigned wd, unsigned wn, unsigned wm) +A64_STUB2(a64_cmp_w_imm, unsigned wn, uint32_t imm) +A64_STUB2(a64_cmp_w_reg, unsigned wn, unsigned wm) +A64_STUB2(a64_tst_w_reg, unsigned wn, unsigned wm) +A64_STUB2(a64_tst_x_reg, unsigned xn, unsigned xm) +A64_STUB2(a64_cset_w, unsigned wd, unsigned cond) +A64_STUB4(a64_csel_w, unsigned wd, unsigned wn, unsigned wm, unsigned cond) +A64_STUB3_INT(a64_and_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3_INT(a64_orr_w_imm, unsigned wd, unsigned wn, uint32_t imm) +A64_STUB3(a64_and_w_reg, unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_orr_w_reg, unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_eor_w_reg, unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_and_x_reg, unsigned xd, unsigned xn, unsigned xm) +A64_STUB3(a64_eor_x_reg, unsigned xd, unsigned xn, unsigned xm) +A64_STUB3(a64_bic_x_reg, unsigned xd, unsigned xn, unsigned xm) +A64_STUB4(a64_orr_w_reg_lsl, unsigned wd, unsigned wn, unsigned wm, unsigned shift) +A64_STUB3(a64_lsl_w_imm, unsigned wd, unsigned wn, unsigned shift) +A64_STUB3(a64_lsr_w_imm, unsigned wd, unsigned wn, unsigned shift) +A64_STUB3(a64_asr_w_imm, unsigned wd, unsigned wn, unsigned shift) +A64_STUB3(a64_ror_w_imm, unsigned wd, unsigned wn, unsigned shift) +A64_STUB3(a64_asr_w_reg, unsigned wd, unsigned wn, unsigned wm) +A64_STUB3(a64_lsl_x_imm, unsigned xd, unsigned xn, unsigned shift) +A64_STUB3(a64_lsr_x_imm, unsigned xd, unsigned xn, unsigned shift) +A64_STUB3(a64_asr_x_imm, unsigned xd, unsigned xn, unsigned shift) +A64_STUB4(a64_ubfx_w, unsigned wd, 
unsigned wn, unsigned lsb, unsigned width) +A64_STUB4(a64_sbfx_w, unsigned wd, unsigned wn, unsigned lsb, unsigned width) +A64_STUB4(a64_bfi_w, unsigned wd, unsigned wn, unsigned lsb, unsigned width) +A64_STUB4(a64_bfi_x, unsigned xd, unsigned xn, unsigned lsb, unsigned width) +A64_STUB2(a64_sxtw, unsigned xd, unsigned wn) +A64_STUB2(a64_clz_w, unsigned wd, unsigned wn) +A64_STUB3(a64_smull, unsigned xd, unsigned wn, unsigned wm) +A64_STUB2(a64_neg_w, unsigned wd, unsigned wm) +A64_STUB2(a64_cbz_w, unsigned wn, a64_label* l) +A64_STUB2(a64_cbnz_w, unsigned wn, a64_label* l) +A64_STUB3(a64_tbnz_w, unsigned wn, unsigned bit, a64_label* l) +A64_STUB1(a64_b, a64_label* l) +A64_STUB2(a64_b_cond, unsigned cond, a64_label* l) +A64_STUB1(a64_b_addr, const void* addr) +A64_STUB1(a64_br, unsigned xn) +A64_STUB1(a64_blr, unsigned xn) +void a64_ret(a64_codegen* cg) { (void)cg; } +A64_STUB3(a64_ldr_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_str_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_ldr_x_imm, unsigned xt, unsigned xn, uint32_t off) +A64_STUB3(a64_str_x_imm, unsigned xt, unsigned xn, uint32_t off) +A64_STUB3(a64_ldrsw_x_imm,unsigned xt, unsigned xn, uint32_t off) +A64_STUB3(a64_ldrh_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_strh_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_ldrb_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_strb_w_imm, unsigned wt, unsigned xn, uint32_t off) +A64_STUB3(a64_ldur_w, unsigned wt, unsigned xn, int off) +A64_STUB3(a64_stur_w, unsigned wt, unsigned xn, int off) +A64_STUB3(a64_ldr_w_reg, unsigned wt, unsigned xn, unsigned xm) +A64_STUB3(a64_str_w_reg, unsigned wt, unsigned xn, unsigned xm) +A64_STUB3(a64_ldrh_w_reg, unsigned wt, unsigned xn, unsigned xm) +A64_STUB3(a64_strh_w_reg, unsigned wt, unsigned xn, unsigned xm) +A64_STUB4(a64_ldr_w_idx_lsl, unsigned wt, unsigned xn, unsigned xm, unsigned shift) +A64_STUB4(a64_str_w_idx_lsl, unsigned wt, unsigned xn, 
unsigned xm, unsigned shift) +A64_STUB4(a64_ldr_x_idx_lsl, unsigned xt, unsigned xn, unsigned xm, unsigned shift) +A64_STUB4(a64_ldr_w_uxtw, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +A64_STUB4(a64_str_w_uxtw, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +A64_STUB4(a64_ldrh_w_uxtw, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +A64_STUB4(a64_strh_w_uxtw, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +A64_STUB3(a64_stp_x_pre, unsigned xt1, unsigned xt2, int off) +A64_STUB3(a64_ldp_x_post, unsigned xt1, unsigned xt2, int off) +A64_STUB4(a64_stp_x_off, unsigned xt1, unsigned xt2, unsigned xn, int off) +A64_STUB4(a64_ldp_x_off, unsigned xt1, unsigned xt2, unsigned xn, int off) +void a64_nop(a64_codegen* cg) { (void)cg; } + +#else /* AArch64 + Linux */ + +#include + +/* ==================================================================== + * Section 2 -- CodeGenerator state. + * ==================================================================== */ + +struct a64_pool_ref { + ptrdiff_t wb_off; /* byte offset of LDR site from base */ + unsigned entry; /* index into pool_values[] */ +}; + +struct a64_codegen { + uint32_t* base; /* base of the mmap'd region */ + uint32_t* wp; /* current write pointer (always within [base, end]) */ + size_t size; /* bytes in the region */ + + /* Embedded 64-bit constant pool. 
*/ + uint64_t pool_values[A64_POOL_MAX_ENTRIES]; + unsigned pool_count; + struct a64_pool_ref pool_refs[A64_POOL_MAX_REFS]; + unsigned pool_ref_count; +}; + +a64_codegen* a64_codegen_create(size_t bytes) +{ + a64_codegen* cg; + void* mem; + + cg = (a64_codegen*)calloc(1, sizeof *cg); + if(!cg) return NULL; + + mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANON | MAP_PRIVATE, -1, 0); + if(mem == MAP_FAILED) { + free(cg); + return NULL; + } + + cg->base = (uint32_t*)mem; + cg->wp = (uint32_t*)mem; + cg->size = bytes; + return cg; +} + +void a64_codegen_destroy(a64_codegen* cg) +{ + if(!cg) return; + if(cg->base) munmap(cg->base, cg->size); + free(cg); +} + +void* a64_codegen_base (const a64_codegen* cg) { return cg ? (void*)cg->base : NULL; } +void* a64_codegen_wptr (const a64_codegen* cg) { return cg ? (void*)cg->wp : NULL; } +size_t a64_codegen_capacity(const a64_codegen* cg) { return cg ? cg->size : 0u; } + +size_t a64_codegen_offset(const a64_codegen* cg) +{ + if(!cg) return 0; + return (size_t)((char*)cg->wp - (char*)cg->base); +} + +size_t a64_codegen_remaining(const a64_codegen* cg) +{ + size_t used; + if(!cg) return 0; + used = (size_t)((char*)cg->wp - (char*)cg->base); + return (used <= cg->size) ? (cg->size - used) : 0u; +} + +void a64_codegen_set_wptr(a64_codegen* cg, void* p) +{ + if(cg) cg->wp = (uint32_t*)p; +} + +void* a64_codegen_save(const a64_codegen* cg) +{ + return cg ? (void*)cg->wp : NULL; +} + +void a64_codegen_restore(a64_codegen* cg, void* p) +{ + if(cg) cg->wp = (uint32_t*)p; +} + +/* + * Architectural icache invalidation for ARMv8: clean each D-cache line + * to PoU, then invalidate each I-cache line to PoU. The CTR_EL0 read + * gives us per-line size; we cache the floor across calls. 
+ */ +void a64_codegen_invalidate(a64_codegen* cg, void* mem, size_t bytes) +{ + static size_t icache_line_size = 0x10000; + static size_t dcache_line_size = 0x10000; + uint64_t ctr; + size_t isize, dsize; + uintptr_t addr, end; + + (void)cg; + if(!mem || !bytes) return; + + __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr)); + dsize = (size_t)4u << ((ctr >> 16) & 0xfu); + isize = (size_t)4u << ((ctr >> 0) & 0xfu); + if(dsize < dcache_line_size) dcache_line_size = dsize; + if(isize < icache_line_size) icache_line_size = isize; + dsize = dcache_line_size; + isize = icache_line_size; + + end = (uintptr_t)mem + bytes; + + for(addr = ((uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) + __asm__ volatile("dc cvau, %0" :: "r"(addr) : "memory"); + __asm__ volatile("dsb ish" ::: "memory"); + + for(addr = ((uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) + __asm__ volatile("ic ivau, %0" :: "r"(addr) : "memory"); + __asm__ volatile("dsb ish\nisb" ::: "memory"); +} + +/* ==================================================================== + * Section 3 -- low-level emit + immediate encoders. + * ==================================================================== */ + +#define A64_REG(r) ((uint32_t)((r) & 0x1Fu)) + +/* Patch kinds for forward-branch resolution. */ +enum { + A64_PATCH_B = 0, /* imm26 at bits 25..0 (B / BL) */ + A64_PATCH_BCOND = 1, /* imm19 at bits 23..5 (B.cond / CBZ / CBNZ) */ + A64_PATCH_TBNZ = 2 /* imm14 at bits 18..5 (TBZ / TBNZ) */ +}; + +static void emit_w(a64_codegen* cg, uint32_t w) +{ + *cg->wp++ = w; +} + +static int sint_fits(int64_t v, unsigned bits) +{ + int64_t lo = -((int64_t)1 << (bits - 1)); + int64_t hi = ((int64_t)1 << (bits - 1)) - 1; + return v >= lo && v <= hi; +} + +/* + * Encode `imm` as a 12-bit AddSubImm value with optional shift-by-12. + * Returns 1 on success and writes (shift22|imm10) into *out, where + * shift22 is the bit-22 marker (0 or (1<<22)) and imm10 is the 12-bit + * value left-shifted into bits 21..10. 
/*
 * encode_addsub_imm(): build the immediate field of an ADD/SUB
 * (immediate) instruction.
 *
 * Accepts either a plain 12-bit value (imm <= 0xFFF) or a 12-bit value
 * shifted left by 12 (low 12 bits zero, imm >> 12 <= 0xFFF).
 * On success returns 1 and stores the field in *out: the 12-bit value
 * in bits 21..10, plus bit 22 set for the shifted form.
 * On failure returns 0 and leaves *out untouched.
 */
static int encode_addsub_imm(uint32_t imm, uint32_t* out)
{
    if(imm <= 0xFFFu) {
        *out = imm << 10;
        return 1;
    }
    if((imm & 0xFFFu) == 0u && (imm >> 12) <= 0xFFFu) {
        *out = ((uint32_t)1u << 22) | ((imm >> 12) << 10);
        return 1;
    }
    return 0;
}

/*
 * Encode a 64-bit value as a 13-bit (N|immr|imms) logical-immediate
 * bit-pattern: N in bit 12, immr in bits 11..6, imms in bits 5..0.
 * Returns 1 on success and writes the field into *out; returns 0 (and
 * leaves *out untouched) when `imm` is not encodable, i.e. it is
 * all-zeros, all-ones, or not a replicated rotated run of ones.
 * The result is the inverse of the ARMv8-A "DecodeBitMasks" pseudocode.
 */
static int encode_logical_imm64(uint64_t imm, uint32_t* out)
{
    uint64_t elem = imm;   /* candidate element, held in the low `size` bits */
    uint64_t mask, run, rot, v;
    unsigned size = 64u;   /* element width in bits, always a power of two   */
    unsigned half, ones, rol;
    uint32_t N, immr, imms;

    if(imm == 0u || imm == ~(uint64_t)0u) return 0;

    /* Halve the element while both halves are identical: this finds the
     * smallest element whose replication reproduces `imm`. */
    while(size > 2u) {
        half = size >> 1;                       /* half <= 32: shift is defined */
        mask = ((uint64_t)1u << half) - 1u;
        if((elem & mask) != ((elem >> half) & mask)) break;
        elem &= mask;
        size  = half;
    }

    /* Population count of the element. */
    ones = 0u;
    for(v = elem; v != 0u; v >>= 1)
        ones += (unsigned)(v & 1u);
    if(ones == 0u || ones == size) return 0;

    /* ones < size <= 64 here, so the shift below cannot reach 64. */
    run  = ((uint64_t)1u << ones) - 1u;
    mask = (size == 64u) ? ~(uint64_t)0u : (((uint64_t)1u << size) - 1u);

    /* Find the left rotation (within `size` bits) that turns the run of
     * low ones into the element.  No match means the ones are not
     * contiguous under rotation, hence not encodable. */
    for(rol = 0u; rol < size; ++rol) {
        if(rol == 0u) rot = run & mask;
        else          rot = ((run << rol) | (run >> (size - rol))) & mask;
        if(rot == elem) break;
    }
    if(rol == size) return 0;

    /* DecodeBitMasks rebuilds the element as ROR(Ones(S+1), immr), so the
     * field holds a rotate-RIGHT amount.  A left rotation by `rol` is a
     * right rotation by (size - rol) modulo size; `size` is a power of
     * two, so the modulo is a mask. */
    immr = (uint32_t)((size - rol) & (size - 1u));

    if(size == 64u) {
        N    = 1u;
        imms = ones - 1u;
    } else {
        /* For element size 2^k (k=1..5), imms[5:k+1] = all-1, imms[k] = 0,
         * imms[k-1:0] = ones-1.  Equivalent to ((~(size-1)) << 1 | (ones-1))
         * masked to 6 bits.  The naked `~(size-1)` form (no shift) is a
         * common off-by-one trap -- it puts imms[k] = 1 and makes
         * DecodeBitMasks() return UNDEFINED. */
        N    = 0u;
        imms = (((~(unsigned)(size - 1u)) << 1) | (ones - 1u)) & 0x3Fu;
    }
    *out = (N << 12) | (immr << 6) | imms;
    return 1;
}

/*
 * 32-bit variant: replicate `imm` into both halves of a 64-bit value and
 * reuse the 64-bit encoder.  The 32-bit instruction form requires N=0,
 * so a result with N set is rejected.  Returns 1 and writes *out on
 * success, 0 otherwise.
 */
static int encode_logical_imm32(uint32_t imm, uint32_t* out)
{
    uint64_t imm64 = ((uint64_t)imm << 32) | imm;
    uint32_t enc;

    if(!encode_logical_imm64(imm64, &enc)) return 0;
    if((enc >> 12) & 1u) return 0;   /* N must be 0 for 32-bit form */
    *out = enc;
    return 1;
}

/* Return 1 when `imm` can be used as a 32-bit logical immediate, else 0. */
int a64_can_encode_logical_imm32(uint32_t imm)
{
    uint32_t enc;
    return encode_logical_imm32(imm, &enc);
}

/* Return 1 when `imm` can be used as a 64-bit logical immediate, else 0. */
int a64_can_encode_logical_imm64(uint64_t imm)
{
    uint32_t enc;
    return encode_logical_imm64(imm, &enc);
}

/* Return 1 when `imm` can be used as an ADD/SUB immediate, else 0. */
int a64_can_encode_addsub_imm(uint32_t imm)
{
    uint32_t fld;
    return encode_addsub_imm(imm, &fld);
}

/*
 * MovImm16 validity: imm fits in 16 bits left-shifted by 0/16 (W) or
 * 0/16/32/48 (X).  Writes the (hw, imm16) pair into *out_hw / *out_imm
 * on success and returns 1; returns 0 without writing otherwise.
 * The lowest matching halfword position is chosen.
 */
static int try_mov_imm16(uint64_t imm, int is_x, unsigned* out_hw, uint32_t* out_imm)
{
    unsigned max_hw = is_x ? 4u : 2u;
    unsigned hw;

    for(hw = 0; hw < max_hw; ++hw) {
        unsigned shift  = hw * 16u;
        uint64_t mask16 = (uint64_t)0xFFFFu << shift;

        if((imm & ~mask16) == 0u) {
            *out_hw  = hw;
            *out_imm = (uint32_t)((imm >> shift) & 0xFFFFu);
            return 1;
        }
    }
    return 0;
}

/* ====================================================================
 * Section 4 -- labels.
 * ==================================================================== */
+ * ==================================================================== */ + +void a64_label_reset(a64_label* l) +{ + if(l) memset(l, 0, sizeof *l); +} + +static void label_add_patch(a64_label* lbl, ptrdiff_t wb_off, unsigned kind) +{ + assert(lbl->patch_count < A64_LABEL_MAX_PATCHES); + lbl->patches[lbl->patch_count].wb_off = wb_off; + lbl->patches[lbl->patch_count].kind = kind; + lbl->patch_count++; +} + +void a64_label_bind(a64_codegen* cg, a64_label* lbl) +{ + unsigned i; + ptrdiff_t target; + + assert(!lbl->bound); + target = (char*)cg->wp - (char*)cg->base; + lbl->target_off = target; + lbl->bound = 1; + + for(i = 0; i < lbl->patch_count; ++i) { + ptrdiff_t wb = lbl->patches[i].wb_off; + unsigned kind = lbl->patches[i].kind; + int64_t delta = (int64_t)((target - wb) >> 2); + uint32_t* p = (uint32_t*)((char*)cg->base + wb); + + switch(kind) { + case A64_PATCH_B: + assert(sint_fits(delta, 26)); + *p |= (uint32_t)((uint64_t)delta & 0x3FFFFFFu); + break; + case A64_PATCH_BCOND: + assert(sint_fits(delta, 19)); + *p |= (uint32_t)(((uint64_t)delta & 0x7FFFFu) << 5); + break; + case A64_PATCH_TBNZ: + assert(sint_fits(delta, 14)); + *p |= (uint32_t)(((uint64_t)delta & 0x3FFFu) << 5); + break; + default: + assert(0); + } + } + lbl->patch_count = 0; +} + +/* ==================================================================== + * Section 5 -- instruction emitters. + * + * Each function builds the 32-bit instruction word from a base opcode + * pattern plus register and immediate fields, then appends it via + * emit_w(). Encoding references the ARMv8-A Reference Manual. 
+ * ==================================================================== */ + +/* --- MOV / pointer materialisation -------------------------------- */ + +void a64_mov_w_imm(a64_codegen* cg, unsigned wd, uint32_t imm) +{ + unsigned hw; + uint32_t imm16; + uint32_t enc; + + if(A64_REG(wd) == 31u) return; /* MOV WZR, imm : no-op */ + + if(try_mov_imm16(imm, 0, &hw, &imm16)) { + /* MOVZ Wd, #imm16, LSL #(hw*16) */ + emit_w(cg, 0x52800000u | ((uint32_t)hw << 21) | (imm16 << 5) | A64_REG(wd)); + return; + } + if(try_mov_imm16((uint32_t)~imm, 0, &hw, &imm16)) { + /* MOVN Wd, #~imm16, LSL #(hw*16) */ + emit_w(cg, 0x12800000u | ((uint32_t)hw << 21) | (imm16 << 5) | A64_REG(wd)); + return; + } + if(encode_logical_imm32(imm, &enc)) { + /* ORR Wd, WZR, #imm */ + emit_w(cg, 0x32000000u | (enc << 10) | (31u << 5) | A64_REG(wd)); + return; + } + /* MOVZ low half + MOVK high half. */ + emit_w(cg, 0x52800000u | ((uint32_t)0u << 21) | ((imm & 0xFFFFu) << 5) | A64_REG(wd)); + emit_w(cg, 0x72800000u | ((uint32_t)1u << 21) | (((imm >> 16) & 0xFFFFu) << 5) | A64_REG(wd)); +} + +void a64_mov_x_imm(a64_codegen* cg, unsigned xd, uint64_t imm) +{ + unsigned hw; + uint32_t imm16; + uint32_t enc; + unsigned hword[4]; + unsigned zero_count; + unsigned ones_count; + unsigned filler; + uint32_t seed_op; + int movz_done; + unsigned i; + + if(A64_REG(xd) == 31u) return; + + if((imm >> 32) == 0u) { + a64_mov_w_imm(cg, xd, (uint32_t)imm); + return; + } + if(try_mov_imm16(imm, 1, &hw, &imm16)) { + emit_w(cg, 0xD2800000u | ((uint32_t)hw << 21) | (imm16 << 5) | A64_REG(xd)); + return; + } + if(try_mov_imm16(~imm, 1, &hw, &imm16)) { + emit_w(cg, 0x92800000u | ((uint32_t)hw << 21) | (imm16 << 5) | A64_REG(xd)); + return; + } + if(encode_logical_imm64(imm, &enc)) { + /* ORR Xd, XZR, #imm (N|immr|imms shifted into bits 22..10) */ + emit_w(cg, 0xB2000000u | (enc << 10) | (31u << 5) | A64_REG(xd)); + return; + } + + /* Multi-hword fallback. 
Pick MOVN-base instead of MOVZ-base when + * 0xFFFF hwords outnumber zero hwords: that lets the chain skip more + * MOVKs, since MOVN seeds all hwords to 0xFFFF and MOVZ seeds them to + * zero. Cuts MOVZ+3 MOVK to MOVN+1 MOVK for sign-extended-negative + * patterns common in MIPS-style decoded immediates. */ + zero_count = 0u; + ones_count = 0u; + for(i = 0; i < 4u; ++i) { + hword[i] = (unsigned)((imm >> (i * 16u)) & 0xFFFFu); + if(hword[i] == 0u) zero_count++; + else if(hword[i] == 0xFFFFu) ones_count++; + } + if(ones_count > zero_count) { + filler = 0xFFFFu; + seed_op = 0x92800000u; /* MOVN (x) */ + } else { + filler = 0u; + seed_op = 0xD2800000u; /* MOVZ (x) */ + } + + movz_done = 0; + for(i = 0; i < 4u; ++i) { + unsigned hw16 = hword[i]; + if(hw16 == filler) continue; + if(!movz_done) { + uint32_t v = (filler == 0xFFFFu) ? ((~hw16) & 0xFFFFu) : hw16; + emit_w(cg, seed_op | ((uint32_t)i << 21) | (v << 5) | A64_REG(xd)); + movz_done = 1; + } else { + emit_w(cg, 0xF2800000u | ((uint32_t)i << 21) | (hw16 << 5) | A64_REG(xd)); + } + } + if(!movz_done) /* Defensive: every hword matched the filler. All-zero + * and all-ones are caught above, so this should be + * unreachable -- keep as a no-surprises seed. */ + emit_w(cg, seed_op | A64_REG(xd)); +} + +void a64_mov_w_reg(a64_codegen* cg, unsigned wd, unsigned wm) +{ + /* ORR Wd, WZR, Wm */ + emit_w(cg, 0x2A0003E0u | (A64_REG(wm) << 16) | A64_REG(wd)); +} + +void a64_mov_x_reg(a64_codegen* cg, unsigned xd, unsigned xm) +{ + emit_w(cg, 0xAA0003E0u | (A64_REG(xm) << 16) | A64_REG(xd)); +} + +void a64_mov_x_sp(a64_codegen* cg, unsigned xd) +{ + /* ADD Xd_sp, SP, #0 */ + emit_w(cg, 0x91000000u | (31u << 5) | A64_REG(xd)); +} + +void a64_movp2r(a64_codegen* cg, unsigned xd, const void* ptr) +{ + uintptr_t here = (uintptr_t)cg->wp; + uintptr_t targ = (uintptr_t)ptr; + int64_t diff = (int64_t)(targ - here); + uintptr_t here_page, targ_page; + int64_t page_diff; + + /* Try ADR (+/-1 MiB). imm21 = signed 21-bit byte offset. 
*/ + if(diff >= -((int64_t)1 << 20) && diff < ((int64_t)1 << 20)) { + uint32_t imm21 = (uint32_t)((uint64_t)diff & 0x1FFFFFu); + uint32_t immlo = imm21 & 0x3u; + uint32_t immhi = (imm21 >> 2) & 0x7FFFFu; + emit_w(cg, 0x10000000u | (immlo << 29) | (immhi << 5) | A64_REG(xd)); + return; + } + + /* Try ADRP+ADD (+/-4 GiB, 4 KiB-aligned page). */ + here_page = here & ~(uintptr_t)0xFFFu; + targ_page = targ & ~(uintptr_t)0xFFFu; + page_diff = ((int64_t)targ_page - (int64_t)here_page) >> 12; + if(page_diff >= -((int64_t)1 << 20) && page_diff < ((int64_t)1 << 20)) { + uint32_t imm21 = (uint32_t)((uint64_t)page_diff & 0x1FFFFFu); + uint32_t immlo = imm21 & 0x3u; + uint32_t immhi = (imm21 >> 2) & 0x7FFFFu; + emit_w(cg, 0x90000000u | (immlo << 29) | (immhi << 5) | A64_REG(xd)); + /* ADD Xd, Xd, #(targ & 0xFFF) */ + a64_add_x_imm(cg, xd, xd, (uint32_t)(targ & 0xFFFu)); + return; + } + + /* Fallback: full 64-bit immediate. */ + a64_mov_x_imm(cg, xd, (uint64_t)targ); +} + +/* --- Add/Sub (imm and reg) ---------------------------------------- */ + +static void emit_addsub_imm(a64_codegen* cg, uint32_t base, + unsigned rd, unsigned rn, uint32_t imm) +{ + uint32_t fld; + int ok = encode_addsub_imm(imm, &fld); + assert(ok); (void)ok; + emit_w(cg, base | fld | (A64_REG(rn) << 5) | A64_REG(rd)); +} + +void a64_add_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ emit_addsub_imm(cg, 0x11000000u, wd, wn, imm); } +void a64_sub_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ emit_addsub_imm(cg, 0x51000000u, wd, wn, imm); } +void a64_add_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, uint32_t imm) +{ emit_addsub_imm(cg, 0x91000000u, xd, xn, imm); } +void a64_sub_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, uint32_t imm) +{ emit_addsub_imm(cg, 0xD1000000u, xd, xn, imm); } + +static int try_emit_addsub_imm(a64_codegen* cg, uint32_t base, + unsigned rd, unsigned rn, uint32_t imm) +{ + uint32_t fld; + if(!encode_addsub_imm(imm, &fld)) return 0; + 
emit_w(cg, base | fld | (A64_REG(rn) << 5) | A64_REG(rd)); + return 1; +} + +int a64_try_add_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ return try_emit_addsub_imm(cg, 0x11000000u, wd, wn, imm); } +int a64_try_sub_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ return try_emit_addsub_imm(cg, 0x51000000u, wd, wn, imm); } +int a64_try_add_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, uint32_t imm) +{ return try_emit_addsub_imm(cg, 0x91000000u, xd, xn, imm); } +int a64_try_sub_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, uint32_t imm) +{ return try_emit_addsub_imm(cg, 0xD1000000u, xd, xn, imm); } + +void a64_adds_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ emit_addsub_imm(cg, 0x31000000u, wd, wn, imm); } +void a64_subs_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ emit_addsub_imm(cg, 0x71000000u, wd, wn, imm); } + +void a64_add_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x0B000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_sub_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x4B000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_add_x_reg(a64_codegen* cg, unsigned xd, unsigned xn, unsigned xm) +{ emit_w(cg, 0x8B000000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | A64_REG(xd)); } +void a64_sub_x_reg(a64_codegen* cg, unsigned xd, unsigned xn, unsigned xm) +{ emit_w(cg, 0xCB000000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | A64_REG(xd)); } + +void a64_adds_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x2B000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_subs_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x6B000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_ands_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x6A000000u | 
(A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } + +void a64_cmp_w_imm(a64_codegen* cg, unsigned wn, uint32_t imm) +{ a64_subs_w_imm(cg, 31u, wn, imm); } +void a64_cmp_w_reg(a64_codegen* cg, unsigned wn, unsigned wm) +{ a64_subs_w_reg(cg, 31u, wn, wm); } +void a64_tst_w_reg(a64_codegen* cg, unsigned wn, unsigned wm) +{ a64_ands_w_reg(cg, 31u, wn, wm); } +void a64_tst_x_reg(a64_codegen* cg, unsigned xn, unsigned xm) +{ /* ANDS Xd=31, Xn, Xm */ + emit_w(cg, 0xEA000000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | 31u); +} + +void a64_cset_w(a64_codegen* cg, unsigned wd, unsigned cond) +{ + /* CSINC Wd, WZR, WZR, !cond */ + uint32_t inv = (cond ^ 1u) & 0xFu; + emit_w(cg, 0x1A9F07E0u | (inv << 12) | A64_REG(wd)); +} + +void a64_csel_w(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm, unsigned cond) +{ + emit_w(cg, 0x1A800000u | (A64_REG(wm) << 16) | ((cond & 0xFu) << 12) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} + +/* --- Bitwise -------------------------------------------------------- */ + +int a64_and_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ + uint32_t enc; + if(!encode_logical_imm32(imm, &enc)) return 0; + emit_w(cg, 0x12000000u | (enc << 10) | (A64_REG(wn) << 5) | A64_REG(wd)); + return 1; +} + +int a64_orr_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, uint32_t imm) +{ + uint32_t enc; + if(!encode_logical_imm32(imm, &enc)) return 0; + emit_w(cg, 0x32000000u | (enc << 10) | (A64_REG(wn) << 5) | A64_REG(wd)); + return 1; +} + +void a64_and_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x0A000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_orr_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x2A000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void a64_eor_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ emit_w(cg, 0x4A000000u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); } +void 
a64_and_x_reg(a64_codegen* cg, unsigned xd, unsigned xn, unsigned xm) +{ emit_w(cg, 0x8A000000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | A64_REG(xd)); } +void a64_eor_x_reg(a64_codegen* cg, unsigned xd, unsigned xn, unsigned xm) +{ emit_w(cg, 0xCA000000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | A64_REG(xd)); } +void a64_bic_x_reg(a64_codegen* cg, unsigned xd, unsigned xn, unsigned xm) +{ emit_w(cg, 0x8A200000u | (A64_REG(xm) << 16) | (A64_REG(xn) << 5) | A64_REG(xd)); } + +void a64_orr_w_reg_lsl(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm, + unsigned shift) +{ + assert(shift < 32u); + emit_w(cg, 0x2A000000u | (A64_REG(wm) << 16) | (shift << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} + +/* --- Shifts (imm and reg) ------------------------------------------ */ + +void a64_lsl_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, unsigned s) +{ + uint32_t immr = (32u - s) & 0x1Fu; + uint32_t imms = 31u - s; + emit_w(cg, 0x53000000u | (immr << 16) | (imms << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_lsr_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, unsigned s) +{ + emit_w(cg, 0x53000000u | ((s & 0x1Fu) << 16) | (31u << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_asr_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, unsigned s) +{ + emit_w(cg, 0x13000000u | ((s & 0x1Fu) << 16) | (31u << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_ror_w_imm(a64_codegen* cg, unsigned wd, unsigned wn, unsigned s) +{ + /* EXTR Wd, Wn, Wn, #s */ + emit_w(cg, 0x13800000u | (A64_REG(wn) << 16) | ((s & 0x1Fu) << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_asr_w_reg(a64_codegen* cg, unsigned wd, unsigned wn, unsigned wm) +{ + /* ASRV Wd, Wn, Wm */ + emit_w(cg, 0x1AC02800u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_lsl_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, unsigned s) +{ + uint32_t immr = (64u - s) & 0x3Fu; + uint32_t imms = 63u - s; + emit_w(cg, 0xD3400000u | (immr << 16) | (imms << 10) + 
| (A64_REG(xn) << 5) | A64_REG(xd)); +} +void a64_lsr_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, unsigned s) +{ + emit_w(cg, 0xD3400000u | ((s & 0x3Fu) << 16) | (63u << 10) + | (A64_REG(xn) << 5) | A64_REG(xd)); +} +void a64_asr_x_imm(a64_codegen* cg, unsigned xd, unsigned xn, unsigned s) +{ + emit_w(cg, 0x9340FC00u | ((s & 0x3Fu) << 16) + | (A64_REG(xn) << 5) | A64_REG(xd)); +} + +/* --- Bitfield ------------------------------------------------------ */ + +void a64_ubfx_w(a64_codegen* cg, unsigned wd, unsigned wn, unsigned lsb, unsigned width) +{ + emit_w(cg, 0x53000000u | ((lsb & 0x1Fu) << 16) | (((lsb + width - 1u) & 0x1Fu) << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_sbfx_w(a64_codegen* cg, unsigned wd, unsigned wn, unsigned lsb, unsigned width) +{ + emit_w(cg, 0x13000000u | ((lsb & 0x1Fu) << 16) | (((lsb + width - 1u) & 0x1Fu) << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_bfi_w(a64_codegen* cg, unsigned wd, unsigned wn, unsigned lsb, unsigned width) +{ + uint32_t immr = (32u - lsb) & 0x1Fu; + uint32_t imms = (width - 1u) & 0x1Fu; + emit_w(cg, 0x33000000u | (immr << 16) | (imms << 10) + | (A64_REG(wn) << 5) | A64_REG(wd)); +} +void a64_bfi_x(a64_codegen* cg, unsigned xd, unsigned xn, unsigned lsb, unsigned width) +{ + uint32_t immr = (64u - lsb) & 0x3Fu; + uint32_t imms = (width - 1u) & 0x3Fu; + emit_w(cg, 0xB3400000u | (immr << 16) | (imms << 10) + | (A64_REG(xn) << 5) | A64_REG(xd)); +} + +void a64_sxtw(a64_codegen* cg, unsigned xd, unsigned wn) +{ /* SBFM Xd, Xn, 0, 31 -> SXTW alias */ + emit_w(cg, 0x93407C00u | (A64_REG(wn) << 5) | A64_REG(xd)); +} + +void a64_clz_w(a64_codegen* cg, unsigned wd, unsigned wn) +{ + emit_w(cg, 0x5AC01000u | (A64_REG(wn) << 5) | A64_REG(wd)); +} + +/* --- Multiply / negate -------------------------------------------- */ + +void a64_smull(a64_codegen* cg, unsigned xd, unsigned wn, unsigned wm) +{ + /* SMADDL Xd, Wn, Wm, XZR */ + emit_w(cg, 0x9B207C00u | (A64_REG(wm) << 16) | (A64_REG(wn) << 5) | 
A64_REG(xd)); +} + +void a64_neg_w(a64_codegen* cg, unsigned wd, unsigned wm) +{ + emit_w(cg, 0x4B0003E0u | (A64_REG(wm) << 16) | A64_REG(wd)); +} + +/* --- Branches ----------------------------------------------------- */ + +/* + * Emit a forward / backward branch whose offset is either patched on + * bind (forward) or computed inline (backward). + * + * `inst_base` is the instruction with imm-field cleared. + */ +static void emit_branch(a64_codegen* cg, a64_label* lbl, + uint32_t inst_base, unsigned kind) +{ + if(lbl->bound) { + ptrdiff_t here = (char*)cg->wp - (char*)cg->base; + int64_t delta = (int64_t)((lbl->target_off - here) >> 2); + uint32_t imm; + switch(kind) { + case A64_PATCH_B: + assert(sint_fits(delta, 26)); + imm = (uint32_t)((uint64_t)delta & 0x3FFFFFFu); + break; + case A64_PATCH_BCOND: + assert(sint_fits(delta, 19)); + imm = (uint32_t)(((uint64_t)delta & 0x7FFFFu) << 5); + break; + case A64_PATCH_TBNZ: + assert(sint_fits(delta, 14)); + imm = (uint32_t)(((uint64_t)delta & 0x3FFFu) << 5); + break; + default: + imm = 0; + assert(0); + } + emit_w(cg, inst_base | imm); + } else { + ptrdiff_t wb = (char*)cg->wp - (char*)cg->base; + emit_w(cg, inst_base); + label_add_patch(lbl, wb, kind); + } +} + +void a64_b(a64_codegen* cg, a64_label* lbl) +{ emit_branch(cg, lbl, 0x14000000u, A64_PATCH_B); } + +void a64_b_cond(a64_codegen* cg, unsigned cond, a64_label* lbl) +{ emit_branch(cg, lbl, 0x54000000u | (cond & 0xFu), A64_PATCH_BCOND); } + +void a64_cbz_w(a64_codegen* cg, unsigned wn, a64_label* lbl) +{ emit_branch(cg, lbl, 0x34000000u | A64_REG(wn), A64_PATCH_BCOND); } + +void a64_cbnz_w(a64_codegen* cg, unsigned wn, a64_label* lbl) +{ emit_branch(cg, lbl, 0x35000000u | A64_REG(wn), A64_PATCH_BCOND); } + +void a64_tbnz_w(a64_codegen* cg, unsigned wn, unsigned bit, a64_label* lbl) +{ emit_branch(cg, lbl, 0x37000000u | ((bit & 0x1Fu) << 19) | A64_REG(wn), + A64_PATCH_TBNZ); } + +void a64_b_addr(a64_codegen* cg, const void* addr) +{ + int64_t delta = 
(int64_t)(((intptr_t)addr - (intptr_t)cg->wp) >> 2); + assert(sint_fits(delta, 26)); + emit_w(cg, 0x14000000u | ((uint32_t)((uint64_t)delta & 0x3FFFFFFu))); +} + +void a64_br (a64_codegen* cg, unsigned xn) { emit_w(cg, 0xD61F0000u | (A64_REG(xn) << 5)); } +void a64_blr(a64_codegen* cg, unsigned xn) { emit_w(cg, 0xD63F0000u | (A64_REG(xn) << 5)); } +void a64_ret(a64_codegen* cg) { emit_w(cg, 0xD65F0000u | (30u << 5)); } + +/* --- Loads / stores (imm offset) ---------------------------------- */ + +static void emit_ldst_imm12(a64_codegen* cg, uint32_t base, unsigned size_log2, + unsigned rt, unsigned rn, uint32_t off) +{ + uint32_t scaled = off >> size_log2; + assert((off & ((1u << size_log2) - 1u)) == 0u); + assert(scaled <= 0xFFFu); + emit_w(cg, base | (scaled << 10) | (A64_REG(rn) << 5) | A64_REG(rt)); +} + +void a64_ldr_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0xB9400000u, 2u, wt, xn, off); } +void a64_str_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0xB9000000u, 2u, wt, xn, off); } +void a64_ldr_x_imm (a64_codegen* cg, unsigned xt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0xF9400000u, 3u, xt, xn, off); } +void a64_str_x_imm (a64_codegen* cg, unsigned xt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0xF9000000u, 3u, xt, xn, off); } +void a64_ldrsw_x_imm(a64_codegen* cg, unsigned xt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0xB9800000u, 2u, xt, xn, off); } +void a64_ldrh_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0x79400000u, 1u, wt, xn, off); } +void a64_strh_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0x79000000u, 1u, wt, xn, off); } +void a64_ldrb_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 0x39400000u, 0u, wt, xn, off); } +void a64_strb_w_imm (a64_codegen* cg, unsigned wt, unsigned xn, uint32_t off) +{ emit_ldst_imm12(cg, 
0x39000000u, 0u, wt, xn, off); } + +static void emit_ldst_unscaled(a64_codegen* cg, uint32_t base, + unsigned rt, unsigned rn, int off) +{ + uint32_t imm9; + assert(off >= -256 && off <= 255); + imm9 = (uint32_t)((uint32_t)off & 0x1FFu); + emit_w(cg, base | (imm9 << 12) | (A64_REG(rn) << 5) | A64_REG(rt)); +} + +void a64_ldur_w(a64_codegen* cg, unsigned wt, unsigned xn, int off) +{ emit_ldst_unscaled(cg, 0xB8400000u, wt, xn, off); } +void a64_stur_w(a64_codegen* cg, unsigned wt, unsigned xn, int off) +{ emit_ldst_unscaled(cg, 0xB8000000u, wt, xn, off); } + +/* --- Loads / stores (register index) ------------------------------ */ + +static void emit_ldst_reg(a64_codegen* cg, uint32_t base, unsigned size_log2, + unsigned rt, unsigned rn, unsigned rm, + unsigned option, unsigned shift) +{ + unsigned S; + if(shift == 0u) S = 0u; + else { assert(shift == size_log2); S = 1u; } + emit_w(cg, base | (A64_REG(rm) << 16) | (option << 13) | (S << 12) + | (A64_REG(rn) << 5) | A64_REG(rt)); +} + +/* LSL = option 011, UXTW = option 010 */ +void a64_ldr_w_reg(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm) +{ emit_ldst_reg(cg, 0xB8600800u, 2u, wt, xn, xm, 3u, 0u); } +void a64_str_w_reg(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm) +{ emit_ldst_reg(cg, 0xB8200800u, 2u, wt, xn, xm, 3u, 0u); } +void a64_ldrh_w_reg(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm) +{ emit_ldst_reg(cg, 0x78600800u, 1u, wt, xn, xm, 3u, 0u); } +void a64_strh_w_reg(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm) +{ emit_ldst_reg(cg, 0x78200800u, 1u, wt, xn, xm, 3u, 0u); } +void a64_ldr_w_idx_lsl(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm, unsigned shift) +{ emit_ldst_reg(cg, 0xB8600800u, 2u, wt, xn, xm, 3u, shift); } +void a64_str_w_idx_lsl(a64_codegen* cg, unsigned wt, unsigned xn, unsigned xm, unsigned shift) +{ emit_ldst_reg(cg, 0xB8200800u, 2u, wt, xn, xm, 3u, shift); } +void a64_ldr_x_idx_lsl(a64_codegen* cg, unsigned xt, unsigned xn, unsigned xm, unsigned 
shift) +{ emit_ldst_reg(cg, 0xF8600800u, 3u, xt, xn, xm, 3u, shift); } +void a64_ldr_w_uxtw(a64_codegen* cg, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +{ emit_ldst_reg(cg, 0xB8600800u, 2u, wt, xn, wm, 2u, shift); } +void a64_str_w_uxtw(a64_codegen* cg, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +{ emit_ldst_reg(cg, 0xB8200800u, 2u, wt, xn, wm, 2u, shift); } +void a64_ldrh_w_uxtw(a64_codegen* cg, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +{ emit_ldst_reg(cg, 0x78600800u, 1u, wt, xn, wm, 2u, shift); } +void a64_strh_w_uxtw(a64_codegen* cg, unsigned wt, unsigned xn, unsigned wm, unsigned shift) +{ emit_ldst_reg(cg, 0x78200800u, 1u, wt, xn, wm, 2u, shift); } + +/* --- Pair load/store (X regs) ------------------------------------- */ + +static void emit_pair(a64_codegen* cg, uint32_t base, + unsigned rt, unsigned rt2, unsigned rn, int off) +{ + uint32_t imm7; + assert((off & 0x7) == 0); + assert(off >= -512 && off <= 504); + imm7 = (uint32_t)(((int32_t)off >> 3) & 0x7Fu); + emit_w(cg, base | (imm7 << 15) | (A64_REG(rt2) << 10) + | (A64_REG(rn) << 5) | A64_REG(rt)); +} + +void a64_stp_x_pre(a64_codegen* cg, unsigned xt1, unsigned xt2, int off) +{ emit_pair(cg, 0xA9800000u, xt1, xt2, 31u, off); } /* pre, L=0 */ +void a64_ldp_x_post(a64_codegen* cg, unsigned xt1, unsigned xt2, int off) +{ emit_pair(cg, 0xA8C00000u, xt1, xt2, 31u, off); } /* post, L=1 */ +void a64_stp_x_off(a64_codegen* cg, unsigned xt1, unsigned xt2, unsigned xn, int off) +{ emit_pair(cg, 0xA9000000u, xt1, xt2, xn, off); } /* signed off, L=0 */ +void a64_ldp_x_off(a64_codegen* cg, unsigned xt1, unsigned xt2, unsigned xn, int off) +{ emit_pair(cg, 0xA9400000u, xt1, xt2, xn, off); } /* signed off, L=1 */ + +void a64_nop(a64_codegen* cg) { emit_w(cg, 0xD503201Fu); } + +/* ==================================================================== + * Section 6 -- constant pool (deduplicated 64-bit values). 
+ * + * Queue an LDR (literal) at emit time with imm19 left zero, remember + * (site, entry) in pool_refs[]. On flush, align the write pointer to + * 8 bytes, lay down each unique 64-bit value, then walk the refs and + * patch in the now-known imm19. The caller has to branch over the + * pool region before calling flush -- the pool is data, not code. + * ==================================================================== */ + +static unsigned pool_intern(a64_codegen* cg, uint64_t v) +{ + unsigned i; + for(i = 0; i < cg->pool_count; ++i) + if(cg->pool_values[i] == v) return i; + assert(cg->pool_count < A64_POOL_MAX_ENTRIES); + cg->pool_values[cg->pool_count] = v; + return cg->pool_count++; +} + +void a64_ldr_x_pool(a64_codegen* cg, unsigned xd, uint64_t value) +{ + unsigned idx; + ptrdiff_t wb; + + if(A64_REG(xd) == 31u) return; + idx = pool_intern(cg, value); + wb = (char*)cg->wp - (char*)cg->base; + + assert(cg->pool_ref_count < A64_POOL_MAX_REFS); + cg->pool_refs[cg->pool_ref_count].wb_off = wb; + cg->pool_refs[cg->pool_ref_count].entry = idx; + cg->pool_ref_count++; + + /* LDR Xt, label : imm19 left zero, patched at flush. */ + emit_w(cg, 0x58000000u | A64_REG(xd)); +} + +void a64_movp2r_pool(a64_codegen* cg, unsigned xd, const void* ptr) +{ + a64_ldr_x_pool(cg, xd, (uint64_t)(uintptr_t)ptr); +} + +unsigned a64_pool_pending(const a64_codegen* cg) +{ + return cg ? cg->pool_ref_count : 0u; +} + +void a64_pool_reset(a64_codegen* cg) +{ + if(!cg) return; + cg->pool_count = 0u; + cg->pool_ref_count = 0u; +} + +void a64_pool_flush(a64_codegen* cg) +{ + ptrdiff_t pool_off; + unsigned i; + + if(!cg || cg->pool_ref_count == 0u) { + if(cg) cg->pool_count = 0u; + return; + } + + /* Align pool start to 8 bytes (the LDR-X load width). Misaligned is + * permitted with SCTLR.A=0 but costs a split access; one NOP is cheap + * insurance. 
*/ + if(((uintptr_t)cg->wp & 0x7u) != 0u) + a64_nop(cg); + + pool_off = (char*)cg->wp - (char*)cg->base; + + /* Emit the pool values as two 32-bit words each, little-endian. We + * use raw stores rather than memcpy so the bump-allocator invariant + * (one emit_w per 4 bytes) is preserved -- the cache-line layout is + * identical to what an aligned uint64_t store would produce. */ + for(i = 0; i < cg->pool_count; ++i) { + uint64_t v = cg->pool_values[i]; + emit_w(cg, (uint32_t)(v & 0xFFFFFFFFu)); + emit_w(cg, (uint32_t)(v >> 32)); + } + + /* Patch each LDR-literal site with the imm19 byte-offset/4 to its + * pool slot. Range check is identical to what a64_label_bind would + * do for an A64_PATCH_BCOND site (same field). */ + for(i = 0; i < cg->pool_ref_count; ++i) { + ptrdiff_t site_off = cg->pool_refs[i].wb_off; + unsigned entry = cg->pool_refs[i].entry; + ptrdiff_t slot_off = pool_off + (ptrdiff_t)entry * 8; + int64_t delta = (int64_t)((slot_off - site_off) >> 2); + uint32_t* p = (uint32_t*)((char*)cg->base + site_off); + assert(sint_fits(delta, 19)); + *p |= (uint32_t)(((uint64_t)delta & 0x7FFFFu) << 5); + } + + cg->pool_count = 0u; + cg->pool_ref_count = 0u; +} + +/* ==================================================================== + * Section 7 -- in-place branch patching. + * + * Used by higher-level JIT bookkeeping (block stitching, generational + * code-cache rollover) to redirect an already-emitted branch. The + * site is the byte address of the 32-bit instruction word; the imm + * field is cleared and rewritten. Out-of-range returns 0 so the + * caller can insert a trampoline. 
+ * ==================================================================== */ + +static int patch_imm(void* site, const void* target, + unsigned bits, uint32_t field_mask, unsigned shift) +{ + uint32_t* p = (uint32_t*)site; /* instruction word to rewrite */ + int64_t delta = (int64_t)(((intptr_t)target - (intptr_t)site) >> 2); /* signed distance in 4-byte units */ + uint64_t field; + if(!sint_fits(delta, bits)) return 0; /* out of range: *site is left untouched */ + field = ((uint64_t)delta & (((uint64_t)1u << bits) - 1u)) << shift; + *p = (*p & ~field_mask) | (uint32_t)field; /* clear the old imm, splice in the new one */ + return 1; +} + +int a64_patch_b(void* site, const void* target) +{ return patch_imm(site, target, 26u, 0x03FFFFFFu, 0u); } + +int a64_patch_b_cond(void* site, const void* target) +{ return patch_imm(site, target, 19u, 0x00FFFFE0u, 5u); } + +int a64_patch_cbz(void* site, const void* target) +{ return patch_imm(site, target, 19u, 0x00FFFFE0u, 5u); } + +int a64_patch_tbz(void* site, const void* target) +{ return patch_imm(site, target, 14u, 0x0007FFE0u, 5u); } + +#endif /* AArch64 + Linux */ diff --git a/mednafen/ss/a64emit.h b/mednafen/ss/a64emit.h new file mode 100644 index 00000000..2e041fad --- /dev/null +++ b/mednafen/ss/a64emit.h @@ -0,0 +1,344 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* a64emit.h - minimal AArch64 instruction emitter (C, integer-only) +** Copyright (C) 2026 pstef +*/ + +/* + * Replacement for the (much larger) C++ header-only `oaknut` library used + * by scu_dsp_jit_oaknut.cpp and scsp_dsp_jit_oaknut.cpp. This first cut + * is the strict minimum the two DSP JITs need: integer arithmetic / logic + * / bitfield / shift / compare-and-branch / load-store / pair, plus a + * mmap'd RWX code block with icache invalidation. No FP/SIMD, no atomics, + * no compiler-pass infrastructure -- those are explicit extensions for + * later (NOTE(review): a64emit.c carries no list of candidates at its bottom).
+ * + * Register parameters are plain `unsigned`s -- 0..30 for general + * registers, 31 for WZR/XZR/WSP/SP (the same index encodes either WZR or + * SP depending on the AArch64 instruction form, which the emitter picks + * automatically based on which function is called). Conditions use the + * AArch64 4-bit encoding (also available as the A64_COND_* names below). + */ + +#ifndef MDFN_SS_A64EMIT_H +#define MDFN_SS_A64EMIT_H + +#include <stddef.h> /* ptrdiff_t, size_t */ +#include <stdint.h> /* uint32_t, uint64_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* SP/WZR/XZR all encode as register index 31. */ +#define A64_SP_INDEX 31u + +/* Condition codes (AArch64 encoding; matches oaknut::Cond order). */ +enum { + A64_COND_EQ = 0, A64_COND_NE = 1, + A64_COND_CS = 2, A64_COND_CC = 3, + A64_COND_MI = 4, A64_COND_PL = 5, + A64_COND_VS = 6, A64_COND_VC = 7, + A64_COND_HI = 8, A64_COND_LS = 9, + A64_COND_GE = 10, A64_COND_LT = 11, + A64_COND_GT = 12, A64_COND_LE = 13, + A64_COND_AL = 14, A64_COND_NV = 15 +}; + +/* Maximum forward-reference patches per label. Bumped only at first + * complaint -- our two DSP JITs use at most 3 patches per label. */ +#define A64_LABEL_MAX_PATCHES 8u + +/* + * Forward-branch label. + * + * POD. Zero-init before first use; reuse by calling a64_label_reset(). + * No move-only-with-vector dance like oaknut::Label; the consumer can + * declare an array of labels directly and reuse via memset/reset. + */ +typedef struct a64_label { + int bound; /* nonzero: target_off is valid and branches encode directly */ + ptrdiff_t target_off; /* target's byte offset from the code base */ + unsigned int patch_count; /* presumably the number of patches[] entries in use -- confirm in a64emit.c */ + struct { + ptrdiff_t wb_off; /* byte offset of the branch word awaiting its imm */ + unsigned int kind; /* A64_PATCH_* selector for the imm field layout */ + } patches[A64_LABEL_MAX_PATCHES]; +} a64_label; + +/* + * CodeGenerator handle. Owns one mmap'd RWX region + a write pointer. + * Opaque from the consumer's perspective; allocate with a64_codegen_create + * and free with a64_codegen_destroy. + */ +typedef struct a64_codegen a64_codegen; + +/* --- Code-segment lifecycle --------------------------------------- */ + +/* Allocate `bytes` of RWX memory and a code-generator pointing at its + * base.
Returns NULL on failure (mmap returned MAP_FAILED). */ +a64_codegen* a64_codegen_create(size_t bytes); +void a64_codegen_destroy(a64_codegen*); + +void* a64_codegen_base (const a64_codegen*); +void* a64_codegen_wptr (const a64_codegen*); +size_t a64_codegen_offset (const a64_codegen*); +size_t a64_codegen_capacity(const a64_codegen*); + +/* Bytes between the current write pointer and the end of the segment. + * Useful before starting a block to decide if it will fit. */ +size_t a64_codegen_remaining(const a64_codegen*); + +/* Move the write pointer to `p` (which must point inside the segment). + * Outstanding labels are not affected -- the caller is responsible for + * resetting them. */ +void a64_codegen_set_wptr(a64_codegen*, void* p); + +/* Save / restore the write pointer. Sugar over wptr / set_wptr that + * expresses intent for two-ended segment growth: save the hot-side + * cursor, jump to the cold-side region to emit a stub, then restore. */ +void* a64_codegen_save (const a64_codegen*); +void a64_codegen_restore(a64_codegen*, void*); + +/* Call after emitting a region so the new code is safe to execute. */ +void a64_codegen_invalidate(a64_codegen*, void* p, size_t bytes); + +/* --- Labels ------------------------------------------------------- */ + +/* Discard any pending patches and clear the bound flag; equivalent to + * `memset(lbl, 0, sizeof *lbl)`. Calling this on a bound label whose + * branches have already been patched is harmless. */ +void a64_label_reset(a64_label*); + +/* Bind the label at the current write pointer and resolve every patch + * site that was queued before this point. Subsequent uses of the same + * label require a64_label_reset() first. */ +void a64_label_bind(a64_codegen*, a64_label*); + +/* --- Encodability predicates (no emit) ---------------------------- */ + +/* True iff `imm` is a valid 32/64-bit AArch64 logical immediate. + * The bitwise *_imm emitters below return the same value (0/1). 
*/ +int a64_can_encode_logical_imm32(uint32_t imm); +int a64_can_encode_logical_imm64(uint64_t imm); + +/* True iff `imm` is a valid 12-bit AddSubImm value, optionally with + * shift-by-12. Pair with the a64_try_*_imm variants below to let the + * caller branch instead of asserting. */ +int a64_can_encode_addsub_imm(uint32_t imm); + +/* --- Instruction emitters ----------------------------------------- */ + +/* MOV / pointer materialisation. */ +void a64_mov_w_imm(a64_codegen*, unsigned wd, uint32_t imm); +void a64_mov_x_imm(a64_codegen*, unsigned xd, uint64_t imm); +void a64_mov_w_reg(a64_codegen*, unsigned wd, unsigned wm); +void a64_mov_x_reg(a64_codegen*, unsigned xd, unsigned xm); +/* MOV Xd_sp, SP : alias ADD Xd_sp, SP, #0. Needed for `MOV Xd, SP` + * since the register-form MOV alias only accepts ZR, not SP. */ +void a64_mov_x_sp (a64_codegen*, unsigned xd); +void a64_movp2r (a64_codegen*, unsigned xd, const void* ptr); + +/* Pool-backed pointer materialisation: emits a single LDR (literal) and + * queues `ptr` in the codegen's constant pool. The caller must invoke + * a64_pool_flush before the LDR can fall out of imm19 range (+/-1 MiB). + * For prologues that materialise several state pointers, this trades + * 1..4 instructions per call for one 4-byte load each. */ +void a64_movp2r_pool(a64_codegen*, unsigned xd, const void* ptr); + +/* Add/Sub. imm form: 12-bit unsigned + optional shift-by-12 (the + * encoder picks the shift automatically). reg form: shifted-reg with + * LSL #0. */ +void a64_add_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn_sp, uint32_t imm); +void a64_sub_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn_sp, uint32_t imm); +void a64_add_x_imm(a64_codegen*, unsigned xd_sp, unsigned xn_sp, uint32_t imm); +void a64_sub_x_imm(a64_codegen*, unsigned xd_sp, unsigned xn_sp, uint32_t imm); + +/* Return 1 and emit on success, 0 and emit nothing if `imm` doesn't + * fit the AddSubImm encoding (caller falls back to MOV+reg-form). 
*/ +int a64_try_add_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn_sp, uint32_t imm); +int a64_try_sub_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn_sp, uint32_t imm); +int a64_try_add_x_imm(a64_codegen*, unsigned xd_sp, unsigned xn_sp, uint32_t imm); +int a64_try_sub_x_imm(a64_codegen*, unsigned xd_sp, unsigned xn_sp, uint32_t imm); +void a64_add_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_sub_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_add_x_reg(a64_codegen*, unsigned xd, unsigned xn, unsigned xm); +void a64_sub_x_reg(a64_codegen*, unsigned xd, unsigned xn, unsigned xm); + +/* Flag-setting; pair with a64_cset_w to materialise the flag. */ +void a64_adds_w_imm(a64_codegen*, unsigned wd, unsigned wn_sp, uint32_t imm); +void a64_subs_w_imm(a64_codegen*, unsigned wd, unsigned wn_sp, uint32_t imm); +void a64_adds_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_subs_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_ands_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); + +void a64_cmp_w_imm(a64_codegen*, unsigned wn_sp, uint32_t imm); +void a64_cmp_w_reg(a64_codegen*, unsigned wn, unsigned wm); +void a64_tst_w_reg(a64_codegen*, unsigned wn, unsigned wm); +void a64_tst_x_reg(a64_codegen*, unsigned xn, unsigned xm); + +void a64_cset_w(a64_codegen*, unsigned wd, unsigned cond); +void a64_csel_w(a64_codegen*, unsigned wd, unsigned wn, unsigned wm, unsigned cond); + +/* + * Bitwise immediate. + * + * Returns 1 on success, 0 if `imm` is not a valid logical immediate + * (in which case nothing is emitted). The two DSP JITs currently + * recover by falling back to MOV + reg-form AND/ORR. Callers can + * pre-query with a64_can_encode_logical_imm32 instead. 
+ */ +int a64_and_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn, uint32_t imm); +int a64_orr_w_imm(a64_codegen*, unsigned wd_sp, unsigned wn, uint32_t imm); + +void a64_and_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_orr_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_eor_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_and_x_reg(a64_codegen*, unsigned xd, unsigned xn, unsigned xm); +void a64_eor_x_reg(a64_codegen*, unsigned xd, unsigned xn, unsigned xm); +void a64_bic_x_reg(a64_codegen*, unsigned xd, unsigned xn, unsigned xm); + +void a64_orr_w_reg_lsl(a64_codegen*, unsigned wd, unsigned wn, unsigned wm, + unsigned shift); + +/* Shifts. */ +void a64_lsl_w_imm(a64_codegen*, unsigned wd, unsigned wn, unsigned shift); +void a64_lsr_w_imm(a64_codegen*, unsigned wd, unsigned wn, unsigned shift); +void a64_asr_w_imm(a64_codegen*, unsigned wd, unsigned wn, unsigned shift); +void a64_ror_w_imm(a64_codegen*, unsigned wd, unsigned wn, unsigned shift); +void a64_asr_w_reg(a64_codegen*, unsigned wd, unsigned wn, unsigned wm); +void a64_lsl_x_imm(a64_codegen*, unsigned xd, unsigned xn, unsigned shift); +void a64_lsr_x_imm(a64_codegen*, unsigned xd, unsigned xn, unsigned shift); +void a64_asr_x_imm(a64_codegen*, unsigned xd, unsigned xn, unsigned shift); + +/* Bitfield. */ +void a64_ubfx_w(a64_codegen*, unsigned wd, unsigned wn, unsigned lsb, unsigned width); +void a64_sbfx_w(a64_codegen*, unsigned wd, unsigned wn, unsigned lsb, unsigned width); +void a64_bfi_w (a64_codegen*, unsigned wd, unsigned wn, unsigned lsb, unsigned width); +void a64_bfi_x (a64_codegen*, unsigned xd, unsigned xn, unsigned lsb, unsigned width); + +void a64_sxtw (a64_codegen*, unsigned xd, unsigned wn); +void a64_clz_w (a64_codegen*, unsigned wd, unsigned wn); + +void a64_smull(a64_codegen*, unsigned xd, unsigned wn, unsigned wm); +void a64_neg_w(a64_codegen*, unsigned wd, unsigned wm); + +/* Branches. 
Labels may be either unbound (forward branch, patched on + * bind) or already bound (backward branch, fully encoded here). */ +void a64_cbz_w (a64_codegen*, unsigned wn, a64_label*); +void a64_cbnz_w(a64_codegen*, unsigned wn, a64_label*); +void a64_tbnz_w(a64_codegen*, unsigned wn, unsigned bit, a64_label*); +void a64_b (a64_codegen*, a64_label*); +void a64_b_cond(a64_codegen*, unsigned cond, a64_label*); + +/* B to an absolute address: the target must be within +/-128 MiB of the + * current write pointer (asserted). No BL counterpart is declared here. */ +void a64_b_addr (a64_codegen*, const void* addr); + +void a64_br (a64_codegen*, unsigned xn); +void a64_blr(a64_codegen*, unsigned xn); +void a64_ret(a64_codegen*); + +/* Loads/stores -- imm offset (unsigned scaled). `off` is in bytes: a multiple of the access size, at most 4095 * that size (asserted). */ +void a64_ldr_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); +void a64_str_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); +void a64_ldr_x_imm (a64_codegen*, unsigned xt, unsigned xn_sp, uint32_t off); +void a64_str_x_imm (a64_codegen*, unsigned xt, unsigned xn_sp, uint32_t off); +void a64_ldrsw_x_imm(a64_codegen*, unsigned xt, unsigned xn_sp, uint32_t off); +void a64_ldrh_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); +void a64_strh_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); +void a64_ldrb_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); +void a64_strb_w_imm (a64_codegen*, unsigned wt, unsigned xn_sp, uint32_t off); + +/* Loads/stores -- signed-offset unscaled (LDUR/STUR). `off` is in bytes, -256..255 (asserted). */ +void a64_ldur_w(a64_codegen*, unsigned wt, unsigned xn_sp, int off); +void a64_stur_w(a64_codegen*, unsigned wt, unsigned xn_sp, int off); + +/* Loads/stores -- register offset. `shift` is the LSL amount (0 or 2 + * for word, 0 or 3 for dword). *_reg variants emit LSL #0.
*/ +void a64_ldr_w_reg (a64_codegen*, unsigned wt, unsigned xn, unsigned xm); +void a64_str_w_reg (a64_codegen*, unsigned wt, unsigned xn, unsigned xm); +void a64_ldrh_w_reg (a64_codegen*, unsigned wt, unsigned xn, unsigned xm); +void a64_strh_w_reg (a64_codegen*, unsigned wt, unsigned xn, unsigned xm); +void a64_ldr_w_idx_lsl(a64_codegen*, unsigned wt, unsigned xn, unsigned xm, unsigned shift); +void a64_str_w_idx_lsl(a64_codegen*, unsigned wt, unsigned xn, unsigned xm, unsigned shift); +void a64_ldr_x_idx_lsl(a64_codegen*, unsigned xt, unsigned xn, unsigned xm, unsigned shift); + +/* Loads/stores -- 32-bit-extended index (Wm with UXTW). */ +void a64_ldr_w_uxtw (a64_codegen*, unsigned wt, unsigned xn, unsigned wm, unsigned shift); +void a64_str_w_uxtw (a64_codegen*, unsigned wt, unsigned xn, unsigned wm, unsigned shift); +void a64_ldrh_w_uxtw(a64_codegen*, unsigned wt, unsigned xn, unsigned wm, unsigned shift); +void a64_strh_w_uxtw(a64_codegen*, unsigned wt, unsigned xn, unsigned wm, unsigned shift); + +/* Pair load/store (X regs). */ +void a64_stp_x_pre (a64_codegen*, unsigned xt1, unsigned xt2, int off); +void a64_ldp_x_post(a64_codegen*, unsigned xt1, unsigned xt2, int off); +void a64_stp_x_off (a64_codegen*, unsigned xt1, unsigned xt2, unsigned xn_sp, int off); +void a64_ldp_x_off (a64_codegen*, unsigned xt1, unsigned xt2, unsigned xn_sp, int off); + +void a64_nop(a64_codegen*); + +/* --- Constant pool ------------------------------------------------ */ + +/* + * Embedded 64-bit constant pool. Values are deduplicated; identical + * a64_ldr_x_pool calls share one pool slot. + * + * a64_ldr_x_pool(cg, xd, value) + * -> emits LDR Xd, =value as a single LDR (literal) whose imm19 + * field is patched at flush time. + * + * a64_pool_flush(cg) + * -> aligns to 8 bytes (emitting one NOP if needed), emits every + * queued 64-bit value, then walks the LDR sites and rewrites + * their imm19 to point at the matching pool slot. Pool state + * is cleared on return. 
The caller is responsible for branching + * over the pool region (the pool is data, not code). + * + * a64_pool_reset(cg) + * -> drops queued entries without emitting. Use this when a + * consumer is rolling over a segment (SH-2 generational cache): + * unresolved LDR sites in the dead segment become unreachable. + * + * Pool capacity is fixed at compile time (see A64_POOL_MAX_*); the + * emitter asserts on overflow rather than silently spilling, so + * consumers know to flush more often. + */ +#define A64_POOL_MAX_ENTRIES 64u +#define A64_POOL_MAX_REFS 256u + +void a64_ldr_x_pool (a64_codegen*, unsigned xd, uint64_t value); +void a64_pool_flush (a64_codegen*); +void a64_pool_reset (a64_codegen*); +unsigned a64_pool_pending(const a64_codegen*); /* number of queued LDR sites */ + +/* --- In-place branch patching ------------------------------------- */ + +/* + * Rewrite the imm field of an already-emitted branch at `site` so it + * targets `target` (both absolute addresses). Returns 1 on success, 0 + * if the resulting offset overflows the encoding -- the caller can + * then insert a trampoline or fall back to an indirect. Callers must + * invalidate I-cache covering `site` afterwards. + * + * a64_patch_b : B / BL (imm26, +/-128 MiB) + * a64_patch_b_cond : B.cond (imm19, +/-1 MiB) + * a64_patch_cbz : CBZ/CBNZ (imm19, +/-1 MiB) + * a64_patch_tbz : TBZ/TBNZ (imm14, +/-32 KiB) + * + * These primitives are intentionally bare: they assume the caller has + * recorded `site` (the byte address of the instruction word) at emit + * time -- typically from a64_codegen_wptr just before the branch is + * emitted. They neither track nor invalidate any label state. 
+ */ +int a64_patch_b (void* site, const void* target); +int a64_patch_b_cond (void* site, const void* target); +int a64_patch_cbz (void* site, const void* target); +int a64_patch_tbz (void* site, const void* target); + +#ifdef __cplusplus +} +#endif + +#endif /* MDFN_SS_A64EMIT_H */ diff --git a/mednafen/ss/ak93c45.c b/mednafen/ss/ak93c45.c index da298be7..76123e82 100644 --- a/mednafen/ss/ak93c45.c +++ b/mednafen/ss/ak93c45.c @@ -28,10 +28,10 @@ in the header. */ #include -#include #include +#include -#include +#include "../state.h" #include "ak93c45.h" diff --git a/mednafen/ss/ak93c45.h b/mednafen/ss/ak93c45.h index facd23b1..6780a7b8 100644 --- a/mednafen/ss/ak93c45.h +++ b/mednafen/ss/ak93c45.h @@ -23,14 +23,13 @@ #define __MDFN_SS_AK93C45_H #include -#ifndef __cplusplus -#include -#endif #include +#include -#include /* MDFN_COLD */ #include /* INLINE */ -#include + +#include "../mednafen-types.h" /* MDFN_COLD */ +#include "../state.h" /* The AK93C45 serial EEPROM (used by the ST-V cart hardware). Converted from a C++ class to a C struct + free functions; it had diff --git a/mednafen/ss/cart.c b/mednafen/ss/cart.c index 01b626cb..0058728e 100644 --- a/mednafen/ss/cart.c +++ b/mednafen/ss/cart.c @@ -29,8 +29,8 @@ #include #include -#include #include +#include #include diff --git a/mednafen/ss/cart.h b/mednafen/ss/cart.h index 342a262c..009469d8 100644 --- a/mednafen/ss/cart.h +++ b/mednafen/ss/cart.h @@ -23,13 +23,11 @@ #define __MDFN_SS_CART_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_HIDE, MDFN_COLD */ -#include /* INLINE */ -#include +#include /* INLINE */ +#include "../mednafen-types.h" /* MDFN_HIDE, MDFN_COLD */ +#include "../state.h" /* Formerly relied on being a C++-only header. Now valid as C too, so cart.c and the cart/ device .c files can include it. 
diff --git a/mednafen/ss/cart/ar4mp.c b/mednafen/ss/cart/ar4mp.c index 72c180b5..7c332ffc 100644 --- a/mednafen/ss/cart/ar4mp.c +++ b/mednafen/ss/cart/ar4mp.c @@ -30,14 +30,14 @@ the std::unique_ptr ownership guard). */ #include -#include #include #include +#include #include -#include /* MDFN_HOT, MDFN_COLD, MDFN_UNLIKELY */ -#include /* SFORMAT, SFPTR16, SFEND, MDFNSS_StateAction */ +#include "../../mednafen-types.h" /* MDFN_HOT, MDFN_COLD, MDFN_UNLIKELY */ +#include "../../state.h" /* SFORMAT, SFPTR16, SFEND, MDFNSS_StateAction */ #include "../cart.h" #include "ar4mp.h" diff --git a/mednafen/ss/cart/ar4mp.h b/mednafen/ss/cart/ar4mp.h index 367a622c..e45d2385 100644 --- a/mednafen/ss/cart/ar4mp.h +++ b/mednafen/ss/cart/ar4mp.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_AR4MP_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #include /* RFILE */ #ifdef __cplusplus diff --git a/mednafen/ss/cart/backup.c b/mednafen/ss/cart/backup.c index bfc71ea6..8011fdc4 100644 --- a/mednafen/ss/cart/backup.c +++ b/mednafen/ss/cart/backup.c @@ -25,11 +25,11 @@ functions, one read and one write. 
*/ #include -#include #include +#include -#include /* MDFN_HOT, MDFN_COLD */ -#include /* SFORMAT, SFPTR8N, SFEND, MDFNSS_StateAction */ +#include "../../mednafen-types.h" /* MDFN_HOT, MDFN_COLD */ +#include "../../state.h" /* SFORMAT, SFPTR8N, SFEND, MDFNSS_StateAction */ #include "../cart.h" #include "backup.h" diff --git a/mednafen/ss/cart/backup.h b/mednafen/ss/cart/backup.h index ee8828bc..7cff11fa 100644 --- a/mednafen/ss/cart/backup.h +++ b/mednafen/ss/cart/backup.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_BACKUP_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/cart/bootrom.c b/mednafen/ss/cart/bootrom.c index 448b06bf..1456b22c 100644 --- a/mednafen/ss/cart/bootrom.c +++ b/mednafen/ss/cart/bootrom.c @@ -25,18 +25,18 @@ calls. */ #include -#include #include #include #include +#include #include #include -#include /* MDFN_HOT, MDFN_COLD */ -#include /* round_up_pow2 */ -#include /* MDFNGameInfo */ -#include +#include "../../mednafen-types.h" /* MDFN_HOT, MDFN_COLD */ +#include "../../math_ops.h" /* round_up_pow2 */ +#include "../../mdfn_gameinfo.h" /* MDFNGameInfo */ +#include "../../hash/sha256.h" #include "../cart.h" #include "bootrom.h" #include "backup.h" diff --git a/mednafen/ss/cart/bootrom.h b/mednafen/ss/cart/bootrom.h index 6de71222..2286bb04 100644 --- a/mednafen/ss/cart/bootrom.h +++ b/mednafen/ss/cart/bootrom.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_BOOTROM_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #include /* RFILE */ #ifdef __cplusplus diff --git a/mednafen/ss/cart/common.h b/mednafen/ss/cart/common.h index fd8021e9..73036e93 100644 --- a/mednafen/ss/cart/common.h +++ b/mednafen/ss/cart/common.h @@ -1,3 +1,3 @@ #include "../ss.h" #include "../cart.h" -#include +#include "../../mednafen.h" diff --git 
a/mednafen/ss/cart/cs1ram.c b/mednafen/ss/cart/cs1ram.c index 5dc102a6..555ed4ab 100644 --- a/mednafen/ss/cart/cs1ram.c +++ b/mednafen/ss/cart/cs1ram.c @@ -26,12 +26,12 @@ new[]/delete[] become calloc/free. */ #include -#include #include #include +#include -#include /* MDFN_HOT, MDFN_COLD */ -#include /* SFORMAT, SFPTR16N, SFEND, MDFNSS_StateAction */ +#include "../../mednafen-types.h" /* MDFN_HOT, MDFN_COLD */ +#include "../../state.h" /* SFORMAT, SFPTR16N, SFEND, MDFNSS_StateAction */ #include "../cart.h" #include "cs1ram.h" diff --git a/mednafen/ss/cart/cs1ram.h b/mednafen/ss/cart/cs1ram.h index e23ea688..fe2ee753 100644 --- a/mednafen/ss/cart/cs1ram.h +++ b/mednafen/ss/cart/cs1ram.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_CS1RAM_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/cart/extram.c b/mednafen/ss/cart/extram.c index f49cc15a..cae67128 100644 --- a/mednafen/ss/cart/extram.c +++ b/mednafen/ss/cart/extram.c @@ -24,12 +24,12 @@ mask), so it monomorphizes to read16 / write8 / write16. 
*/ #include -#include #include #include +#include -#include /* MDFN_HOT, MDFN_COLD */ -#include /* SFORMAT, SFPTR16N, SFEND, MDFNSS_StateAction */ +#include "../../mednafen-types.h" /* MDFN_HOT, MDFN_COLD */ +#include "../../state.h" /* SFORMAT, SFPTR16N, SFEND, MDFNSS_StateAction */ #include "../cart.h" #include "extram.h" diff --git a/mednafen/ss/cart/extram.h b/mednafen/ss/cart/extram.h index b21502dc..ae52297b 100644 --- a/mednafen/ss/cart/extram.h +++ b/mednafen/ss/cart/extram.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_EXTRAM_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/cart/rom.c b/mednafen/ss/cart/rom.c index c275f541..166485ae 100644 --- a/mednafen/ss/cart/rom.c +++ b/mednafen/ss/cart/rom.c @@ -24,12 +24,12 @@ the c->CS01_SetRW8W16 member call becoming a free-function call. */ #include -#include #include /* memset (short-read zero-fill below) */ +#include #include -#include /* MDFN_HOT */ +#include "../../mednafen-types.h" /* MDFN_HOT */ #include "../cart.h" #include "rom.h" diff --git a/mednafen/ss/cart/rom.h b/mednafen/ss/cart/rom.h index 12a06915..8571d04f 100644 --- a/mednafen/ss/cart/rom.h +++ b/mednafen/ss/cart/rom.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_ROM_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #include /* RFILE */ #ifdef __cplusplus diff --git a/mednafen/ss/cart/stv.c b/mednafen/ss/cart/stv.c index 572f69cf..a3d4c6f5 100644 --- a/mednafen/ss/cart/stv.c +++ b/mednafen/ss/cart/stv.c @@ -35,16 +35,16 @@ the C-includable db_stv.h. new[]/delete[] become calloc/free. 
*/ #include -#include #include #include #include +#include #include #include -#include /* MDFN_HOT */ -#include /* SFORMAT, SFVAR, SFEND, MDFNSS_StateAction */ +#include "../../mednafen-types.h" /* MDFN_HOT */ +#include "../../state.h" /* SFORMAT, SFVAR, SFEND, MDFNSS_StateAction */ #include "../cart.h" #include "../db_stv.h" #include "stv.h" diff --git a/mednafen/ss/cart/stv.h b/mednafen/ss/cart/stv.h index 7b3589ca..efa2501b 100644 --- a/mednafen/ss/cart/stv.h +++ b/mednafen/ss/cart/stv.h @@ -23,11 +23,9 @@ #define __MDFN_SS_CART_STV_H #include -#ifndef __cplusplus -#include -#endif +#include -#include /* MDFN_COLD */ +#include "../../mednafen-types.h" /* MDFN_COLD */ #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/cdb.c b/mednafen/ss/cdb.c index 67eb83a3..08e1bde3 100644 --- a/mednafen/ss/cdb.c +++ b/mednafen/ss/cdb.c @@ -139,8 +139,8 @@ #include "sound.h" #include "cdb.h" -#include -#include +#include "../cdrom/CDUtility.h" +#include "../cdrom/cdromif.h" static void CheckBufPauseResume(void); static void StartSeek(const uint32_t cmd_target, const uint32_t cur_play_end, const uint32_t cur_play_repeat, const uint32_t play_end_irq_type, const bool no_pickup_change); diff --git a/mednafen/ss/cdb.h b/mednafen/ss/cdb.h index 96864c82..fc8d93b1 100644 --- a/mednafen/ss/cdb.h +++ b/mednafen/ss/cdb.h @@ -22,17 +22,13 @@ #ifndef __MDFN_SS_CDB_H #define __MDFN_SS_CDB_H -#include +#include "../state.h" /* MDFN_COLD / MDFN_HOT. Existing C++ TUs got these transitively * via ss.h / mednafen.h; for C consumers include them directly. */ -#include +#include "../mednafen-types.h" #include -/* C++ has 'bool' built in; C inclusion needs the stdbool keyword - * macros. */ -#ifndef __cplusplus -#include -#endif +#include #include "../cdrom/cdromif.h" diff --git a/mednafen/ss/db.c b/mednafen/ss/db.c index eb6788d3..f4c33ef1 100644 --- a/mednafen/ss/db.c +++ b/mednafen/ss/db.c @@ -26,9 +26,8 @@ with full cache emulation enabled. 
*/ -#include +#include "../hash/crc.h" #include -#include #include "ss.h" #include "smpc.h" diff --git a/mednafen/ss/db_stv.h b/mednafen/ss/db_stv.h index 734ae184..f77708ef 100644 --- a/mednafen/ss/db_stv.h +++ b/mednafen/ss/db_stv.h @@ -23,9 +23,7 @@ #define __MDFN_SS_DB_STV_H #include -#ifndef __cplusplus -#include -#endif +#include /* The ST-V game-info structs and enums, factored out of db.h so they can be included from plain C. db.h itself is C++ (it pulls in diff --git a/mednafen/ss/jitdump.c b/mednafen/ss/jitdump.c new file mode 100644 index 00000000..39a8e730 --- /dev/null +++ b/mednafen/ss/jitdump.c @@ -0,0 +1,165 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* jitdump.c - shared Linux perf jitdump writer for the DSP JITs +** Copyright (C) 2026 pstef +*/ + +#include "jitdump.h" + +#if defined(WANT_DSP_JIT_PERF_DUMP) && (defined(__aarch64__) || defined(__arm64__)) + +/* + * Perf jitdump writer. Produces /tmp/jit-.dump in the Linux perf + * jitdump v1 format (see kernel docs Documentation/admin-guide/perf/...). + * `perf record` captures the marker mmap, then `perf inject --jit` reads + * the dump and emits ELF stubs so `perf report` resolves samples landing + * in our code segment to per-slot symbols. + * + * Shared between the SCU and SCSP DSP JITs. Both backends compile under + * their respective subsystem locks on the same emulator thread, so the + * single fd / index counter don't need any explicit serialization here. + * The atexit handler fires after that thread exits. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define JITDUMP_MAGIC 0x4A695444u /* "JiTD" */ +#define JITDUMP_VERSION 1u +#define JIT_CODE_LOAD 0u +#define JIT_CODE_CLOSE 3u +#define ELF_MACH_AARCH64 183u + +struct JitdumpHeader +{ + uint32_t magic; + uint32_t version; + uint32_t total_size; + uint32_t elf_mach; + uint32_t pad1; + uint32_t pid; + uint64_t timestamp; + uint64_t flags; +}; + +struct JitdumpRecPrefix +{ + uint32_t id; + uint32_t total_size; + uint64_t timestamp; +}; + +struct JitdumpRecCodeLoad +{ + struct JitdumpRecPrefix p; + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; + /* followed by NUL-terminated name, then code_size bytes of code */ +}; + +static int g_jitdump_fd = -1; +static void* g_jitdump_marker = NULL; +static size_t g_jitdump_marker_size = 0; +static uint64_t g_jitdump_index = 0; + +static uint64_t jitdump_now_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec; +} + +static void jitdump_close(void) +{ + struct JitdumpRecPrefix close_rec = {0}; + if(g_jitdump_fd < 0) return; + close_rec.id = JIT_CODE_CLOSE; + close_rec.total_size = sizeof(close_rec); + close_rec.timestamp = jitdump_now_ns(); + (void)write(g_jitdump_fd, &close_rec, sizeof(close_rec)); + if(g_jitdump_marker) (void)munmap(g_jitdump_marker, g_jitdump_marker_size); + (void)close(g_jitdump_fd); + g_jitdump_fd = -1; + g_jitdump_marker = NULL; + g_jitdump_marker_size = 0; +} + +void SS_JitDump_Open(void) +{ + char path[64]; + int fd; + struct JitdumpHeader hdr = {0}; + long pagesz; + + if(g_jitdump_fd >= 0) return; + snprintf(path, sizeof(path), "/tmp/jit-%d.dump", (int)getpid()); + fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0644); + if(fd < 0) return; + + hdr.magic = JITDUMP_MAGIC; + hdr.version = JITDUMP_VERSION; + hdr.total_size = sizeof(hdr); + hdr.elf_mach 
= ELF_MACH_AARCH64; + hdr.pid = (uint32_t)getpid(); + hdr.timestamp = jitdump_now_ns(); + if(write(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) + { + (void)close(fd); + return; + } + + /* The marker mmap is what `perf record` sees in its MMAP events; that + * is how `perf inject --jit` discovers our dump file. One page of + * PROT_READ|PROT_EXEC at file offset 0 is the documented contract. */ + pagesz = sysconf(_SC_PAGESIZE); + g_jitdump_marker_size = (pagesz > 0) ? (size_t)pagesz : 4096u; + g_jitdump_marker = mmap(NULL, g_jitdump_marker_size, + PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); + if(g_jitdump_marker == MAP_FAILED) g_jitdump_marker = NULL; + + g_jitdump_fd = fd; + atexit(jitdump_close); +} + +void SS_JitDump_Emit(const char* name, const void* code_addr, size_t code_size) +{ + size_t name_len; + struct JitdumpRecCodeLoad rec = {0}; + struct iovec iov[3]; + + if(g_jitdump_fd < 0 || !code_addr || code_size == 0) return; + + name_len = strlen(name) + 1; + rec.p.id = JIT_CODE_LOAD; + rec.p.total_size = (uint32_t)(sizeof(rec) + name_len + code_size); + rec.p.timestamp = jitdump_now_ns(); + rec.pid = (uint32_t)getpid(); + rec.tid = (uint32_t)syscall(SYS_gettid); + rec.vma = (uint64_t)(uintptr_t)code_addr; + rec.code_addr = rec.vma; + rec.code_size = code_size; + rec.code_index = ++g_jitdump_index; + + iov[0].iov_base = &rec; + iov[0].iov_len = sizeof(rec); + iov[1].iov_base = (char*)name; + iov[1].iov_len = name_len; + iov[2].iov_base = (void*)code_addr; + iov[2].iov_len = code_size; + (void)writev(g_jitdump_fd, iov, 3); +} + +#endif diff --git a/mednafen/ss/jitdump.h b/mednafen/ss/jitdump.h new file mode 100644 index 00000000..329e5d1c --- /dev/null +++ b/mednafen/ss/jitdump.h @@ -0,0 +1,43 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* jitdump.h - shared Linux perf jitdump writer for the DSP 
JITs +** Copyright (C) 2026 pstef +*/ + +#ifndef __MDFN_SS_JITDUMP_H +#define __MDFN_SS_JITDUMP_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Single per-process jitdump stream shared by the SCU and SCSP DSP + * JITs. Without sharing, each compile unit would own its own + * O_TRUNC'd /tmp/jit-.dump fd and clobber the other's header. + * The implementation is in jitdump.c and is compiled only when + * WANT_DSP_JIT_PERF_DUMP is set and the target is aarch64; on every + * other configuration the stubs below collapse to no-ops at the call + * site so callers don't need their own guards. + */ +#if defined(WANT_DSP_JIT_PERF_DUMP) && (defined(__aarch64__) || defined(__arm64__)) + +void SS_JitDump_Open(void); +void SS_JitDump_Emit(const char* name, const void* code_addr, size_t code_size); + +#else + +static inline void SS_JitDump_Open(void) {} +static inline void SS_JitDump_Emit(const char* name, const void* code_addr, size_t code_size) +{ (void)name; (void)code_addr; (void)code_size; } + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/mednafen/ss/notes/build_sh7095s_ctable.c b/mednafen/ss/notes/build_sh7095s_ctable.c index 457fd3f0..8477c352 100644 --- a/mednafen/ss/notes/build_sh7095s_ctable.c +++ b/mednafen/ss/notes/build_sh7095s_ctable.c @@ -1,15 +1,53 @@ -#include - - -int main(int argc, char* argv[]) -{ -// printf("static_assert(__COUNTER__ <= 11025, \"Unexpected __COUNTER__\");\n"); - const unsigned base = 2*5000; //10000; - const unsigned max_entries = 512; //512; - for(int i = max_entries; i > 0; i--) - { - printf("#if __COUNTER__ >= %u\n", base + max_entries + 2); - printf(" &&Resume_%u,\n", base + i); - printf("#endif\n"); - } -} +/* Regenerator for sh7095s_ctable.inc and sh7095s_ctable_dm.inc. + * + * Both files are switch-case dispatch tables for the + * SH7095_RunSlaveUntil resume mechanism. 
They map a numeric + * resume_id (saved by the CHECK_EXIT_RESUME__ macro at the yield + * site) back to the corresponding `Resume_NNNN:` label inside + * the function body, via `case N: goto Resume_N;`. + * + * Pre-conversion, these were arrays of GCC `&&Resume_NNNN` label + * addresses for `goto *ptr;` computed-goto dispatch. See + * sh7095.inc's CHECK_EXIT_RESUME__ macro definition and + * sh7095s_rsu.inc's function-entry switch for the consumer side. + * + * Run: + * gcc -O2 build_sh7095s_ctable.c -o build_sh7095s_ctable + * ./build_sh7095s_ctable > ../sh7095s_ctable.inc + * ./build_sh7095s_ctable debug > ../sh7095s_ctable_dm.inc + * + * Range bounds (5001..5392, 10001..10392) match the static_assert + * invariants in sh7095_ops.inc that pin the __COUNTER__ values to + * the 5000 + 393 / 10000 + 393 base offsets. Update the +393 in + * both this generator and the matching assertion if the number of + * CHECK_EXIT_RESUME() expansions in the slave path changes. + */ +#include +#include + +int main(int argc, char* argv[]) +{ + int debug = (argc > 1 && !strcmp(argv[1], "debug")); + unsigned base = debug ? 10000 : 5000; + unsigned first = base + 1; + unsigned last = base + 392; + unsigned i; + + /* Header comment block. */ + printf("/* Switch-case dispatch entries for the %s SH7095_RunSlaveUntil%s\n", + debug ? "debug" : "non-debug", debug ? "_Debug" : ""); + printf(" * resume path. Numbered to match the __COUNTER__ values that\n"); + printf(" * CHECK_EXIT_RESUME() expansions assign as `Resume_NNNN:` labels.\n"); + printf(" *\n"); + printf(" * Range: %u .. %u (%u entries). Regenerate via\n", + first, last, last - first + 1); + printf(" * notes/build_sh7095s_ctable.c%s\n", debug ? " debug" : ""); + printf(" *\n"); + printf(" * Consumes ZERO __COUNTER__ values; the resume-id integers are\n"); + printf(" * compile-time constants in each `case` label. 
*/\n"); + + for(i = first; i <= last; i++) + printf(" case %u: goto Resume_%u;\n", i, i); + + return 0; +} diff --git a/mednafen/ss/scsp.h b/mednafen/ss/scsp.h index 9d558c3a..90ec8203 100644 --- a/mednafen/ss/scsp.h +++ b/mednafen/ss/scsp.h @@ -19,7 +19,7 @@ ** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include +#include "../state.h" /* Phase-9a: class -> struct. Members formerly under `private:` * are now (implicitly) public, preparing for eventual C @@ -59,15 +59,23 @@ enum /* C-compat typedefs: in C the struct tag is not auto-aliased to a * type name, so a plain `SS_SCSP*` parameter at file scope fails to - * parse without an explicit typedef. Forward-declare all four + * parse without an explicit typedef. Forward-declare all five * tag-to-typename aliases up front so the struct bodies below can * reference each other and the function decls further down can * spell `SS_SCSP*` directly. */ typedef struct SS_SCSP_Slot SS_SCSP_Slot; +typedef struct SS_SCSP_Timer SS_SCSP_Timer; typedef struct SS_SCSP_DSPStep SS_SCSP_DSPStep; typedef struct SS_SCSP_DSPS SS_SCSP_DSPS; typedef struct SS_SCSP SS_SCSP; +/* Was an in-class const member with default initializer (C++11 only); + * hoisted to file scope so scsp.h parses as C too. Const + static + * means each TU gets its own copy that LTO folds away. */ +static const uint16_t SS_SCSP_SB_XOR_Table[4] = { + 0x0000, 0x7FFF, 0x8000, 0xFFFF +}; + struct SS_SCSP_Slot { uint32_t StartAddr; // 20 bits, memory address. @@ -242,6 +250,20 @@ struct SS_SCSP_DSPS bool MPROG_Dirty; }; +/* SS_SCSP_Timer -- file-scope so the SS_SCSP_Timer typedef at the + * top of this header resolves to the same type the SS_SCSP::Timers[3] + * field is declared with. 
Defining the struct inline inside SS_SCSP + * (as it used to be -- anonymous, then briefly named-but-nested) + * would create the nested type `SS_SCSP::SS_SCSP_Timer` in C++, + * which is a distinct type from the file-scope forward-declared + * `struct SS_SCSP_Timer` -- breaking pointer assignments. */ +struct SS_SCSP_Timer +{ + uint8_t Control; + uint8_t Counter; + int32_t Reload; +}; + struct SS_SCSP { /* Phase-8f: RunSample's `template` form @@ -283,8 +305,6 @@ struct SS_SCSP uint32_t LFSR; uint32_t GlobalCounter; - const uint16_t SB_XOR_Table[4] = { 0x0000, 0x7FFF, 0x8000, 0xFFFF }; - // // struct @@ -313,12 +333,7 @@ struct SS_SCSP uint8_t SCILV[3]; // // - struct - { - uint8_t Control; - uint8_t Counter; - int32_t Reload; - } Timers[3]; + SS_SCSP_Timer Timers[3]; // // // DMEA, DRGA, and DTLG are apparently not altered by executing DMA. @@ -335,7 +350,6 @@ struct SS_SCSP uint8_t RBP; uint8_t RBL; - // Carried-state write bitmask, plus the one read-side bit the liveness pass needs. // diff --git a/mednafen/ss/scsp.inc b/mednafen/ss/scsp.inc index 17368eab..4942c3d4 100644 --- a/mednafen/ss/scsp.inc +++ b/mednafen/ss/scsp.inc @@ -193,8 +193,17 @@ static INLINE void SS_SCSP_RecalcShortWaveMask(SS_SCSP* z, SS_SCSP_Slot* s) void SS_SCSP_Reset(SS_SCSP* z, bool powering_up) { +#ifdef WANT_JIT + if(setting_jit_scsp) + { + if(powering_up) + SCSP_DSP_JIT_Init(z); + else + SCSP_DSP_JIT_Reset(z); + } +#endif // - // May need to add a DecodeSlotReg() function or something similar if we implement + // May need to add a DecodeSlotReg() function or something similar if we implement // more aggressive slot register value optimizations on writes in the future. 
// memset(z->SlotRegs, 0, sizeof(z->SlotRegs)); @@ -1025,96 +1034,96 @@ INLINE void SS_SCSP_RW_u8_W1(SS_SCSP* z, uint32_t A, uint8_t* DBV_p) SCSP_NE16_RW(uint8_t, true, z->SlotRegs[slotnum], A & 0x1F, &(*DBV_p)); { - auto* s = &z->Slots[slotnum]; - uint16_t& SRV = z->SlotRegs[slotnum][(A >> 1) & 0xF]; + SS_SCSP_Slot* s = &z->Slots[slotnum]; + uint16_t* SRV = &z->SlotRegs[slotnum][(A >> 1) & 0xF]; switch((A >> 1) & 0xF) { case 0x00: - z->KeyExecute |= (bool)(SRV & 0x1000); - SRV &= 0x0FFF; + z->KeyExecute |= (bool)(*SRV & 0x1000); + *SRV &= 0x0FFF; - s->KeyBit = (SRV >> 11) & 0x1; - s->SBXOR = z->SB_XOR_Table[(SRV >> 9) & 0x3]; - s->SourceControl = (SRV >> 7) & 0x3; - s->LoopMode = (SRV >> 5) & 0x3; - s->WF8Bit = (SRV >> 4) & 0x1; - s->StartAddr = (s->StartAddr & 0xFFFF) | ((SRV & 0xF) << 16); + s->KeyBit = (*SRV >> 11) & 0x1; + s->SBXOR = SS_SCSP_SB_XOR_Table[(*SRV >> 9) & 0x3]; + s->SourceControl = (*SRV >> 7) & 0x3; + s->LoopMode = (*SRV >> 5) & 0x3; + s->WF8Bit = (*SRV >> 4) & 0x1; + s->StartAddr = (s->StartAddr & 0xFFFF) | ((*SRV & 0xF) << 16); break; case 0x01: - s->StartAddr = (s->StartAddr &~ 0xFFFF) | SRV; + s->StartAddr = (s->StartAddr &~ 0xFFFF) | *SRV; break; case 0x02: - s->LoopStart = SRV; + s->LoopStart = *SRV; break; case 0x03: - s->LoopEnd = SRV; + s->LoopEnd = *SRV; // SS_SCSP_RecalcShortWaveMask(z, s); break; case 0x04: - s->EnvRates[ENV_PHASE_ATTACK] = SRV & 0x1F; - s->AttackHold = (SRV >> 5) & 0x1; - s->EnvRates[ENV_PHASE_DECAY1] = (SRV >> 6) & 0x1F; - s->EnvRates[ENV_PHASE_DECAY2] = (SRV >> 11) & 0x1F; + s->EnvRates[ENV_PHASE_ATTACK] = *SRV & 0x1F; + s->AttackHold = (*SRV >> 5) & 0x1; + s->EnvRates[ENV_PHASE_DECAY1] = (*SRV >> 6) & 0x1F; + s->EnvRates[ENV_PHASE_DECAY2] = (*SRV >> 11) & 0x1F; break; case 0x05: - s->EnvRates[ENV_PHASE_RELEASE] = SRV & 0x1F; - s->DecayLevel = (SRV >> 5) & 0x1F; - s->KRS = (SRV >> 10) & 0xF; - s->AttackLoopLink = (SRV >> 14) & 0x1; - s->EGBypass = (SRV >> 15) & 0x1; + s->EnvRates[ENV_PHASE_RELEASE] = *SRV & 0x1F; + 
s->DecayLevel = (*SRV >> 5) & 0x1F; + s->KRS = (*SRV >> 10) & 0xF; + s->AttackLoopLink = (*SRV >> 14) & 0x1; + s->EGBypass = (*SRV >> 15) & 0x1; break; case 0x06: - SRV &= 0x0FFF; + *SRV &= 0x0FFF; - s->TotalLevel = SRV & 0xFF; - s->SoundDirect = (SRV >> 8) & 0x1; - s->StackWriteInhibit = (SRV >> 9) & 0x1; + s->TotalLevel = *SRV & 0xFF; + s->SoundDirect = (*SRV >> 8) & 0x1; + s->StackWriteInhibit = (*SRV >> 9) & 0x1; break; case 0x07: - s->ModInputY = SRV & 0x3F; - s->ModInputX = (SRV >> 6) & 0x3F; - s->ModLevel = (SRV >> 12) & 0xF; + s->ModInputY = *SRV & 0x3F; + s->ModInputX = (*SRV >> 6) & 0x3F; + s->ModLevel = (*SRV >> 12) & 0xF; break; case 0x08: - s->FreqNum = SRV & 0x7FF; - s->Octave = (SRV >> 11) & 0xF; - s->ShortWave = (SRV >> 15) & 0x1; + s->FreqNum = *SRV & 0x7FF; + s->Octave = (*SRV >> 11) & 0xF; + s->ShortWave = (*SRV >> 15) & 0x1; // SS_SCSP_RecalcShortWaveMask(z, s); break; case 0x09: - s->ALFOModLevel = SRV & 0x7; - s->ALFOWaveform = (SRV >> 3) & 0x3; - s->PLFOModLevel = (SRV >> 5) & 0x7; - s->PLFOWaveform = (SRV >> 8) & 0x3; - s->LFOFreq = (SRV >> 10) & 0x1F; - s->LFOReset = (SRV >> 15) & 0x1; + s->ALFOModLevel = *SRV & 0x7; + s->ALFOWaveform = (*SRV >> 3) & 0x3; + s->PLFOModLevel = (*SRV >> 5) & 0x7; + s->PLFOWaveform = (*SRV >> 8) & 0x3; + s->LFOFreq = (*SRV >> 10) & 0x1F; + s->LFOReset = (*SRV >> 15) & 0x1; break; case 0x0A: - SRV &= 0x00FF; - s->ToDSPLevel = SRV & 0x7; - s->ToDSPSelect = (SRV >> 3) & 0xF; + *SRV &= 0x00FF; + s->ToDSPLevel = *SRV & 0x7; + s->ToDSPSelect = (*SRV >> 3) & 0xF; break; case 0x0B: - SDL_PAN_ToVolume(s->DirectVolume, (SRV >> 13) & 0x7, (SRV >> 8) & 0x1F); - SDL_PAN_ToVolume(s->EffectVolume, (SRV >> 5) & 0x7, (SRV >> 0) & 0x1F); + SDL_PAN_ToVolume(s->DirectVolume, (*SRV >> 13) & 0x7, (*SRV >> 8) & 0x1F); + SDL_PAN_ToVolume(s->EffectVolume, (*SRV >> 5) & 0x7, (*SRV >> 0) & 0x1F); break; case 0x0C: case 0x0D: case 0x0E: case 0x0F: - SRV = 0; + *SRV = 0; break; } } @@ -1168,8 +1177,13 @@ INLINE void 
SS_SCSP_RW_u8_W1(SS_SCSP* z, uint32_t A, uint8_t* DBV_p) { uint16_t tmp = z->RBP | (z->RBL << 7); tmp = (tmp &~ mask) | (((*DBV_p) << shift) & mask); - z->RBP = tmp & 0x7F; - z->RBL = (tmp >> 7) & 0x3; + const uint8_t new_RBP = tmp & 0x7F; + const uint8_t new_RBL = (tmp >> 7) & 0x3; + // JIT folds RBL/RBP at emit time, so changes force a recompile. + if(new_RBL != z->RBL || new_RBP != z->RBP) + z->DSP.MPROG_Dirty = true; + z->RBP = new_RBP; + z->RBL = new_RBL; } break; @@ -1254,7 +1268,7 @@ INLINE void SS_SCSP_RW_u8_W1(SS_SCSP* z, uint32_t A, uint8_t* DBV_p) case 0x0D: // TIMB(W), TBCTL(W) case 0x0E: // TIMC(W), TCCTL(W) { - auto* t = &z->Timers[((A >> 1) & 0x1F) - 0x0C]; + SS_SCSP_Timer* t = &z->Timers[((A >> 1) & 0x1F) - 0x0C]; uint16_t tmp = (t->Control << 8); tmp = (tmp &~ mask) | (((*DBV_p) << shift) & mask); t->Control = (tmp >> 8) & 0x7; @@ -1496,96 +1510,96 @@ INLINE void SS_SCSP_RW_u16_W1(SS_SCSP* z, uint32_t A, uint16_t* DBV_p) SCSP_NE16_RW(uint16_t, true, z->SlotRegs[slotnum], A & 0x1F, &(*DBV_p)); { - auto* s = &z->Slots[slotnum]; - uint16_t& SRV = z->SlotRegs[slotnum][(A >> 1) & 0xF]; + SS_SCSP_Slot* s = &z->Slots[slotnum]; + uint16_t* SRV = &z->SlotRegs[slotnum][(A >> 1) & 0xF]; switch((A >> 1) & 0xF) { case 0x00: - z->KeyExecute |= (bool)(SRV & 0x1000); - SRV &= 0x0FFF; + z->KeyExecute |= (bool)(*SRV & 0x1000); + *SRV &= 0x0FFF; - s->KeyBit = (SRV >> 11) & 0x1; - s->SBXOR = z->SB_XOR_Table[(SRV >> 9) & 0x3]; - s->SourceControl = (SRV >> 7) & 0x3; - s->LoopMode = (SRV >> 5) & 0x3; - s->WF8Bit = (SRV >> 4) & 0x1; - s->StartAddr = (s->StartAddr & 0xFFFF) | ((SRV & 0xF) << 16); + s->KeyBit = (*SRV >> 11) & 0x1; + s->SBXOR = SS_SCSP_SB_XOR_Table[(*SRV >> 9) & 0x3]; + s->SourceControl = (*SRV >> 7) & 0x3; + s->LoopMode = (*SRV >> 5) & 0x3; + s->WF8Bit = (*SRV >> 4) & 0x1; + s->StartAddr = (s->StartAddr & 0xFFFF) | ((*SRV & 0xF) << 16); break; case 0x01: - s->StartAddr = (s->StartAddr &~ 0xFFFF) | SRV; + s->StartAddr = (s->StartAddr &~ 0xFFFF) | *SRV; 
break; case 0x02: - s->LoopStart = SRV; + s->LoopStart = *SRV; break; case 0x03: - s->LoopEnd = SRV; + s->LoopEnd = *SRV; // SS_SCSP_RecalcShortWaveMask(z, s); break; case 0x04: - s->EnvRates[ENV_PHASE_ATTACK] = SRV & 0x1F; - s->AttackHold = (SRV >> 5) & 0x1; - s->EnvRates[ENV_PHASE_DECAY1] = (SRV >> 6) & 0x1F; - s->EnvRates[ENV_PHASE_DECAY2] = (SRV >> 11) & 0x1F; + s->EnvRates[ENV_PHASE_ATTACK] = *SRV & 0x1F; + s->AttackHold = (*SRV >> 5) & 0x1; + s->EnvRates[ENV_PHASE_DECAY1] = (*SRV >> 6) & 0x1F; + s->EnvRates[ENV_PHASE_DECAY2] = (*SRV >> 11) & 0x1F; break; case 0x05: - s->EnvRates[ENV_PHASE_RELEASE] = SRV & 0x1F; - s->DecayLevel = (SRV >> 5) & 0x1F; - s->KRS = (SRV >> 10) & 0xF; - s->AttackLoopLink = (SRV >> 14) & 0x1; - s->EGBypass = (SRV >> 15) & 0x1; + s->EnvRates[ENV_PHASE_RELEASE] = *SRV & 0x1F; + s->DecayLevel = (*SRV >> 5) & 0x1F; + s->KRS = (*SRV >> 10) & 0xF; + s->AttackLoopLink = (*SRV >> 14) & 0x1; + s->EGBypass = (*SRV >> 15) & 0x1; break; case 0x06: - SRV &= 0x0FFF; + *SRV &= 0x0FFF; - s->TotalLevel = SRV & 0xFF; - s->SoundDirect = (SRV >> 8) & 0x1; - s->StackWriteInhibit = (SRV >> 9) & 0x1; + s->TotalLevel = *SRV & 0xFF; + s->SoundDirect = (*SRV >> 8) & 0x1; + s->StackWriteInhibit = (*SRV >> 9) & 0x1; break; case 0x07: - s->ModInputY = SRV & 0x3F; - s->ModInputX = (SRV >> 6) & 0x3F; - s->ModLevel = (SRV >> 12) & 0xF; + s->ModInputY = *SRV & 0x3F; + s->ModInputX = (*SRV >> 6) & 0x3F; + s->ModLevel = (*SRV >> 12) & 0xF; break; case 0x08: - s->FreqNum = SRV & 0x7FF; - s->Octave = (SRV >> 11) & 0xF; - s->ShortWave = (SRV >> 15) & 0x1; + s->FreqNum = *SRV & 0x7FF; + s->Octave = (*SRV >> 11) & 0xF; + s->ShortWave = (*SRV >> 15) & 0x1; // SS_SCSP_RecalcShortWaveMask(z, s); break; case 0x09: - s->ALFOModLevel = SRV & 0x7; - s->ALFOWaveform = (SRV >> 3) & 0x3; - s->PLFOModLevel = (SRV >> 5) & 0x7; - s->PLFOWaveform = (SRV >> 8) & 0x3; - s->LFOFreq = (SRV >> 10) & 0x1F; - s->LFOReset = (SRV >> 15) & 0x1; + s->ALFOModLevel = *SRV & 0x7; + s->ALFOWaveform = 
(*SRV >> 3) & 0x3; + s->PLFOModLevel = (*SRV >> 5) & 0x7; + s->PLFOWaveform = (*SRV >> 8) & 0x3; + s->LFOFreq = (*SRV >> 10) & 0x1F; + s->LFOReset = (*SRV >> 15) & 0x1; break; case 0x0A: - SRV &= 0x00FF; - s->ToDSPLevel = SRV & 0x7; - s->ToDSPSelect = (SRV >> 3) & 0xF; + *SRV &= 0x00FF; + s->ToDSPLevel = *SRV & 0x7; + s->ToDSPSelect = (*SRV >> 3) & 0xF; break; case 0x0B: - SDL_PAN_ToVolume(s->DirectVolume, (SRV >> 13) & 0x7, (SRV >> 8) & 0x1F); - SDL_PAN_ToVolume(s->EffectVolume, (SRV >> 5) & 0x7, (SRV >> 0) & 0x1F); + SDL_PAN_ToVolume(s->DirectVolume, (*SRV >> 13) & 0x7, (*SRV >> 8) & 0x1F); + SDL_PAN_ToVolume(s->EffectVolume, (*SRV >> 5) & 0x7, (*SRV >> 0) & 0x1F); break; case 0x0C: case 0x0D: case 0x0E: case 0x0F: - SRV = 0; + *SRV = 0; break; } } @@ -1634,8 +1648,12 @@ INLINE void SS_SCSP_RW_u16_W1(SS_SCSP* z, uint32_t A, uint16_t* DBV_p) { uint16_t tmp = z->RBP | (z->RBL << 7); tmp = (tmp &~ mask) | (((*DBV_p) << shift) & mask); - z->RBP = tmp & 0x7F; - z->RBL = (tmp >> 7) & 0x3; + const uint8_t new_RBP = tmp & 0x7F; + const uint8_t new_RBL = (tmp >> 7) & 0x3; + if(new_RBL != z->RBL || new_RBP != z->RBP) + z->DSP.MPROG_Dirty = true; + z->RBP = new_RBP; + z->RBL = new_RBL; } break; @@ -1720,7 +1738,7 @@ INLINE void SS_SCSP_RW_u16_W1(SS_SCSP* z, uint32_t A, uint16_t* DBV_p) case 0x0D: // TIMB(W), TBCTL(W) case 0x0E: // TIMC(W), TCCTL(W) { - auto* t = &z->Timers[((A >> 1) & 0x1F) - 0x0C]; + SS_SCSP_Timer* t = &z->Timers[((A >> 1) & 0x1F) - 0x0C]; uint16_t tmp = (t->Control << 8); tmp = (tmp &~ mask) | (((*DBV_p) << shift) & mask); t->Control = (tmp >> 8) & 0x7; @@ -2185,16 +2203,16 @@ static void SS_SCSP_DecodeMPROG(SS_SCSP* z) for(unsigned step = 0; step < 128; step++) { const uint64_t instr = z->DSP.MPROG[step]; - SS_SCSP_DSPStep& s = z->DSP.MPROG_Decoded[step]; + SS_SCSP_DSPStep* s = &z->DSP.MPROG_Decoded[step]; - s.MASA = (instr >> 2) & 0x1F; - s.CRA = (instr >> 9) & 0x3F; - s.EWA = (instr >> 24) & 0x0F; - s.IWA = (instr >> 32) & 0x1F; - s.IRA = (instr >> 38) 
& 0x3F; - s.YSEL = (instr >> 45) & 0x03; - s.TWA = (instr >> 48) & 0x7F; - s.TRA = (instr >> 56) & 0x7F; + s->MASA = (instr >> 2) & 0x1F; + s->CRA = (instr >> 9) & 0x3F; + s->EWA = (instr >> 24) & 0x0F; + s->IWA = (instr >> 32) & 0x1F; + s->IRA = (instr >> 38) & 0x3F; + s->YSEL = (instr >> 45) & 0x03; + s->TWA = (instr >> 48) & 0x7F; + s->TRA = (instr >> 56) & 0x7F; uint32_t f = 0; f |= ((instr >> 0) & 1) ? DSPF_NXADDR : 0; @@ -2215,9 +2233,9 @@ static void SS_SCSP_DecodeMPROG(SS_SCSP* z) f |= ((instr >> 37) & 1) ? DSPF_IWT : 0; f |= ((instr >> 47) & 1) ? DSPF_XSEL : 0; f |= ((instr >> 55) & 1) ? DSPF_TWT : 0; - s.flags = f; + s->flags = f; - s.reads = (f & (DSPF_EWT | DSPF_TWT | DSPF_FRCL | DSPF_ADRL | DSPF_MWT | DSPF_BSEL)) ? DSPR_SFT : 0; + s->reads = (f & (DSPF_EWT | DSPF_TWT | DSPF_FRCL | DSPF_ADRL | DSPF_MWT | DSPF_BSEL)) ? DSPR_SFT : 0; uint8_t writes = DSPW_SFT; if(f & DSPF_FRCL) writes |= DSPW_FRC; @@ -2227,9 +2245,9 @@ static void SS_SCSP_DecodeMPROG(SS_SCSP* z) if(f & DSPF_IWT) writes |= DSPW_MEMS; if(f & DSPF_EWT) writes |= DSPW_EFREG; if(f & (DSPF_MRT | DSPF_MWT)) writes |= DSPW_RAM; - s.writes = writes; + s->writes = writes; - s.live = 1; // filled in by liveness pass + s->live = 1; // filled in by liveness pass } // Liveness pass: a step is dead if its only observable writes are SFT_REG and/or @@ -2238,15 +2256,15 @@ static void SS_SCSP_DecodeMPROG(SS_SCSP* z) // [0x32..0x3F]; otherwise the step writes INPUTS via MEMS/MIXS/z->EXTS. 
for(unsigned step = 0; step < 128; step++) { - const SS_SCSP_DSPStep& cur = z->DSP.MPROG_Decoded[step]; - const SS_SCSP_DSPStep& nxt = z->DSP.MPROG_Decoded[(step + 1) & 0x7F]; - const uint32_t nf = nxt.flags; + const SS_SCSP_DSPStep* cur = &z->DSP.MPROG_Decoded[step]; + const SS_SCSP_DSPStep* nxt = &z->DSP.MPROG_Decoded[(step + 1) & 0x7F]; + const uint32_t nf = nxt->flags; - const bool has_other_writes = (cur.writes & ~(unsigned)DSPW_SFT) != 0; - const bool sft_observed = (nxt.reads & DSPR_SFT) != 0; + const bool has_other_writes = (cur->writes & ~(unsigned)DSPW_SFT) != 0; + const bool sft_observed = (nxt->reads & DSPR_SFT) != 0; - const bool cur_writes_inputs = !((cur.IRA & 0x30) == 0x30 && (cur.IRA & 0x0E) != 0); - const bool nxt_preserves_inputs = ((nxt.IRA & 0x30) == 0x30 && (nxt.IRA & 0x0E) != 0); + const bool cur_writes_inputs = !((cur->IRA & 0x30) == 0x30 && (cur->IRA & 0x0E) != 0); + const bool nxt_preserves_inputs = ((nxt->IRA & 0x30) == 0x30 && (nxt->IRA & 0x0E) != 0); const bool nxt_uses_inputs = (nf & DSPF_YRL) || (nf & DSPF_XSEL) || ((nf & DSPF_ADRL) && !((nf & DSPF_SHFT0) && (nf & DSPF_SHFT1))); const bool inputs_observed = cur_writes_inputs && nxt_preserves_inputs && nxt_uses_inputs; @@ -2270,19 +2288,16 @@ static void SS_SCSP_DecodeMPROG(SS_SCSP* z) z->DSP.MPROG_Dirty = false; } -static INLINE void SS_SCSP_RunDSP(SS_SCSP* z) +/* Extracted from RunDSP so the JIT can BL one step at a time. 
*/ +static INLINE void SS_SCSP_RunDSPStep(SS_SCSP* z, unsigned step) { - if(z->DSP.MPROG_Dirty) - SS_SCSP_DecodeMPROG(z); - - for(unsigned step = 0; step < 128; step++) { - const SS_SCSP_DSPStep& s = z->DSP.MPROG_Decoded[step]; - if(!s.live) continue; - const uint32_t f = s.flags; - const unsigned IRA = s.IRA; - const unsigned TEMPWriteAddr = (s.TWA + z->DSP.MDEC_CT) & 0x7F; - const unsigned TEMPReadAddr = (s.TRA + z->DSP.MDEC_CT) & 0x7F; + const SS_SCSP_DSPStep* s = &z->DSP.MPROG_Decoded[step]; + if(!s->live) return; + const uint32_t f = s->flags; + const unsigned IRA = s->IRA; + const unsigned TEMPWriteAddr = (s->TWA + z->DSP.MDEC_CT) & 0x7F; + const unsigned TEMPReadAddr = (s->TRA + z->DSP.MDEC_CT) & 0x7F; // // @@ -2311,12 +2326,12 @@ static INLINE void SS_SCSP_RunDSP(SS_SCSP* z) // variable index — a store-to-load forwarding stall, the single hottest // line in SS_SCSP_RunDSP. A csel chain keeps the four candidates in registers. const uint16_t y_frc = z->DSP.FRC_REG; - const uint16_t y_coef = z->DSP.COEF[s.CRA]; + const uint16_t y_coef = z->DSP.COEF[s->CRA]; const uint16_t y_yhi = (z->DSP.Y_REG >> 11) & 0x1FFF; const uint16_t y_ylo = (z->DSP.Y_REG >> 4) & 0x0FFF; - const uint16_t y_lopr = (s.YSEL & 1) ? y_coef : y_frc; - const uint16_t y_hipr = (s.YSEL & 1) ? y_ylo : y_yhi; - const uint16_t y_input = (s.YSEL & 2) ? y_hipr : y_lopr; + const uint16_t y_lopr = (s->YSEL & 1) ? y_coef : y_frc; + const uint16_t y_hipr = (s->YSEL & 1) ? y_ylo : y_yhi; + const uint16_t y_input = (s->YSEL & 2) ? 
y_hipr : y_lopr;
  //
  //
  //
@@ -2368,14 +2383,14 @@ static INLINE void SS_SCSP_RunDSP(SS_SCSP* z)
  //
  //
  if(f & DSPF_EWT)
-  z->DSP.EFREG[s.EWA] = (ShifterOutput >> 8);
+  z->DSP.EFREG[s->EWA] = (ShifterOutput >> 8);

  if(f & DSPF_TWT)
   z->DSP.TEMP[TEMPWriteAddr] = ShifterOutput;

  if(f & DSPF_IWT)
  {
-  z->DSP.MEMS[s.IWA] = z->DSP.ReadValue;
+  z->DSP.MEMS[s->IWA] = z->DSP.ReadValue;
  }
  //
  //
@@ -2396,7 +2411,7 @@ static INLINE void SS_SCSP_RunDSP(SS_SCSP* z)
  {
   uint16_t addr;

-  addr = z->DSP.MADRS[s.MASA];
+  addr = z->DSP.MADRS[s->MASA];
   addr += (f & DSPF_NXADDR) ? 1 : 0;

   if(f & DSPF_ADRGB)
@@ -2432,11 +2447,50 @@ static INLINE void SS_SCSP_RunDSP(SS_SCSP* z)
     : (uint16_t)((INPUTS >> 16) & 0xFFF);
   }
  }
+}
+
+/* Run one full 128-step DSP pass through the C interpreter, then advance
+ * MDEC_CT: reload it with (0x2000 << RBL) when it has reached zero and
+ * decrement it once. */
+static INLINE void SS_SCSP_RunDSPInterpreter(SS_SCSP* z)
+{
+ for(unsigned step = 0; step < 128; step++)
+  SS_SCSP_RunDSPStep(z, step);

  if(!z->DSP.MDEC_CT)
   z->DSP.MDEC_CT = (0x2000 << z->RBL);
  z->DSP.MDEC_CT--;
 }
+
+/* DSP entry point. Re-decodes MPROG when it is marked dirty, then runs
+ * one pass either through the compiled JIT entry (WANT_JIT builds with
+ * the runtime option enabled and a compiled program available) or
+ * through the interpreter. */
+static INLINE void SS_SCSP_RunDSP(SS_SCSP* z)
+{
+ if(z->DSP.MPROG_Dirty)
+ {
+  SS_SCSP_DecodeMPROG(z);
+#ifdef WANT_JIT
+  /* Whatever was compiled before describes the old program. Drop it
+   * first so that, if the runtime option is off right now, turning it
+   * back on later cannot dispatch into stale code. */
+  SCSP_DSP_JIT_Entry = NULL;
+  if(setting_jit_scsp)
+   SCSP_DSP_JIT_Compile(z);
+#endif
+ }
+
+#ifdef WANT_JIT
+ if(MDFN_LIKELY(setting_jit_scsp && SCSP_DSP_JIT_Entry != NULL))
+ {
+  SCSP_DSP_JIT_Entry(z);
+  return;
+ }
+#endif
+
+ SS_SCSP_RunDSPInterpreter(z);
+}
 #endif
  //
  //
@@ -2453,7 +2496,7 @@ INLINE void SS_SCSP_RunSample(SS_SCSP* z, int16_t* outlr)

  for(unsigned i = 0; i < 3; i++)
  {
-  auto* t = &z->Timers[i];
+  SS_SCSP_Timer* t = &z->Timers[i];
   const bool DoClock = !(SampleCounter & ((1U << t->Control) - 1));

   if(DoClock)
@@ -2491,7 +2534,7 @@ INLINE void SS_SCSP_RunSample(SS_SCSP* z, int16_t* outlr)
  //
  for(unsigned slot = 0; slot < 32; slot++)
  {
-  auto* s = &z->Slots[slot];
+  SS_SCSP_Slot* s = &z->Slots[slot];
   unsigned key_eg_scale;

   s->WFAllowAccess &= (s->EnvLevel < 0x3C0 || s->EGBypass);
@@ -2569,7 +2612,7 @@ INLINE void SS_SCSP_RunSample(SS_SCSP* z, int16_t* outlr)

  for(unsigned slot = 0; slot < 32; slot++)
  {
-  auto* s = &z->Slots[slot];
+  SS_SCSP_Slot* s =
&z->Slots[slot]; uint32_t mdata = 0; uint16_t sample = 0; @@ -2603,7 +2646,7 @@ INLINE void SS_SCSP_RunSample(SS_SCSP* z, int16_t* outlr) // TODO/FIXME: Proper handling of the slot 31->0 buggy FM interpolation case with respect to reverse looping(ns->LoopSub) // requires sub-sample timing emulation. // - auto* ns = &z->Slots[(slot + 1) & 0x1F]; + SS_SCSP_Slot* ns = &z->Slots[(slot + 1) & 0x1F]; uint32_t modalizer; uint32_t ns_sia; @@ -2852,10 +2895,16 @@ void SS_SCSP_StateAction(SS_SCSP* z, StateMem* sm, const unsigned load, const bo if(load) { - for(auto& s : z->Slots) + /* Range-based `for(auto& s : z->Slots)` retired for C-compat + * (will be needed once sound_glue.cpp -> .c). Same iteration. */ { - s.EnvLevel &= 0x3FF; - s.EnvPhase &= 0x3; + unsigned i; + for(i = 0; i < 32; i++) + { + SS_SCSP_Slot* s = &z->Slots[i]; + s->EnvLevel &= 0x3FF; + s->EnvPhase &= 0x3; + } } z->SlotMonitorWhich &= 0x1F; @@ -2879,7 +2928,13 @@ void SS_SCSP_StateAction(SS_SCSP* z, StateMem* sm, const unsigned load, const bo for(uint32_t A = 0x100000; A < 0x100400; A += 2) { - SS_SCSP_RW_u16_W1(z, A, MDAP(z->SlotRegs) + ((A & 0x3FE) >> 1)); + /* MDAP(z->SlotRegs) -- mednafen-types.h's + * `template T* MDAP(T (*v)[N])` helper -- isn't + * parseable in C. z->SlotRegs is `uint16_t[0x20][0x10]`, so + * `&z->SlotRegs[0][0]` yields the same flat-array pointer + * to the first uint16_t with no template machinery (parallels + * the scu.inc MDAP(DSP.DataRAM) -> explicit-cast fix). 
*/ + SS_SCSP_RW_u16_W1(z, A, &z->SlotRegs[0][0] + ((A & 0x3FE) >> 1)); } SS_SCSP_RecalcSoundInt(z); SS_SCSP_RecalcMainInt(z); @@ -2955,8 +3010,14 @@ void SS_SCSP_SetRegister(SS_SCSP* z, const unsigned id, const uint32_t value) break; case GSREG_RBC: - z->RBP = value & 0x7F; - z->RBL = (value >> 7) & 0x3; + { + const uint8_t new_RBP = value & 0x7F; + const uint8_t new_RBL = (value >> 7) & 0x3; + if(new_RBL != z->RBL || new_RBP != z->RBP) + z->DSP.MPROG_Dirty = true; + z->RBP = new_RBP; + z->RBL = new_RBL; + } break; case GSREG_MSLC: diff --git a/mednafen/ss/scsp_dsp_jit.c b/mednafen/ss/scsp_dsp_jit.c new file mode 100644 index 00000000..a3543044 --- /dev/null +++ b/mednafen/ss/scsp_dsp_jit.c @@ -0,0 +1,737 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* scsp_dsp_jit.c - SCSP DSP JIT (aarch64 backend) implementation +** Copyright (C) 2026 pstef +*/ + +/* + * Two compile modes: + * + * Mode A (any live step <= MAX_NATIVE_STEPS): full callee-save + * prologue, load pinned regs (W19..W28), emit per-step body -- + * natively for supported indices, helper-BL with pin flush/reload + * for the rest -- then MDEC_CT update on the pin, frame restore. + * + * Mode B (MAX_NATIVE_STEPS=0 or no live step in range): minimal + * fp/lr frame, BL helper per live step, MDEC_CT memory update, RET. + * + * The folded ring-mask constant (0x2000 << RBL) and the RAM-base add + * (RBP << 12) are captured at Compile time; the RBL/RBP write path + * marks MPROG_Dirty so those constants stay valid. 
+ * + * Pinned-register layout (Mode A): + * + * x0 = SS_SCSP* throughout the body + * w19 = MDEC_CT (16-bit) -- final decrement at exit + * w20 = SFT_REG (26-bit) -- every step writes + * w21 = FRC_REG (13-bit) -- FRCL steps write + * w22 = Y_REG (24-bit) -- YRL steps write + * w23 = ADRS_REG (12-bit) -- ADRL steps write + * w24 = INPUTS (24-bit) -- most steps write + * w25 = RWAddr (19-bit) -- every step writes + * w26 = ReadPending (uint8) -- gates RAM-read branch + * w27 = WritePending (bool) -- gates RAM-write branch + * w28 = ReadValue (uint32) + * + * WriteValue (uint16) is set by an MWT step and consumed by the next + * step's RAM-write block; lives in memory between steps rather than + * being pinned. + */ + +#include +#include +#include +#include + +#include "ss.h" +#include "scsp.h" +#include "scsp_dsp_jit.h" +#include "a64emit.h" +#include "jitdump.h" + +void (*SCSP_DSP_JIT_Entry)(struct SS_SCSP*) = NULL; + +#if defined(WANT_JIT) && (defined(__aarch64__) || defined(__arm64__)) + +/* 0 forces all-helper Mode B; 128 is fully native. Knob is useful + * for bisecting native/interpreter divergence. */ +#ifndef SCSP_DSP_JIT_MAX_NATIVE_STEPS +#define SCSP_DSP_JIT_MAX_NATIVE_STEPS 128 +#endif + +/* 128 native steps ~ 25 KB; 64 KB has headroom for the helper-mixed case. */ +#define SCSP_JIT_CODE_BYTES (64u * 1024u) + +/* Byte offset of an SS_SCSP field, compile-time. */ +#define O(field) ((uint32_t)offsetof(SS_SCSP, field)) + +/* AArch64 register-index conventions. WZR/XZR/SP all encode as 31. + * Numeric in source so a64emit accepts them as plain `unsigned`s. 
*/ +#define W0 0u +#define W1 1u +#define W2 2u +#define W3 3u +#define W4 4u +#define W5 5u +#define W6 6u +#define W7 7u +#define W8 8u +#define W9 9u +#define W10 10u +#define W11 11u +#define W12 12u +#define W13 13u +#define W14 14u +#define W15 15u +#define W16 16u +#define W17 17u +#define W18 18u +#define W19 19u +#define W20 20u +#define W21 21u +#define W22 22u +#define W23 23u +#define W24 24u +#define W25 25u +#define W26 26u +#define W27 27u +#define W28 28u +#define W29 29u +#define W30 30u +#define WZR 31u + +#define X0 0u +#define X1 1u +#define X8 8u +#define X16 16u +#define X17 17u +#define X19 19u +#define X20 20u +#define X21 21u +#define X22 22u +#define X23 23u +#define X24 24u +#define X25 25u +#define X26 26u +#define X27 27u +#define X28 28u +#define X29 29u +#define X30 30u +#define XZR 31u +#define SP_REG 31u + +extern void SCSP_DSP_run_step(struct SS_SCSP* scsp, unsigned step); +extern void SCSP_DSP_run_interpreter(struct SS_SCSP* scsp); + +/* --- Codegen + label pool ---------------------------------------- */ + +/* MPROG has 128 slots; largest single step (RAM-pipeline block) uses + * 3 labels and is followed by labels_reset(), so 64 is plenty. */ +#define LABEL_POOL_SIZE 64u + +static a64_codegen* g_cg = NULL; +static void* g_seg_start = NULL; +static a64_label g_label_pool[LABEL_POOL_SIZE]; +static size_t g_label_count = 0; + +static a64_label* label_new(void) +{ + a64_label* p; + if(g_label_count >= LABEL_POOL_SIZE) return NULL; + p = &g_label_pool[g_label_count++]; + a64_label_reset(p); + return p; +} +static void label_bind(a64_label* lbl) { a64_label_bind(g_cg, lbl); } +static void labels_reset(void) +{ + memset(g_label_pool, 0, sizeof(g_label_pool)); + g_label_count = 0; +} + +/* --- Memory accessors with offset-range fallback ------------------ */ + +/* LDR/STR with [x0, #off] when the size-scaled 12-bit immediate fits, + * falling back to MOV W16, off + register-offset form otherwise. 
The
+ * fallback path stays out of every step's hot work -- DSP field offsets
+ * sit in low KB and fit the direct form; only RAM (~1 MB) needs it. */
+static void emit_ldr_w(unsigned dst, uint32_t off)
+{
+    if((off & 3) == 0 && off <= 16380) a64_ldr_w_imm(g_cg, dst, X0, off);
+    else { a64_mov_w_imm(g_cg, W16, off); a64_ldr_w_reg(g_cg, dst, X0, X16); }
+}
+static void emit_str_w(unsigned src, uint32_t off)
+{
+    if((off & 3) == 0 && off <= 16380) a64_str_w_imm(g_cg, src, X0, off);
+    else { a64_mov_w_imm(g_cg, W16, off); a64_str_w_reg(g_cg, src, X0, X16); }
+}
+static void emit_ldrh_w(unsigned dst, uint32_t off)
+{
+    if((off & 1) == 0 && off <= 8190) a64_ldrh_w_imm(g_cg, dst, X0, off);
+    else { a64_mov_w_imm(g_cg, W16, off); a64_ldrh_w_reg(g_cg, dst, X0, X16); }
+}
+static void emit_strh_w(unsigned src, uint32_t off)
+{
+    if((off & 1) == 0 && off <= 8190) a64_strh_w_imm(g_cg, src, X0, off);
+    else { a64_mov_w_imm(g_cg, W16, off); a64_strh_w_reg(g_cg, src, X0, X16); }
+}
+/* Byte accessors (used for the one-byte ReadPending / WritePending
+ * fields). The access must stay exactly one byte wide on BOTH paths: a
+ * word-sized load would drag the three following bytes into the pinned
+ * register (breaking the CBNZ / CMP #2 tests on it), and a word-sized
+ * store would overwrite whatever sits after the field. The out-of-range
+ * path therefore forms x0 + off in X16 and reuses the byte immediate
+ * form with offset 0. The W16 write zero-extends into X16, the same way
+ * emit_load_ram_base relies on for W17/X17. */
+static void emit_ldrb_w(unsigned dst, uint32_t off)
+{
+    if(off <= 4095) a64_ldrb_w_imm(g_cg, dst, X0, off);
+    else
+    {
+        a64_mov_w_imm(g_cg, W16, off);
+        a64_add_x_reg(g_cg, X16, X0, X16);
+        a64_ldrb_w_imm(g_cg, dst, X16, 0);
+    }
+}
+/* Store counterpart of emit_ldrb_w. `src` must not be W16: the
+ * out-of-range path uses W16/X16 to hold the address. */
+static void emit_strb_w(unsigned src, uint32_t off)
+{
+    if(off <= 4095) a64_strb_w_imm(g_cg, src, X0, off);
+    else
+    {
+        a64_mov_w_imm(g_cg, W16, off);
+        a64_add_x_reg(g_cg, X16, X0, X16);
+        a64_strb_w_imm(g_cg, src, X16, 0);
+    }
+}
+
+/* O(RAM) is larger than the ADD-imm direct range. */
+static void emit_load_ram_base(void)
+{
+    a64_mov_w_imm(g_cg, W17, O(RAM));
+    a64_add_x_reg(g_cg, X17, X0, X17);
+}
+
+/* ADD Wd, Wn, #imm with a MOV+ADD reg fallback when `imm` doesn't fit
+ * the AddSubImm encoding. W16 is the canonical transient register
+ * (also used by the LDR/STR fallbacks above and by emit_step_helper_bl
+ * for MOVP2R staging), so wd/wn must not alias W16.
*/
+static void emit_add_w_imm_safe(unsigned wd, unsigned wn, uint32_t imm)
+{
+    if(!a64_try_add_w_imm(g_cg, wd, wn, imm))
+    {
+        /* Immediate form was refused: stage the constant in W16 and add
+         * register-to-register instead. */
+        a64_mov_w_imm(g_cg, W16, imm);
+        a64_add_w_reg(g_cg, wd, wn, W16);
+    }
+}
+
+/* --- Frames ------------------------------------------------------- */
+
+/* Mode B entry: 16-byte frame holding only x29/x30, then x29 is set up
+ * from SP (presumably `mov x29, sp` -- confirm against a64emit). */
+static void emit_min_prologue(void)
+{
+    a64_stp_x_pre(g_cg, X29, X30, -16);
+    a64_mov_x_sp(g_cg, X29);
+}
+/* Mode B exit: pop x29/x30 and return to the caller. */
+static void emit_min_epilogue(void)
+{
+    a64_ldp_x_post(g_cg, X29, X30, 16);
+    a64_ret(g_cg);
+}
+
+/* Mode A entry. Builds a 96-byte frame:
+ *   [sp+0]  x29, x30
+ *   [sp+16] x19, x20   [sp+32] x21, x22   [sp+48] x23, x24
+ *   [sp+64] x25, x26   [sp+80] x27, x28
+ * and then fills the ten pinned registers w19..w28 from the DSP state
+ * fields reached through x0 (the pin assignment is listed in the header
+ * comment at the top of this file). Load widths follow the field type:
+ * halfword for MDEC_CT/FRC_REG/ADRS_REG, byte for the two pending
+ * flags, word for the rest. */
+static void emit_full_prologue(void)
+{
+    a64_stp_x_pre(g_cg, X29, X30, -96);
+    a64_mov_x_sp(g_cg, X29);
+    a64_stp_x_off(g_cg, X19, X20, SP_REG, 16);
+    a64_stp_x_off(g_cg, X21, X22, SP_REG, 32);
+    a64_stp_x_off(g_cg, X23, X24, SP_REG, 48);
+    a64_stp_x_off(g_cg, X25, X26, SP_REG, 64);
+    a64_stp_x_off(g_cg, X27, X28, SP_REG, 80);
+
+    emit_ldrh_w(W19, O(DSP.MDEC_CT));
+    emit_ldr_w (W20, O(DSP.SFT_REG));
+    emit_ldrh_w(W21, O(DSP.FRC_REG));
+    emit_ldr_w (W22, O(DSP.Y_REG));
+    emit_ldrh_w(W23, O(DSP.ADRS_REG));
+    emit_ldr_w (W24, O(DSP.INPUTS));
+    emit_ldr_w (W25, O(DSP.RWAddr));
+    emit_ldrb_w(W26, O(DSP.ReadPending));
+    emit_ldrb_w(W27, O(DSP.WritePending));
+    emit_ldr_w (W28, O(DSP.ReadValue));
+}
+/* Mode A exit, the mirror image of emit_full_prologue: write the ten
+ * pinned registers back to their DSP state fields while x0 is still the
+ * base, restore x19..x28 from the frame in reverse order, pop x29/x30
+ * together with the whole 96-byte frame, and return. */
+static void emit_full_epilogue(void)
+{
+    emit_strh_w(W19, O(DSP.MDEC_CT));
+    emit_str_w (W20, O(DSP.SFT_REG));
+    emit_strh_w(W21, O(DSP.FRC_REG));
+    emit_str_w (W22, O(DSP.Y_REG));
+    emit_strh_w(W23, O(DSP.ADRS_REG));
+    emit_str_w (W24, O(DSP.INPUTS));
+    emit_str_w (W25, O(DSP.RWAddr));
+    emit_strb_w(W26, O(DSP.ReadPending));
+    emit_strb_w(W27, O(DSP.WritePending));
+    emit_str_w (W28, O(DSP.ReadValue));
+
+    a64_ldp_x_off(g_cg, X27, X28, SP_REG, 80);
+    a64_ldp_x_off(g_cg, X25, X26, SP_REG, 64);
+    a64_ldp_x_off(g_cg, X23, X24, SP_REG, 48);
+    a64_ldp_x_off(g_cg, X21, X22, SP_REG, 32);
+    a64_ldp_x_off(g_cg, X19, X20, SP_REG, 16);
+    a64_ldp_x_post(g_cg, X29, X30, 96);
+    a64_ret(g_cg);
+}
+
+/* Around helper BLs: trampoline sees the live state via memory; the
+ * subsequent emit_pin_reload picks up
its updates. */
+static void emit_pin_flush(void)
+{
+    emit_strh_w(W19, O(DSP.MDEC_CT));
+    emit_str_w (W20, O(DSP.SFT_REG));
+    emit_strh_w(W21, O(DSP.FRC_REG));
+    emit_str_w (W22, O(DSP.Y_REG));
+    emit_strh_w(W23, O(DSP.ADRS_REG));
+    emit_str_w (W24, O(DSP.INPUTS));
+    emit_str_w (W25, O(DSP.RWAddr));
+    emit_strb_w(W26, O(DSP.ReadPending));
+    emit_strb_w(W27, O(DSP.WritePending));
+    emit_str_w (W28, O(DSP.ReadValue));
+}
+/* Inverse of emit_pin_flush: refill w19..w28 from the DSP state fields.
+ * Needs x0 to hold the SS_SCSP* base again at this point. */
+static void emit_pin_reload(void)
+{
+    emit_ldrh_w(W19, O(DSP.MDEC_CT));
+    emit_ldr_w (W20, O(DSP.SFT_REG));
+    emit_ldrh_w(W21, O(DSP.FRC_REG));
+    emit_ldr_w (W22, O(DSP.Y_REG));
+    emit_ldrh_w(W23, O(DSP.ADRS_REG));
+    emit_ldr_w (W24, O(DSP.INPUTS));
+    emit_ldr_w (W25, O(DSP.RWAddr));
+    emit_ldrb_w(W26, O(DSP.ReadPending));
+    emit_ldrb_w(W27, O(DSP.WritePending));
+    emit_ldr_w (W28, O(DSP.ReadValue));
+}
+
+/* --- Helper-BL fallback ------------------------------------------- */
+
+/* Emit a call to the C per-step helper: SCSP_DSP_run_step(x0, step).
+ *
+ * x0 is the SS_SCSP* base for every load/store emitted after the call
+ * (emit_pin_reload, the MDEC_CT memory update) and the first argument of
+ * the next helper call, but under AAPCS64 it is a caller-saved register
+ * that the callee may overwrite. It is therefore parked on the stack
+ * across the BLR, using the same pre-index push / post-index pop forms
+ * as the frame prologue and epilogue. x1 only fills the second slot of
+ * the 16-byte push so SP stays 16-byte aligned; the value popped back
+ * into it is not used. */
+static void emit_step_helper_bl(unsigned step)
+{
+    a64_stp_x_pre(g_cg, X0, X1, -16);
+    a64_mov_w_imm(g_cg, W1, step);
+    a64_movp2r_pool(g_cg, X16, (const void*)&SCSP_DSP_run_step);
+    a64_blr(g_cg, X16);
+    a64_ldp_x_post(g_cg, X0, X1, 16);
+}
+
+/* --- DSP-float helpers (inline in emitted code) ------------------- */
+
+/* Mirrors scsp.inc::dspfloat_to_int. w_tmp_a/w_tmp_b must not alias
+ * w_out prematurely. */
+static void emit_dspfloat_to_int(unsigned w_out, unsigned w_in,
+                                 unsigned w_tmp_a, unsigned w_tmp_b)
+{
+    /* sign_xor = (inv & 0x8000) ? 0xC0000000 : 0
+     * SBFX sign-broadcast -> 0xFFFFFFFF or 0, then LSL 30 gives 0xC0000000.
*/ + a64_sbfx_w(g_cg, w_tmp_a, w_in, 15, 1); + a64_lsl_w_imm(g_cg, w_tmp_a, w_tmp_a, 30); + /* exp = (inv >> 11) & 0xF */ + a64_ubfx_w(g_cg, w_tmp_b, w_in, 11, 4); + /* ret = inv & 0x7FF */ + a64_and_w_imm(g_cg, w_out, w_in, 0x7FFu); + /* if (exp < 12) ret |= 0x800 */ + a64_mov_w_imm(g_cg, W15, 0x800u); + a64_cmp_w_imm(g_cg, w_tmp_b, 12); + a64_csel_w(g_cg, W15, WZR, W15, A64_COND_GE); + a64_orr_w_reg(g_cg, w_out, w_out, W15); + /* ret <<= 19 */ + a64_lsl_w_imm(g_cg, w_out, w_out, 19); + /* ret ^= sign_xor */ + a64_eor_w_reg(g_cg, w_out, w_out, w_tmp_a); + /* shift = 8 + min(11, exp) */ + a64_mov_w_imm(g_cg, W15, 11u); + a64_cmp_w_imm(g_cg, w_tmp_b, 11); + a64_csel_w(g_cg, W15, w_tmp_b, W15, A64_COND_LE); + a64_add_w_imm(g_cg, W15, W15, 8); + /* ret = (int32)ret >> shift */ + a64_asr_w_reg(g_cg, w_out, w_out, W15); + /* return ret & 0xFFFFFF */ + a64_and_w_imm(g_cg, w_out, w_out, 0xFFFFFFu); +} + +/* int_to_dspfloat(W_in32) -> W_out16. */ +static void emit_int_to_dspfloat(unsigned w_out, unsigned w_in, + unsigned w_tmp_a, unsigned w_tmp_b) +{ + /* invsl8 = inv << 8 */ + a64_lsl_w_imm(g_cg, w_tmp_a, w_in, 8); + /* sign_xor = (int32)invsl8 >> 31 */ + a64_asr_w_imm(g_cg, w_tmp_b, w_tmp_a, 31); + /* base = ((invsl8 ^ sign_xor) << 1) | (1 << 19) */ + a64_eor_w_reg(g_cg, W15, w_tmp_a, w_tmp_b); + a64_lsl_w_imm(g_cg, W15, W15, 1); + a64_orr_w_imm(g_cg, W15, W15, 0x80000u); + /* exp = clz32(base) */ + a64_clz_w(g_cg, W15, W15); + /* shift = exp - (exp == 12 ? 1 : 0) */ + a64_sub_w_imm(g_cg, w_tmp_b, W15, 1); + a64_cmp_w_imm(g_cg, W15, 12); + a64_csel_w(g_cg, w_tmp_b, w_tmp_b, W15, A64_COND_EQ); + /* shift_amt = 19 - shift */ + a64_mov_w_imm(g_cg, w_out, 19u); + a64_sub_w_reg(g_cg, w_tmp_b, w_out, w_tmp_b); + /* ret = (int32)invsl8 >> shift_amt */ + a64_asr_w_reg(g_cg, w_tmp_a, w_tmp_a, w_tmp_b); + /* ret = (ret & 0x87FF) | (exp << 11) + * 0x87FF is two disjoint bit-runs (not a valid logical-imm), so + * materialise it explicitly. 
*/ + a64_mov_w_imm(g_cg, w_out, 0x87FFu); + a64_and_w_reg(g_cg, w_tmp_a, w_tmp_a, w_out); + a64_lsl_w_imm(g_cg, W15, W15, 11); + a64_orr_w_reg(g_cg, w_out, w_tmp_a, W15); +} + +/* --- MDEC_CT update ----------------------------------------------- */ + +static void emit_mdec_ct_update_pin(uint8_t rbl) +{ + const uint32_t reload = 0x2000u << (rbl & 3); + a64_label* skip = label_new(); + a64_cbnz_w(g_cg, W19, skip); + a64_mov_w_imm(g_cg, W19, reload); + label_bind(skip); + a64_sub_w_imm(g_cg, W19, W19, 1); + labels_reset(); +} +static void emit_mdec_ct_update_mem(uint8_t rbl) +{ + const uint32_t reload = 0x2000u << (rbl & 3); + a64_label* skip = label_new(); + emit_ldrh_w(W3, O(DSP.MDEC_CT)); + a64_cbnz_w(g_cg, W3, skip); + a64_mov_w_imm(g_cg, W3, reload); + label_bind(skip); + a64_sub_w_imm(g_cg, W3, W3, 1); + emit_strh_w(W3, O(DSP.MDEC_CT)); + labels_reset(); +} + +/* --- Native per-step body ----------------------------------------- */ + +/* Mirrors scsp.inc::RunDSPStep verbatim -- each `if(f & DSPF_X)` + * becomes a compile-time decision to emit that branch's body. + * + * Per-step scratch registers (not preserved across steps): + * W1 = INPUTS_sxt (sxt24 of W24) + * W2 = y_input + * W3 = ShifterOutput (24-bit unsigned) + * W4 = TEMP read address + * W5 = TEMP_sxt + * W7 = y_sxt13 + * X8 = Product + * W9 = SGAOutput + * W10 = TEMP write address / staging + * W11 = MADRS accumulator + * W12,W13,W14,W15 = RAM-pipeline + dspfloat scratches + * W16,X16,W17,X17 = MOVP2R staging + RAM base */ +static void emit_step_native(const SS_SCSP_DSPStep* s, + uint8_t rbl, uint8_t rbp) +{ + const uint32_t f = s->flags; + const unsigned IRA = s->IRA; + + /* IRA decode -- compile-time pick. 
*/ + if(IRA & 0x20) { + if(IRA & 0x10) { + if(!(IRA & 0xE)) { + emit_ldrh_w(W24, O(EXTS) + (IRA & 0x1) * 2); + a64_lsl_w_imm(g_cg, W24, W24, 8); + } + /* else: INPUTS unchanged */ + } else { + emit_ldr_w(W24, O(DSP.MIXS) + (IRA & 0xF) * 4); + a64_lsl_w_imm(g_cg, W24, W24, 4); + } + } else { + emit_ldr_w(W24, O(DSP.MEMS) + (IRA & 0x1F) * 4); + } + + /* Always emitted: X_SEL, ADRL, and the interpreter's INPUTS_sxt all + * read this even when DSP.INPUTS wasn't updated this step. */ + a64_sbfx_w(g_cg, W1, W24, 0, 24); + + /* Y selector -- compile-time pick on YSEL. */ + switch(s->YSEL & 3) { + case 0: + a64_mov_w_reg(g_cg, W2, W21); + break; + case 1: + emit_ldrh_w(W2, O(DSP.COEF) + s->CRA * 2); + break; + case 2: + a64_ubfx_w(g_cg, W2, W22, 11, 13); + break; + case 3: + a64_ubfx_w(g_cg, W2, W22, 4, 12); + break; + } + + /* YRL: Y_REG <- INPUTS & 0xFFFFFF. W24 holds the raw DSP.INPUTS + * (already <= 0xFFFFFF), so AND-mask is enough. */ + if(f & DSPF_YRL) + a64_and_w_imm(g_cg, W22, W24, 0xFFFFFFu); + + /* Shifter: + * shft0 = (f >> 7) & 1 + * shft1 = (f >> 8) & 1 + * ShifterOutput = ((int32)sxt26(SFT_REG)) << (shft0 ^ shft1) + * if (!shft1) saturate to [-0x800000, 0x7FFFFF] + * ShifterOutput &= 0xFFFFFF + * + * shft0/shft1 are compile-time, so the shift amount, the saturate + * check, and the FRCL/ADRL branch-select downstream all collapse. */ + const unsigned shft0 = (f >> 7) & 1; + const unsigned shft1 = (f >> 8) & 1; + const unsigned shift_amt = shft0 ^ shft1; + a64_sbfx_w(g_cg, W3, W20, 0, 26); + if(shift_amt) + a64_lsl_w_imm(g_cg, W3, W3, shift_amt); + if(!shft1) { + /* Clamp signed-32 to [-0x800000, 0x7FFFFF]. 
*/ + a64_mov_w_imm(g_cg, W10, 0x7FFFFFu); + a64_cmp_w_reg(g_cg, W3, W10); + a64_csel_w(g_cg, W3, W10, W3, A64_COND_GT); + a64_mov_w_imm(g_cg, W10, 0xFF800000u); /* int32 -0x800000 as bit pattern */ + a64_cmp_w_reg(g_cg, W3, W10); + a64_csel_w(g_cg, W3, W10, W3, A64_COND_LT); + } + a64_and_w_imm(g_cg, W3, W3, 0xFFFFFFu); + + /* FRCL: FRC_REG <- (shft0&shft1) ? (Shifter & 0xFFF) : (Shifter >> 11) */ + if(f & DSPF_FRCL) { + if(shft0 && shft1) + a64_and_w_imm(g_cg, W21, W3, 0xFFFu); + else + a64_lsr_w_imm(g_cg, W21, W3, 11); + } + + /* Multiplier-adder: + * TEMP[TEMPReadAddr] read, sxt24 -> TEMP_sxt + * x_input = XSEL ? INPUTS_sxt : TEMP_sxt + * Product = (sxt13(y_input) * x_input) >> 12 + * SGAOutput = ZERO ? 0 : NEGB ? -B : B (B = BSEL ? SFT_REG : TEMP_sxt) + * SFT_REG = (Product + SGAOutput) & 0x3FFFFFF + * + * TEMPReadAddr = (TRA + MDEC_CT) & 0x7F -- TRA compile-time. */ + emit_add_w_imm_safe(W4, W19, s->TRA); + a64_and_w_imm(g_cg, W4, W4, 0x7Fu); + { + /* X17 <- &DSP.TEMP[0]; X17 + W4*4 = &TEMP[idx] */ + a64_mov_w_imm(g_cg, W17, O(DSP.TEMP)); + a64_add_x_reg(g_cg, X17, X0, X17); + a64_ldr_w_uxtw(g_cg, W5, X17, W4, 2); + } + a64_sbfx_w(g_cg, W5, W5, 0, 24); + const unsigned w_x_input = (f & DSPF_XSEL) ? W1 : W5; + a64_sbfx_w(g_cg, W7, W2, 0, 13); + a64_smull(g_cg, X8, W7, w_x_input); + a64_asr_x_imm(g_cg, X8, X8, 12); + /* The AND 0x3FFFFFF below truncates, so we read W8 (low 32). */ + if(f & DSPF_ZERO) { + a64_mov_w_imm(g_cg, W9, 0u); + } else { + const unsigned w_b = (f & DSPF_BSEL) ? 
W20 : W5; + if(f & DSPF_NEGB) + a64_neg_w(g_cg, W9, w_b); + else + a64_mov_w_reg(g_cg, W9, w_b); + } + a64_add_w_reg(g_cg, W20, W8, W9); + a64_and_w_imm(g_cg, W20, W20, 0x3FFFFFFu); + + /* EWT: EFREG[EWA] <- ShifterOutput >> 8 */ + if(f & DSPF_EWT) { + a64_lsr_w_imm(g_cg, W10, W3, 8); + emit_strh_w(W10, O(DSP.EFREG) + s->EWA * 2); + } + + /* TWT: TEMP[(TWA + MDEC_CT) & 0x7F] <- ShifterOutput */ + if(f & DSPF_TWT) { + emit_add_w_imm_safe(W10, W19, s->TWA); + a64_and_w_imm(g_cg, W10, W10, 0x7Fu); + a64_mov_w_imm(g_cg, W17, O(DSP.TEMP)); + a64_add_x_reg(g_cg, X17, X0, X17); + a64_str_w_uxtw(g_cg, W3, X17, W10, 2); + } + + /* IWT: MEMS[IWA] <- ReadValue (pin W28) */ + if(f & DSPF_IWT) + emit_str_w(W28, O(DSP.MEMS) + s->IWA * 4); + + /* RAM pipeline (data-dependent): + * if (ReadPending) { tmp=RAM[RWAddr]; ReadValue = ...; RP=0; } + * elif(WritePending){ if(!(RWAddr&0x40000)) RAM[RWAddr]=WV; WP=0;} + * + * The branchiness can't be folded -- both ReadPending and + * WritePending depend on flags set in earlier steps. */ + { + a64_label* ram_done = label_new(); + a64_label* ram_read = label_new(); + a64_label* ram_write_skip = label_new(); + + a64_cbnz_w(g_cg, W26, ram_read); + a64_cbz_w (g_cg, W27, ram_done); + + /* Write path: skip if RWAddr & 0x40000 (bit 18). */ + a64_tbnz_w(g_cg, W25, 18, ram_write_skip); + emit_load_ram_base(); + emit_ldrh_w(W12, O(DSP.WriteValue)); + a64_strh_w_uxtw(g_cg, W12, X17, W25, 1); + label_bind(ram_write_skip); + a64_mov_w_imm(g_cg, W27, 0u); + a64_b(g_cg, ram_done); + + /* Read path: + * tmp = RAM[RWAddr] + * ReadValue = (ReadPending == 2) ? (tmp << 8) : dspfloat_to_int(tmp) + * ReadPending = 0 */ + label_bind(ram_read); + emit_load_ram_base(); + a64_ldrh_w_uxtw(g_cg, W14, X17, W25, 1); + /* Inline dspfloat path into W12, NOFL path into W13, CSEL into W28. 
*/ + emit_dspfloat_to_int(W12, W14, W13, W11); + a64_lsl_w_imm(g_cg, W13, W14, 8); + a64_cmp_w_imm(g_cg, W26, 2); + a64_csel_w(g_cg, W28, W13, W12, A64_COND_EQ); + a64_mov_w_imm(g_cg, W26, 0u); + label_bind(ram_done); + } + + /* MADRS / RWAddr update: + * addr = MADRS[MASA] + * if (NXADDR) addr += 1 + * if (ADRGB) addr += sxt12(ADRS_REG) + * if (!TABLE) addr += MDEC_CT; addr &= (0x2000<MASA * 2); + if(f & DSPF_NXADDR) + a64_add_w_imm(g_cg, W11, W11, 1u); + if(f & DSPF_ADRGB) { + a64_sbfx_w(g_cg, W12, W23, 0, 12); + a64_add_w_reg(g_cg, W11, W11, W12); + } + if(!(f & DSPF_TABLE)) { + a64_add_w_reg(g_cg, W11, W11, W19); + a64_and_w_imm(g_cg, W11, W11, (0x2000u << (rbl & 3)) - 1u); + } else { + /* Interpreter holds addr as uint16_t; the non-TABLE mask above + * incidentally wraps to 0xFFFF, so TABLE must do it explicitly. */ + a64_and_w_imm(g_cg, W11, W11, 0xFFFFu); + } + /* (RBP << 12) is 0..0x7F000 -- fits AddSubImm shifted-by-12. */ + { + const uint32_t rbp_off = (uint32_t)(rbp & 0x7F) << 12; + if(rbp_off) + emit_add_w_imm_safe(W11, W11, rbp_off); + } + a64_and_w_imm(g_cg, W25, W11, 0x7FFFFu); + + /* MRT: ReadPending <- NOFL ? 2 : 1 */ + if(f & DSPF_MRT) + a64_mov_w_imm(g_cg, W26, (f & DSPF_NOFL) ? 2u : 1u); + + /* MWT: WritePending <- 1; WriteValue <- NOFL ? (Shifter>>8) : int_to_dspfloat(Shifter) */ + if(f & DSPF_MWT) { + a64_mov_w_imm(g_cg, W27, 1u); + if(f & DSPF_NOFL) { + a64_lsr_w_imm(g_cg, W12, W3, 8); + emit_strh_w(W12, O(DSP.WriteValue)); + } else { + emit_int_to_dspfloat(W12, W3, W13, W11); + emit_strh_w(W12, O(DSP.WriteValue)); + } + } + + /* ADRL: ADRS_REG <- (shft0&shft1) ? (Shifter>>12) : (INPUTS_sxt>>16) & 0xFFF */ + if(f & DSPF_ADRL) { + if(shft0 && shft1) { + a64_lsr_w_imm(g_cg, W23, W3, 12); + } else { + a64_lsr_w_imm(g_cg, W23, W1, 16); + a64_and_w_imm(g_cg, W23, W23, 0xFFFu); + } + } + + /* Reclaim label-pool slots; without this the pool overflows after + * ~22 live steps. 
*/ + labels_reset(); +} + +/* --- Public API --------------------------------------------------- */ + +void SCSP_DSP_JIT_Init(struct SS_SCSP* scsp) +{ + (void)scsp; + if(!g_cg) { + g_cg = a64_codegen_create(SCSP_JIT_CODE_BYTES); + if(g_cg) g_seg_start = a64_codegen_wptr(g_cg); + } + SCSP_DSP_JIT_Entry = NULL; +} + +void SCSP_DSP_JIT_Reset(struct SS_SCSP* scsp) +{ + if(!g_cg) SCSP_DSP_JIT_Init(scsp); + SCSP_DSP_JIT_Entry = NULL; +} + +void SCSP_DSP_JIT_Compile(struct SS_SCSP* scsp) +{ + if(!g_cg) + return; + + labels_reset(); + a64_codegen_set_wptr(g_cg, g_seg_start); + void* const entry_addr = a64_codegen_wptr(g_cg); + + const uint8_t rbl = scsp->RBL; + const uint8_t rbp = scsp->RBP; + + const unsigned max_native = + (SCSP_DSP_JIT_MAX_NATIVE_STEPS < 128u) + ? (unsigned)SCSP_DSP_JIT_MAX_NATIVE_STEPS : 128u; + + /* Any live step within max_native picks pin-based Mode A; + * otherwise fall through to all-helper Mode B. */ + bool mode_a = false; + for(unsigned i = 0; i < max_native; ++i) { + if(scsp->DSP.MPROG_Decoded[i].live) { mode_a = true; break; } + } + + if(mode_a) { + emit_full_prologue(); + for(unsigned step = 0; step < 128u; ++step) { + const SS_SCSP_DSPStep* s = &scsp->DSP.MPROG_Decoded[step]; + if(!s->live) continue; + if(step < max_native) { + emit_step_native(s, rbl, rbp); + } else { + emit_pin_flush(); + emit_step_helper_bl(step); + emit_pin_reload(); + } + } + emit_mdec_ct_update_pin(rbl); + emit_full_epilogue(); + } else { + emit_min_prologue(); + for(unsigned step = 0; step < 128u; ++step) { + if(scsp->DSP.MPROG_Decoded[step].live) + emit_step_helper_bl(step); + } + emit_mdec_ct_update_mem(rbl); + emit_min_epilogue(); + } + + /* Resolve every queued movp2r_pool site. Both epilogue paths above + * end in RET, so the pool data emitted here is unreachable code. 
*/ + a64_pool_flush(g_cg); + + void* const end_addr = a64_codegen_wptr(g_cg); + const size_t code_bytes = (size_t)((char*)end_addr - (char*)entry_addr); + a64_codegen_invalidate(g_cg, entry_addr, code_bytes); + + /* Publish to perf jitdump. perf inject --jit will resolve samples + * landing anywhere in [entry_addr, end_addr) to this symbol. The + * code_index counter in the shared writer disambiguates successive + * MPROG_Dirty recompiles that reuse the same address. */ + SS_JitDump_Open(); + SS_JitDump_Emit("scsp_mprog", entry_addr, code_bytes); + + SCSP_DSP_JIT_Entry = (void(*)(struct SS_SCSP*))entry_addr; +} + +#else /* non-aarch64 or JIT not requested: stub everything */ + +void SCSP_DSP_JIT_Init (struct SS_SCSP* z) { (void)z; } +void SCSP_DSP_JIT_Reset (struct SS_SCSP* z) { (void)z; } +void SCSP_DSP_JIT_Compile(struct SS_SCSP* z) { (void)z; } + +#endif diff --git a/mednafen/ss/scsp_dsp_jit.h b/mednafen/ss/scsp_dsp_jit.h new file mode 100644 index 00000000..fd051e7a --- /dev/null +++ b/mednafen/ss/scsp_dsp_jit.h @@ -0,0 +1,37 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* scsp_dsp_jit.h - SCSP DSP JIT (aarch64 backend) public interface +** Copyright (C) 2026 pstef +*/ + +#ifndef __MDFN_SS_SCSP_DSP_JIT_H +#define __MDFN_SS_SCSP_DSP_JIT_H + +#ifndef __cplusplus +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct SS_SCSP; + +extern bool setting_jit_scsp; + +void SCSP_DSP_JIT_Init(struct SS_SCSP* scsp); +void SCSP_DSP_JIT_Reset(struct SS_SCSP* scsp); + +/* Caller must already have run DecodeMPROG. Leaves SCSP_DSP_JIT_Entry + * NULL when the JIT isn't available on this platform. */ +void SCSP_DSP_JIT_Compile(struct SS_SCSP* scsp); + +/* NULL on non-aarch64 builds or before the first compile. 
*/ +extern void (*SCSP_DSP_JIT_Entry)(struct SS_SCSP*); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/mednafen/ss/scu.h b/mednafen/ss/scu.h index c905532a..77e169df 100644 --- a/mednafen/ss/scu.h +++ b/mednafen/ss/scu.h @@ -27,16 +27,12 @@ #include "ss_c_abi.h" #include -/* C++ has 'bool' built in; C inclusion (vdp1.c already does, and - * future C-converted modules will) needs the stdbool keyword macros. */ -#ifndef __cplusplus -#include -#endif +#include /* MDFN_COLD attribute macro. C++ TUs got it transitively via ss.h / * mednafen.h; C consumers need it directly. */ #include "../mednafen-types.h" /* StateMem typedef for the SCU_StateAction prototype below. Phase-7d. */ -#include +#include "../state.h" #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/scu.inc b/mednafen/ss/scu.inc index d54fd17d..5fb71d19 100644 --- a/mednafen/ss/scu.inc +++ b/mednafen/ss/scu.inc @@ -101,6 +101,15 @@ enum { DMA_UpdateTimingGran = 127 }; enum { DSP_UpdateTimingGran = 64 }; // Probably should keep it a multiple of 2. +/* C-compat typedefs for the two DMA struct types defined below. + * In C the struct tag is not auto-aliased to a type name, so + * uses of `DMAWriteTabS` / `DMALevelS` without the `struct` keyword + * fail to parse. Forward-declare both typedefs here so the + * struct bodies below and the function-level uses (`DMALevelS* + * d = &DMALevel[...]`) all resolve in either language. 
*/ +typedef struct DMAWriteTabS DMAWriteTabS; +typedef struct DMALevelS DMALevelS; + struct DMAWriteTabS { int16_t write_addr_delta; @@ -207,7 +216,7 @@ static INLINE bool CheckDoMasterInt(void) 0x0 }; - static const uint8_t external_tab[16 + 1] + static const uint8_t external_tab[16 + 1] = { 0x7, 0x7, 0x7, 0x7, 0x4, 0x4, 0x4, 0x4, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, @@ -390,10 +399,11 @@ void SCU_AdjustTS(const int32_t delta) { SCU_DMA_TimeCounter += delta; SCU_DMA_RunUntil += delta; - for(auto& d : DMALevel) + for(unsigned level___ = 0; level___ < 3; level___++) { - if(d.Active < 0) - d.FinishTime += delta; + DMALevelS* d = &DMALevel[level___]; + if(d->Active < 0) + d->FinishTime += delta; } // @@ -425,10 +435,6 @@ void SCU_AdjustTS(const int32_t delta) /* === SCU_RegRW_DB variants (3 T x 2 IsWrite) === */ static INLINE void SCU_RegRW_DB_u8_W0(uint32_t A, uint32_t* DB) { - unsigned mask; - - mask = 0xFF << (((A & 3) ^ 3) << 3); - switch(A & 0xFC) { default: @@ -457,9 +463,9 @@ static INLINE void SCU_RegRW_DB_u8_W0(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = d.StartReadAddr; + *DB = d->StartReadAddr; } break; @@ -467,9 +473,9 @@ static INLINE void SCU_RegRW_DB_u8_W0(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = d.StartWriteAddr; + *DB = d->StartWriteAddr; } break; @@ -479,9 +485,9 @@ static INLINE void SCU_RegRW_DB_u8_W0(uint32_t A, uint32_t* DB) for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - if(d.Active) + if(d->Active) { tmp |= 0x10 << (level << 2); } @@ -520,7 +526,7 @@ static INLINE void SCU_RegRW_DB_u8_W0(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - *DB = MDAP(DSP.DataRAM)[DSP.RA++]; + *DB = ((uint32_t*)DSP.DataRAM)[DSP.RA++]; else *DB = 0xFFFFFFFF; break; @@ 
-618,9 +624,9 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartReadAddr = (d.StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + d->StartReadAddr = (d->StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -628,9 +634,9 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartWriteAddr = (d.StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + d->StartWriteAddr = (d->StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -639,9 +645,9 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x48: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.StartByteCount = (d.StartByteCount &~ mask) | (*DB & mask & (level ? 0x00000FFF : 0x000FFFFF)); + d->StartByteCount = (d->StartByteCount &~ mask) | (*DB & mask & (level ? 
0x00000FFF : 0x000FFFFF)); } break; @@ -649,13 +655,13 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x2C: case 0x4C: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.ReadAdd << 8) | (d.WriteAdd << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->ReadAdd << 8) | (d->WriteAdd << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.ReadAdd = (tmp >> 8) & 0x1; - d.WriteAdd = (tmp >> 0) & 0x7; + d->ReadAdd = (tmp >> 8) & 0x1; + d->WriteAdd = (tmp >> 0) & 0x7; } break; @@ -664,18 +670,18 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x50: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; - uint32_t tmp = (d.Enable << 8); + DMALevelS* d = &DMALevel[level]; + uint32_t tmp = (d->Enable << 8); tmp = (tmp &~ mask) | (*DB & mask); - d.Enable = (tmp >> 8) & 0x1; + d->Enable = (tmp >> 8) & 0x1; - if((tmp & 0x1) && d.Enable && d.SF == 0x7) + if((tmp & 0x1) && d->Enable && d->SF == 0x7) { SCU_UpdateDMA(SH7095_mem_timestamp); - d.GoGoGadget = true; - CheckDMAStart(&d); + d->GoGoGadget = true; + CheckDMAStart(d); SS_SetEventNT(&events[SS_EVENT_SCU_DMA], SCU_UpdateDMA(SH7095_mem_timestamp)); } @@ -686,15 +692,15 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x34: case 0x54: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.Indirect << 24) | (d.ReadUpdate << 16) | (d.WriteUpdate << 8) | (d.SF << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->Indirect << 24) | (d->ReadUpdate << 16) | (d->WriteUpdate << 8) | (d->SF << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.Indirect = (tmp >> 24) & 0x1; - d.ReadUpdate = (tmp >> 16) & 0x1; - d.WriteUpdate = (tmp >> 8) & 0x1; - d.SF = (tmp >> 0) & 0x7; + d->Indirect = (tmp >> 24) & 0x1; + d->ReadUpdate = (tmp >> 16) & 0x1; + d->WriteUpdate = (tmp >> 8) & 0x1; + d->SF = (tmp >> 0) & 0x7; } break; @@ -705,10 +711,10 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) SCU_DMA_ReadOverhead 
= 0; for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.Active = false; - d.GoGoGadget = false; + d->Active = false; + d->GoGoGadget = false; } RecalcDMAHalt(); } @@ -743,7 +749,12 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) { if(DSP.State == 0) { - ((void (*)(DSPS*))(DSP_INSTR_BASE_UIPT + (uintptr_t)(DSP_INSTR_RECOVER_TCAST)DSP.NextInstr))(&DSP); +#ifdef WANT_JIT + if(MDFN_LIKELY(setting_jit_scu && SCU_DSP_JIT_Entry != NULL)) + SCU_DSP_JIT_Entry(&DSP); + else +#endif + ((void (*)(DSPS*))(DSP_INSTR_BASE_UIPT + (uintptr_t)(DSP_INSTR_RECOVER_TCAST)DSP.NextInstr))(&DSP); if(DSP.CycleCounter < -(DSP_EndCCSubVal / 2)) // Ugh DSP.CycleCounter += DSP_EndCCSubVal; } @@ -762,7 +773,10 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x84: if(!DSPS_IsRunning(&DSP)) - DSP.ProgRAM[DSP.PC++] = DSP_DecodeInstruction(*DB, false); + { + const uint8_t slot = DSP.PC++; + DSP.ProgRAM[slot] = DSP_DecodeSlotInstruction(slot, *DB, false); + } break; case 0x88: @@ -771,7 +785,7 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - MDAP(DSP.DataRAM)[DSP.RA++] = *DB; + ((uint32_t*)DSP.DataRAM)[DSP.RA++] = *DB; break; } @@ -779,10 +793,6 @@ static INLINE void SCU_RegRW_DB_u8_W1(uint32_t A, uint32_t* DB) static INLINE void SCU_RegRW_DB_u16_W0(uint32_t A, uint32_t* DB) { - unsigned mask; - - mask = 0xFFFF << (((A & 2) ^ 2) << 3); - switch(A & 0xFC) { default: @@ -811,9 +821,9 @@ static INLINE void SCU_RegRW_DB_u16_W0(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = d.StartReadAddr; + *DB = d->StartReadAddr; } break; @@ -821,9 +831,9 @@ static INLINE void SCU_RegRW_DB_u16_W0(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = 
d.StartWriteAddr; + *DB = d->StartWriteAddr; } break; @@ -833,9 +843,9 @@ static INLINE void SCU_RegRW_DB_u16_W0(uint32_t A, uint32_t* DB) for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - if(d.Active) + if(d->Active) { tmp |= 0x10 << (level << 2); } @@ -874,7 +884,7 @@ static INLINE void SCU_RegRW_DB_u16_W0(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - *DB = MDAP(DSP.DataRAM)[DSP.RA++]; + *DB = ((uint32_t*)DSP.DataRAM)[DSP.RA++]; else *DB = 0xFFFFFFFF; break; @@ -972,9 +982,9 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartReadAddr = (d.StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + d->StartReadAddr = (d->StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -982,9 +992,9 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartWriteAddr = (d.StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + d->StartWriteAddr = (d->StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -993,9 +1003,9 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x48: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.StartByteCount = (d.StartByteCount &~ mask) | (*DB & mask & (level ? 0x00000FFF : 0x000FFFFF)); + d->StartByteCount = (d->StartByteCount &~ mask) | (*DB & mask & (level ? 
0x00000FFF : 0x000FFFFF)); } break; @@ -1003,13 +1013,13 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x2C: case 0x4C: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.ReadAdd << 8) | (d.WriteAdd << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->ReadAdd << 8) | (d->WriteAdd << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.ReadAdd = (tmp >> 8) & 0x1; - d.WriteAdd = (tmp >> 0) & 0x7; + d->ReadAdd = (tmp >> 8) & 0x1; + d->WriteAdd = (tmp >> 0) & 0x7; } break; @@ -1018,18 +1028,18 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x50: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; - uint32_t tmp = (d.Enable << 8); + DMALevelS* d = &DMALevel[level]; + uint32_t tmp = (d->Enable << 8); tmp = (tmp &~ mask) | (*DB & mask); - d.Enable = (tmp >> 8) & 0x1; + d->Enable = (tmp >> 8) & 0x1; - if((tmp & 0x1) && d.Enable && d.SF == 0x7) + if((tmp & 0x1) && d->Enable && d->SF == 0x7) { SCU_UpdateDMA(SH7095_mem_timestamp); - d.GoGoGadget = true; - CheckDMAStart(&d); + d->GoGoGadget = true; + CheckDMAStart(d); SS_SetEventNT(&events[SS_EVENT_SCU_DMA], SCU_UpdateDMA(SH7095_mem_timestamp)); } @@ -1040,15 +1050,15 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x34: case 0x54: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.Indirect << 24) | (d.ReadUpdate << 16) | (d.WriteUpdate << 8) | (d.SF << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->Indirect << 24) | (d->ReadUpdate << 16) | (d->WriteUpdate << 8) | (d->SF << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.Indirect = (tmp >> 24) & 0x1; - d.ReadUpdate = (tmp >> 16) & 0x1; - d.WriteUpdate = (tmp >> 8) & 0x1; - d.SF = (tmp >> 0) & 0x7; + d->Indirect = (tmp >> 24) & 0x1; + d->ReadUpdate = (tmp >> 16) & 0x1; + d->WriteUpdate = (tmp >> 8) & 0x1; + d->SF = (tmp >> 0) & 0x7; } break; @@ -1059,10 +1069,10 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) 
SCU_DMA_ReadOverhead = 0; for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.Active = false; - d.GoGoGadget = false; + d->Active = false; + d->GoGoGadget = false; } RecalcDMAHalt(); } @@ -1125,7 +1135,7 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - MDAP(DSP.DataRAM)[DSP.RA++] = *DB; + ((uint32_t*)DSP.DataRAM)[DSP.RA++] = *DB; break; } @@ -1133,10 +1143,6 @@ static INLINE void SCU_RegRW_DB_u16_W1(uint32_t A, uint32_t* DB) static INLINE void SCU_RegRW_DB_u32_W0(uint32_t A, uint32_t* DB) { - unsigned mask; - - mask = 0xFFFFFFFF; - switch(A & 0xFC) { default: @@ -1165,9 +1171,9 @@ static INLINE void SCU_RegRW_DB_u32_W0(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = d.StartReadAddr; + *DB = d->StartReadAddr; } break; @@ -1175,9 +1181,9 @@ static INLINE void SCU_RegRW_DB_u32_W0(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto const& d = DMALevel[(A >> 5) & 0x3]; + const DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - *DB = d.StartWriteAddr; + *DB = d->StartWriteAddr; } break; @@ -1187,9 +1193,9 @@ static INLINE void SCU_RegRW_DB_u32_W0(uint32_t A, uint32_t* DB) for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - if(d.Active) + if(d->Active) { tmp |= 0x10 << (level << 2); } @@ -1228,7 +1234,7 @@ static INLINE void SCU_RegRW_DB_u32_W0(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - *DB = MDAP(DSP.DataRAM)[DSP.RA++]; + *DB = ((uint32_t*)DSP.DataRAM)[DSP.RA++]; else *DB = 0xFFFFFFFF; break; @@ -1326,9 +1332,9 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x20: case 0x40: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartReadAddr = (d.StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + 
d->StartReadAddr = (d->StartReadAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -1336,9 +1342,9 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x24: case 0x44: { - auto& d = DMALevel[(A >> 5) & 0x3]; + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; - d.StartWriteAddr = (d.StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); + d->StartWriteAddr = (d->StartWriteAddr &~ mask) | (*DB & mask & 0x07FFFFFF); } break; @@ -1347,9 +1353,9 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x48: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.StartByteCount = (d.StartByteCount &~ mask) | (*DB & mask & (level ? 0x00000FFF : 0x000FFFFF)); + d->StartByteCount = (d->StartByteCount &~ mask) | (*DB & mask & (level ? 0x00000FFF : 0x000FFFFF)); } break; @@ -1357,13 +1363,13 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x2C: case 0x4C: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.ReadAdd << 8) | (d.WriteAdd << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->ReadAdd << 8) | (d->WriteAdd << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.ReadAdd = (tmp >> 8) & 0x1; - d.WriteAdd = (tmp >> 0) & 0x7; + d->ReadAdd = (tmp >> 8) & 0x1; + d->WriteAdd = (tmp >> 0) & 0x7; } break; @@ -1372,18 +1378,18 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x50: { const unsigned level = (A >> 5) & 0x3; - auto& d = DMALevel[level]; - uint32_t tmp = (d.Enable << 8); + DMALevelS* d = &DMALevel[level]; + uint32_t tmp = (d->Enable << 8); tmp = (tmp &~ mask) | (*DB & mask); - d.Enable = (tmp >> 8) & 0x1; + d->Enable = (tmp >> 8) & 0x1; - if((tmp & 0x1) && d.Enable && d.SF == 0x7) + if((tmp & 0x1) && d->Enable && d->SF == 0x7) { SCU_UpdateDMA(SH7095_mem_timestamp); - d.GoGoGadget = true; - CheckDMAStart(&d); + d->GoGoGadget = true; + CheckDMAStart(d); SS_SetEventNT(&events[SS_EVENT_SCU_DMA], 
SCU_UpdateDMA(SH7095_mem_timestamp)); } @@ -1394,15 +1400,15 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x34: case 0x54: { - auto& d = DMALevel[(A >> 5) & 0x3]; - uint32_t tmp = (d.Indirect << 24) | (d.ReadUpdate << 16) | (d.WriteUpdate << 8) | (d.SF << 0); + DMALevelS* d = &DMALevel[(A >> 5) & 0x3]; + uint32_t tmp = (d->Indirect << 24) | (d->ReadUpdate << 16) | (d->WriteUpdate << 8) | (d->SF << 0); tmp = (tmp &~ mask) | (*DB & mask); - d.Indirect = (tmp >> 24) & 0x1; - d.ReadUpdate = (tmp >> 16) & 0x1; - d.WriteUpdate = (tmp >> 8) & 0x1; - d.SF = (tmp >> 0) & 0x7; + d->Indirect = (tmp >> 24) & 0x1; + d->ReadUpdate = (tmp >> 16) & 0x1; + d->WriteUpdate = (tmp >> 8) & 0x1; + d->SF = (tmp >> 0) & 0x7; } break; @@ -1413,10 +1419,10 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) SCU_DMA_ReadOverhead = 0; for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.Active = false; - d.GoGoGadget = false; + d->Active = false; + d->GoGoGadget = false; } RecalcDMAHalt(); } @@ -1479,7 +1485,7 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) case 0x8C: if(!DSPS_IsRunning(&DSP)) - MDAP(DSP.DataRAM)[DSP.RA++] = *DB; + ((uint32_t*)DSP.DataRAM)[DSP.RA++] = *DB; break; } @@ -1499,7 +1505,7 @@ static INLINE void SCU_RegRW_DB_u32_W1(uint32_t A, uint32_t* DB) * for its tuple -- no constexpr-shadow indirection. 
*/ /* === BBusRW_DB variants (6 used tuples) === */ -static INLINE void BBusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -1625,7 +1631,7 @@ static INLINE void BBusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_t } -static INLINE void BBusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -1751,7 +1757,7 @@ static INLINE void BBusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_ } -static INLINE void BBusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -1872,7 +1878,7 @@ static INLINE void BBusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_ } -static INLINE void BBusRW_DB_u32_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u32_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -1998,7 +2004,7 @@ 
static INLINE void BBusRW_DB_u32_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_ } -static INLINE void BBusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -2107,7 +2113,7 @@ static INLINE void BBusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_ *DB = 0; } -static INLINE void BBusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void BBusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { @@ -2216,7 +2222,7 @@ static INLINE void BBusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_ /* === ABusRW_DB variants (5 used tuples) === */ -static INLINE void ABusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void ABusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { // // A-Bus CS0 and CS1 @@ -2317,7 +2323,7 @@ static INLINE void ABusRW_DB_u8_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_t *dma_time_thing -= 1; } -static INLINE void ABusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void ABusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, 
int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { // // A-Bus CS0 and CS1 @@ -2418,7 +2424,7 @@ static INLINE void ABusRW_DB_u16_W1_SH0(uint32_t A, uint16_t* DB, int32_t* time_ *dma_time_thing -= 1; } -static INLINE void ABusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void ABusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { // // A-Bus CS0 and CS1 @@ -2519,7 +2525,7 @@ static INLINE void ABusRW_DB_u16_W1_SH1(uint32_t A, uint16_t* DB, int32_t* time_ *dma_time_thing -= 1; } -static INLINE void ABusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void ABusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { // // A-Bus CS0 and CS1 @@ -2601,7 +2607,6 @@ static INLINE void ABusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_ if((A & 0x7FFF) < 0x1000) { const uint32_t offset = (A & 0x3F) >> 2; - const uint32_t mask = (true) ? 0xFFFF : (0xFF << (((A & 1) ^ 1) << 3)); if(true || !(A & 0x80000)) // CD block seems to effectively ignore second read access in 32-bit reads somehow, tested to occur HIRQ and the FIFO at least... 
*DB = CDB_Read(offset); @@ -2621,7 +2626,7 @@ static INLINE void ABusRW_DB_u16_W0_SH0(uint32_t A, uint16_t* DB, int32_t* time_ *dma_time_thing -= 1; } -static INLINE void ABusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) // add to time_thing, subtract from dma_time_thing +static INLINE void ABusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) // add to time_thing, subtract from dma_time_thing { // // A-Bus CS0 and CS1 @@ -2703,7 +2708,6 @@ static INLINE void ABusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_ if((A & 0x7FFF) < 0x1000) { const uint32_t offset = (A & 0x3F) >> 2; - const uint32_t mask = (true) ? 0xFFFF : (0xFF << (((A & 1) ^ 1) << 3)); if(false || !(A & 0x80000)) // CD block seems to effectively ignore second read access in 32-bit reads somehow, tested to occur HIRQ and the FIFO at least... *DB = CDB_Read(offset); @@ -2743,21 +2747,21 @@ static INLINE void ABusRW_DB_u16_W0_SH1(uint32_t A, uint16_t* DB, int32_t* time_ * the u8/u16 bodies are now concrete-T template * instantiations (uint8_t / uint16_t respectively). 
*/ -static INLINE void ABus_Write_DB32_u8(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) +static INLINE void ABus_Write_DB32_u8(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) { uint16_t tmp = DB32 >> (((A & 2) ^ 2) << 3); ABusRW_DB_u8_W1_SH0(A, &tmp, time_thing, dma_time_thing, sh2_dma_time_thing); } -static INLINE void ABus_Write_DB32_u16(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) +static INLINE void ABus_Write_DB32_u16(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) { uint16_t tmp = DB32 >> (((A & 2) ^ 2) << 3); ABusRW_DB_u16_W1_SH0(A, &tmp, time_thing, dma_time_thing, sh2_dma_time_thing); } -static INLINE void ABus_Write_DB32_u32(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) +static INLINE void ABus_Write_DB32_u32(uint32_t A, uint32_t DB32, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) { uint16_t tmp; @@ -2769,7 +2773,7 @@ static INLINE void ABus_Write_DB32_u32(uint32_t A, uint32_t DB32, int32_t* time_ } // Lower 2 bits of A should be 0 -static INLINE uint32_t ABus_Read(uint32_t A, int32_t* time_thing, int32_t* dma_time_thing = NULL, int32_t* sh2_dma_time_thing = NULL) +static INLINE uint32_t ABus_Read(uint32_t A, int32_t* time_thing, int32_t* dma_time_thing, int32_t* sh2_dma_time_thing) { uint32_t ret; uint16_t tmp = 0xFFFF; @@ -3198,7 +3202,7 @@ static INLINE void SCU_FromSH2_BusRW_DB_u32_W1(uint32_t A, uint32_t* DB, int32_t // static uint32_t DMA_ReadABus(uint32_t offset) { - return ABus_Read(offset, NULL, &SCU_DMA_ReadOverhead); + return ABus_Read(offset, NULL, &SCU_DMA_ReadOverhead, NULL); } static uint32_t DMA_ReadBBus(uint32_t offset) @@ -3206,10 +3210,10 @@ static uint32_t DMA_ReadBBus(uint32_t offset) uint32_t 
ret; uint16_t tmp = 0; - BBusRW_DB_u16_W0_SH0(offset | 0, &tmp, NULL, &SCU_DMA_ReadOverhead); + BBusRW_DB_u16_W0_SH0(offset | 0, &tmp, NULL, &SCU_DMA_ReadOverhead, NULL); ret = tmp << 16; - BBusRW_DB_u16_W0_SH1(offset | 2, &tmp, NULL, &SCU_DMA_ReadOverhead); + BBusRW_DB_u16_W0_SH1(offset | 2, &tmp, NULL, &SCU_DMA_ReadOverhead, NULL); ret |= tmp << 0; return ret; @@ -3408,12 +3412,12 @@ static void CheckDMASFByInt(unsigned int_which) for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - if(d.Enable && d.SF < 0x7 && sf_to_int_tab[d.SF] == int_which) + if(d->Enable && d->SF < 0x7 && sf_to_int_tab[d->SF] == int_which) { - d.GoGoGadget = true; - CheckDMAStart(&d); + d->GoGoGadget = true; + CheckDMAStart(d); } } } @@ -3518,7 +3522,7 @@ static INLINE void DMA_Write_WB0_u8(DMALevelS* d, uint32_t DB) int32_t WriteOverhead = 0; /* Phase-8k: sizeof(T) folds at template-instantiation time. */ - ABus_Write_DB32_u8 (A, DB, NULL, &WriteOverhead); + ABus_Write_DB32_u8 (A, DB, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3531,7 +3535,7 @@ static INLINE void DMA_Write_WB0_u16(DMALevelS* d, uint32_t DB) int32_t WriteOverhead = 0; /* Phase-8k: sizeof(T) folds at template-instantiation time. */ - ABus_Write_DB32_u16(A, DB, NULL, &WriteOverhead); + ABus_Write_DB32_u16(A, DB, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3544,7 +3548,7 @@ static INLINE void DMA_Write_WB0_u32(DMALevelS* d, uint32_t DB) int32_t WriteOverhead = 0; /* Phase-8k: sizeof(T) folds at template-instantiation time. 
*/ - ABus_Write_DB32_u32(A, DB, NULL, &WriteOverhead); + ABus_Write_DB32_u32(A, DB, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3560,7 +3564,7 @@ static INLINE void DMA_Write_WB1_u8(DMALevelS* d, uint32_t DB) DB16 = DB >> (((A & 0x2) ^ 0x2) * 8); /* Phase-8q1: sizeof(T) folds at DMA_Write template instantiation. */ - BBusRW_DB_u8_W1_SH0 (A, &DB16, NULL, &WriteOverhead); + BBusRW_DB_u8_W1_SH0 (A, &DB16, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3576,7 +3580,7 @@ static INLINE void DMA_Write_WB1_u16(DMALevelS* d, uint32_t DB) DB16 = DB >> (((A & 0x2) ^ 0x2) * 8); /* Phase-8q1: sizeof(T) folds at DMA_Write template instantiation. */ - BBusRW_DB_u16_W1_SH0(A, &DB16, NULL, &WriteOverhead); + BBusRW_DB_u16_W1_SH0(A, &DB16, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3592,7 +3596,7 @@ static INLINE void DMA_Write_WB1_u32(DMALevelS* d, uint32_t DB) DB16 = DB >> (((A & 0x2) ^ 0x2) * 8); /* Phase-8q1: sizeof(T) folds at DMA_Write template instantiation. */ - BBusRW_DB_u32_W1_SH0(A, &DB16, NULL, &WriteOverhead); + BBusRW_DB_u32_W1_SH0(A, &DB16, NULL, &WriteOverhead, NULL); SCU_DMA_TimeCounter -= WriteOverhead; SCU_DMA_ReadOverhead = ((int32_t)(0) < (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead) ? (int32_t)(0) : (int32_t)(SCU_DMA_ReadOverhead - WriteOverhead)); @@ -3861,10 +3865,16 @@ static INLINE void CheckForceDMAFinish(void) * (extern "C" wrapped on the C++ side, plain decl on the C side), * defined here in ss.cpp's TU. 
`extern "C"` on the definition * forces the unmangled symbol so the scu_dsp_*.c files link - * against the same `DSP` global. */ + * against the same `DSP` global. Gated for C compilation: in C + * the linkage is already C (no mangling), so the `extern "C"` + * wrapper is C++-only syntax to suppress -- not semantics. */ +#ifdef __cplusplus extern "C" { +#endif struct DSPS DSP; +#ifdef __cplusplus } +#endif sscpu_timestamp_t SCU_UpdateDSP(sscpu_timestamp_t timestamp) { @@ -3887,7 +3897,14 @@ sscpu_timestamp_t SCU_UpdateDSP(sscpu_timestamp_t timestamp) // remaining cycles via DSP_TailDispatch and only returns once the // budget is exhausted (or the DSP stopped). if(MDFN_LIKELY(dsp->CycleCounter > 0)) - ((void (*)(DSPS*))(DSP_INSTR_BASE_UIPT + (uintptr_t)(DSP_INSTR_RECOVER_TCAST)dsp->NextInstr))(dsp); + { +#ifdef WANT_JIT + if(MDFN_LIKELY(setting_jit_scu && SCU_DSP_JIT_Entry != NULL)) + SCU_DSP_JIT_Entry(dsp); + else +#endif + ((void (*)(DSPS*))(DSP_INSTR_BASE_UIPT + (uintptr_t)(DSP_INSTR_RECOVER_TCAST)dsp->NextInstr))(dsp); + } } if(MDFN_UNLIKELY(!DSPS_IsRunning(&DSP))) @@ -3907,11 +3924,15 @@ static void DSP_Reset(bool powering_up) if(powering_up) { +#ifdef WANT_JIT + if(setting_jit_scu) + SCU_DSP_JIT_Reset(); +#endif for(unsigned i = 0; i < 256; i++) - DSP.ProgRAM[i] = DSP_DecodeInstruction(0, false); + DSP.ProgRAM[i] = DSP_DecodeSlotInstruction(i, 0, false); for(unsigned i = 0; i < 256; i++) - MDAP(DSP.DataRAM)[i] = 0; + ((uint32_t*)DSP.DataRAM)[i] = 0; } DSP.PC = 0; @@ -3951,11 +3972,17 @@ static void DSP_Reset(bool powering_up) * the same names. DSP_Init's address is also used as the base * pointer for the DSP_INSTR_BASE_UIPT macro on 64-bit hosts, so * the same symbol address must be visible to both compilers. 
*/ +#ifdef __cplusplus extern "C" { +#endif MDFN_COLD void DSP_Init(void) { DSP.LastTS = 0; +#ifdef WANT_JIT + if(setting_jit_scu) + SCU_DSP_JIT_Init(); +#endif } void DSP_FinishPRAMDMA(void) @@ -3965,7 +3992,10 @@ void DSP_FinishPRAMDMA(void) DSP.T0_Until = DSP.CycleCounter; for(uint32_t i = 0; i < DSP.PRAMDMABufCount; i++) - DSP.ProgRAM[DSP.PC++] = DSP_DecodeInstruction(DSP.PRAMDMABuf[i & 0xFF], false); + { + const uint8_t slot = DSP.PC++; + DSP.ProgRAM[slot] = DSP_DecodeSlotInstruction(slot, DSP.PRAMDMABuf[i & 0xFF], false); + } DSP.PRAMDMABufCount = 0; // @@ -3973,7 +4003,9 @@ void DSP_FinishPRAMDMA(void) DSP.NextInstr = DSP_DecodeInstruction(0, false); } +#ifdef __cplusplus } /* extern "C" */ +#endif /* Phase-5d: was `template static NO_INLINE NO_CLONE void DMAInstr(DSPS* dsp)` -- @@ -4091,18 +4123,18 @@ void DSP_FinishPRAMDMA(void) uint16_t DB16; \ \ DB16 = DB >> 16; \ - BBusRW_DB_u16_W1_SH0(addr, &DB16, NULL, &dsp->T0_Until); \ + BBusRW_DB_u16_W1_SH0(addr, &DB16, NULL, &dsp->T0_Until, NULL); \ \ addr += addr_add_amount; \ \ DB16 = DB; \ - BBusRW_DB_u16_W1_SH1(addr, &DB16, NULL, &dsp->T0_Until); \ + BBusRW_DB_u16_W1_SH1(addr, &DB16, NULL, &dsp->T0_Until, NULL); \ \ addr += addr_add_amount; \ } \ else if(WriteBus == 0) \ { \ - ABus_Write_DB32_u32(addr, DB, NULL, &dsp->T0_Until); \ + ABus_Write_DB32_u32(addr, DB, NULL, &dsp->T0_Until, NULL); \ \ addr += addr_add_amount; \ } \ @@ -4142,17 +4174,17 @@ void DSP_FinishPRAMDMA(void) { \ uint16_t tmp = 0; \ \ - BBusRW_DB_u16_W0_SH0(addr | 0, &tmp, NULL, &dsp->T0_Until); \ + BBusRW_DB_u16_W0_SH0(addr | 0, &tmp, NULL, &dsp->T0_Until, NULL); \ DB = tmp << 16; \ \ - BBusRW_DB_u16_W0_SH1(addr | 2, &tmp, NULL, &dsp->T0_Until); \ + BBusRW_DB_u16_W0_SH1(addr | 2, &tmp, NULL, &dsp->T0_Until, NULL); \ DB |= tmp << 0; \ \ addr += 4; \ } \ else if(ReadBus == 0) \ { \ - DB = ABus_Read(addr, NULL, &dsp->T0_Until); \ + DB = ABus_Read(addr, NULL, &dsp->T0_Until, NULL); \ \ addr += addr_add_amount; \ } \ @@ -4214,12 +4246,16 @@ 
DDMA_FOR_EACH_HOLD(DEFINE_DMAInstr, 1) /* Phase-5e: extern "C" so the table-symbol name matches the unmangled * scu_dsp_common.inc declaration that the C-side scu_dsp_misc.c sees * (DSP_DecodeInstruction inlines a read of this table in the LPS path). */ +#ifdef __cplusplus extern "C" { -MDFN_HIDE extern void (*const DSP_DMAFuncTable[2][8][8])(struct DSPS*) = +#endif +MDFN_HIDE void (*const DSP_DMAFuncTable[2][8][8])(struct DSPS*) = { #include "scu_dsp_dmatab.inc" }; +#ifdef __cplusplus } /* extern "C" */ +#endif #undef DMAInstr_NAME #undef DMAInstr_BODY @@ -4273,23 +4309,24 @@ void SCU_Reset(bool powering_up) if(powering_up) memset(DMALevel, 0x00, sizeof(DMALevel)); - for(auto& d : DMALevel) + for(unsigned level___ = 0; level___ < 3; level___++) { - d.ReadAdd = true; - d.WriteAdd = 0x1; + DMALevelS* d = &DMALevel[level___]; + d->ReadAdd = true; + d->WriteAdd = 0x1; - d.Enable = false; - d.GoGoGadget = false; - d.Active = false; + d->Enable = false; + d->GoGoGadget = false; + d->Active = false; - d.Indirect = false; - d.ReadUpdate = false; - d.WriteUpdate = false; - d.SF = 0x7; + d->Indirect = false; + d->ReadUpdate = false; + d->WriteUpdate = false; + d->SF = 0x7; - d.WATable = &dma_write_tab.acb[0][0][0][0][0]; - d.ReadFunc = rftab[0]; - d.TableReadFunc = NULL; + d->WATable = &dma_write_tab.acb[0][0][0][0][0]; + d->ReadFunc = rftab[0]; + d->TableReadFunc = NULL; } //SCU_DMA_CycleCounter = 0; SCU_DMA_ReadOverhead = 0; @@ -4435,12 +4472,12 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat for(unsigned level = 0; level < 3; level++) { - const auto& d = DMALevel[level]; + const DMALevelS* d = &DMALevel[level]; - if(d.WATable >= dwt_ptrs[0] && d.WATable < (dwt_ptrs[0] + dwt_counts[0])) - DMALevel_WATable[level] = d.WATable - dwt_ptrs[0]; - else if(d.WATable >= dwt_ptrs[1] && d.WATable < (dwt_ptrs[1] + dwt_counts[1])) - DMALevel_WATable[level] = 0x80000000 | (d.WATable - dwt_ptrs[1]); + if(d->WATable >= dwt_ptrs[0] && d->WATable < 
(dwt_ptrs[0] + dwt_counts[0])) + DMALevel_WATable[level] = d->WATable - dwt_ptrs[0]; + else if(d->WATable >= dwt_ptrs[1] && d->WATable < (dwt_ptrs[1] + dwt_counts[1])) + DMALevel_WATable[level] = 0x80000000 | (d->WATable - dwt_ptrs[1]); else abort(); // FIXME: NULL pointer on init... @@ -4449,7 +4486,7 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat if(rb >= 3) abort(); - if(d.ReadFunc == rftab[rb]) + if(d->ReadFunc == rftab[rb]) { DMALevel_ReadFunc[level] = rb; break; @@ -4459,7 +4496,7 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat DMALevel_TableReadFunc[level] = 0xFF; for(unsigned trb = 0; trb < 3; trb++) { - if(d.TableReadFunc == rftab[trb]) + if(d->TableReadFunc == rftab[trb]) { DMALevel_TableReadFunc[level] = trb; break; @@ -4485,8 +4522,12 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat ILevel &= 0xF; // // +#ifdef WANT_JIT + if(setting_jit_scu) + SCU_DSP_JIT_Reset(); +#endif for(unsigned i = 0; i < 256; i++) - DSP.ProgRAM[i] = DSP_DecodeInstruction(DSP_ProgRAM[i], false); + DSP.ProgRAM[i] = DSP_DecodeSlotInstruction(i, DSP_ProgRAM[i], false); if(DSP_NextInstrLooped) DSP.NextInstr = DSP_DecodeInstruction(DSP_NextInstr, true); @@ -4507,16 +4548,16 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat // for(unsigned level = 0; level < 3; level++) { - auto& d = DMALevel[level]; + DMALevelS* d = &DMALevel[level]; - d.StartReadAddr &= 0x07FFFFFF; - d.StartWriteAddr &= 0x07FFFFFF; - d.StartByteCount &= level ? 0x00000FFF : 0x000FFFFF; + d->StartReadAddr &= 0x07FFFFFF; + d->StartWriteAddr &= 0x07FFFFFF; + d->StartByteCount &= level ? 
0x00000FFF : 0x000FFFFF; - d.ReadAdd &= 0x1; - d.WriteAdd &= 0x7; + d->ReadAdd &= 0x1; + d->WriteAdd &= 0x7; - d.WriteBus %= 3; + d->WriteBus %= 3; // // { @@ -4524,16 +4565,16 @@ MDFN_COLD void SCU_StateAction(StateMem* sm, const unsigned load, const bool dat uint32_t index = DMALevel_WATable[level] & 0x7FFFFFFF; if(index < dwt_counts[which]) // || !dwt_ptrs[which][index].write_size) - d.WATable = dwt_ptrs[which] + index; + d->WATable = dwt_ptrs[which] + index; } if(DMALevel_ReadFunc[level] < 3) - d.ReadFunc = rftab[DMALevel_ReadFunc[level]]; + d->ReadFunc = rftab[DMALevel_ReadFunc[level]]; if(DMALevel_TableReadFunc[level] == 0xFF) - d.TableReadFunc = NULL; + d->TableReadFunc = NULL; else if(DMALevel_TableReadFunc[level] < 3) - d.TableReadFunc = rftab[DMALevel_TableReadFunc[level]]; + d->TableReadFunc = rftab[DMALevel_TableReadFunc[level]]; } // // @@ -4589,9 +4630,9 @@ uint32_t SCU_GetRegister(const unsigned id, char* const special, const uint32_t case SCU_GSREG_D1MD: case SCU_GSREG_D2MD: { - auto& d = DMALevel[id - SCU_GSREG_D0MD]; + DMALevelS* d = &DMALevel[id - SCU_GSREG_D0MD]; - ret = (d.Indirect << 24) | (d.ReadUpdate << 16) | (d.WriteUpdate << 8) | (d.SF << 0); + ret = (d->Indirect << 24) | (d->ReadUpdate << 16) | (d->WriteUpdate << 8) | (d->SF << 0); } break; // diff --git a/mednafen/ss/scu_dsp_common.inc b/mednafen/ss/scu_dsp_common.inc index 023a43b0..9fb6054b 100644 --- a/mednafen/ss/scu_dsp_common.inc +++ b/mednafen/ss/scu_dsp_common.inc @@ -27,6 +27,18 @@ #define DSP_INSTR_RECOVER_TCAST uint32_t #endif +/* C-compat typedef for the DSPS state struct defined below. The C + * implementation files (scu_dsp_misc.c / scu_dsp_gen.c / scu_dsp_mvi.c) + * spell the type with the `struct` keyword everywhere already; the + * typedef lets scu.inc (included from C++ ss.cpp but now staged for + * a C-future) and any other future C consumer say plain `DSPS*`. 
*/ +typedef struct DSPS DSPS; + +#ifdef WANT_JIT + #include "scu_dsp_jit.h" +extern bool setting_jit_scu; +#endif + // See loop in "SCU_UpdateDSP()" in scu.inc, and END/ENDI handling in scu_dsp_misc.cpp enum { DSP_EndCCSubVal = 1000000 }; @@ -229,6 +241,23 @@ static FORCE_INLINE uint64_t DSP_DecodeInstruction(const uint32_t instr, const b return ((uint64_t)instr << 32) | (uint32_t)((uintptr_t)aal - DSP_INSTR_BASE_UIPT); } +/* Same return format as DSP_DecodeInstruction; routes through the + * per-slot JIT cache and falls back to the C handler on miss. */ +static FORCE_INLINE uint64_t DSP_DecodeSlotInstruction(uint8_t pc, const uint32_t instr, const bool looped) +{ +#ifdef WANT_JIT + if(setting_jit_scu) + { + void (* const jit_entry)(struct DSPS*) = SCU_DSP_JIT_CompileSlot(pc, looped, instr); + if(jit_entry) + return ((uint64_t)instr << 32) | (uint32_t)((uintptr_t)jit_entry - DSP_INSTR_BASE_UIPT); + } +#else + (void)pc; +#endif + return DSP_DecodeInstruction(instr, looped); +} + /* Phase-5a: was `template static FORCE_INLINE uint32_t * DSP_InstrPre(DSPS* dsp)`. 
Caller passes `looped` as a compile-time * constant (template arg of the surrounding instruction-handler in diff --git a/mednafen/ss/scu_dsp_gen.c b/mednafen/ss/scu_dsp_gen.c index 615961f0..6afc59c4 100644 --- a/mednafen/ss/scu_dsp_gen.c +++ b/mednafen/ss/scu_dsp_gen.c @@ -331,7 +331,7 @@ DGI_FOR_EACH_ALU(DEFINE_GeneralInstr, 1) #undef DGI_FOR_EACH_Y #undef DGI_FOR_EACH_D1 -MDFN_HIDE extern void (*const DSP_GenFuncTable[2][16][8][8][4])(struct DSPS*) = +MDFN_HIDE void (*const DSP_GenFuncTable[2][16][8][8][4])(struct DSPS*) = { #include "scu_dsp_gentab.inc" }; diff --git a/mednafen/ss/scu_dsp_jit.c b/mednafen/ss/scu_dsp_jit.c new file mode 100644 index 00000000..ba53e28d --- /dev/null +++ b/mednafen/ss/scu_dsp_jit.c @@ -0,0 +1,1264 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* scu_dsp_jit.c - SCU DSP JIT (aarch64 backend) implementation +** Copyright (C) 2026 pstef +*/ + +/* + * Register allocation map (AArch64, AAPCS). Anything not listed here + * is either standard caller-save scratch (x0-x18, used freely for + * intermediate values) or untouched. + * + * x0 = DSPS* throughout the chain. + * x3 = ALU.T scratch inside emit_gen. + * x4-x9, x12 = generic per-emitter scratches. + * x16, x17 = MOVP2R staging and BR/BLR targets. + * w20 = pinned dsp->LOP (12-bit loop counter). + * w21 = pinned dsp->CycleCounter. + * w22 = pinned dsp->State (read-only cache). + * w23 = pinned dsp->CT32 (4 packed 6-bit CT counters). + * w24 = pinned packed flag bytes (FlagZ/S/V/C at byte 0..3). + * w25 = pinned dsp->NextInstr.low32 (threaded-dispatch offset). + * x26 = pinned dsp->AC.T. + * w27 = pinned dsp->PC. + * x28 = pinned dsp->P.T. 
+ * + * Frame layout for the entry stub: 96 bytes, callee-save preserves + * x19/x20 (dsp-ptr + LOP pair), x21/x22 (CC + State pair), x25/x26 + * (NI.low32 + AC pair), x23/x24 (CT32 + packed-flags pair) and x27/x28 (PC + + * P pair). Slot bodies don't push a frame of their own; their + * tail_dispatch B's straight to the exit stub, which RETs through the + * entry stub's after-BLR. + */ + +#include +#include +#include +#include + +#include "ss.h" +#include "scu.h" +#include "scu_dsp_jit.h" +#include "a64emit.h" +#include "jitdump.h" + +void (*SCU_DSP_JIT_Entry)(struct DSPS*) = NULL; + +#if defined(WANT_JIT) && (defined(__aarch64__) || defined(__arm64__)) + +#include "scu_dsp_common.inc" + +#ifdef WANT_DSP_JIT_PERF_DUMP +#include +#endif + +/* + * Single 1 MB code segment. Bump-allocated per slot; on overflow we + * rewind to the post-stubs offset and rewind_locked() recompiles every + * DSP.ProgRAM[] entry (plus DSP.NextInstr) so no JIT pointer cached in + * the DSP state outlives the bytes it points to. Relying on PRAM-write + * callers to refresh their own slot isn't enough -- PRAM is typically + * loaded once and then executed for millions of cycles, so the other + * 255 slots would dispatch into bytes overwritten by fresh compiles. + */ +#define SCU_JIT_CODE_SEGMENT_SIZE ((size_t)0x100000) +#define SCU_JIT_SLOT_MAX_BYTES ((size_t)1024) + +/* AArch64 register-index conventions. WZR/XZR/SP all encode as 31. + * Numeric in source so a64emit accepts them as plain `unsigned`s. 
*/ +#define W0 0u +#define W1 1u +#define W2 2u +#define W3 3u +#define W4 4u +#define W5 5u +#define W6 6u +#define W7 7u +#define W8 8u +#define W9 9u +#define W10 10u +#define W11 11u +#define W12 12u +#define W13 13u +#define W14 14u +#define W15 15u +#define W16 16u +#define W17 17u +#define W18 18u +#define W19 19u +#define W20 20u +#define W21 21u +#define W22 22u +#define W23 23u +#define W24 24u +#define W25 25u +#define W26 26u +#define W27 27u +#define W28 28u +#define W29 29u +#define W30 30u +#define WZR 31u + +#define X0 0u +#define X1 1u +#define X2 2u +#define X3 3u +#define X4 4u +#define X5 5u +#define X6 6u +#define X7 7u +#define X8 8u +#define X9 9u +#define X10 10u +#define X11 11u +#define X12 12u +#define X16 16u +#define X17 17u +#define X19 19u +#define X20 20u +#define X21 21u +#define X22 22u +#define X23 23u +#define X24 24u +#define X25 25u +#define X26 26u +#define X27 27u +#define X28 28u +#define X29 29u +#define X30 30u +#define XZR 31u +#define SP_REG 31u + +/* --- DSPS field byte offsets ------------------------------------- */ +#define O_CC ((uint32_t)offsetof(struct DSPS, CycleCounter)) +#define O_T0_Until ((uint32_t)offsetof(struct DSPS, T0_Until)) +#define O_State ((uint32_t)offsetof(struct DSPS, State)) +#define O_NI ((uint32_t)offsetof(struct DSPS, NextInstr)) +#define O_PC ((uint32_t)offsetof(struct DSPS, PC)) +#define O_FZ ((uint32_t)offsetof(struct DSPS, FlagZ)) +#define O_FS ((uint32_t)offsetof(struct DSPS, FlagS)) +#define O_FV ((uint32_t)offsetof(struct DSPS, FlagV)) +#define O_FC ((uint32_t)offsetof(struct DSPS, FlagC)) +#define O_FlagEnd ((uint32_t)offsetof(struct DSPS, FlagEnd)) +#define O_LOP ((uint32_t)offsetof(struct DSPS, LOP)) +#define O_TOP ((uint32_t)offsetof(struct DSPS, TOP)) +#define O_AC ((uint32_t)offsetof(struct DSPS, AC)) +#define O_P ((uint32_t)offsetof(struct DSPS, P)) +#define O_P_L (O_P + 0u) +#define O_CT32 ((uint32_t)offsetof(struct DSPS, CT32)) +#define O_RX ((uint32_t)offsetof(struct DSPS, 
RX)) +#define O_RY ((uint32_t)offsetof(struct DSPS, RY)) +#define O_RAO ((uint32_t)offsetof(struct DSPS, RAO)) +#define O_WAO ((uint32_t)offsetof(struct DSPS, WAO)) +#define O_DRAM ((uint32_t)offsetof(struct DSPS, DataRAM)) +#define O_PRAM ((uint32_t)offsetof(struct DSPS, ProgRAM)) +#define O_PRAMDMACt ((uint32_t)offsetof(struct DSPS, PRAMDMABufCount)) + +/* --- Codegen + label pool ---------------------------------------- */ + +/* Per-Compile pool; emit_* sites use at most ~3 live labels per slot. */ +#define LABEL_POOL_SIZE 16u + +static a64_codegen* g_cg = NULL; +static void* g_seg_start = NULL; +static a64_label g_label_pool[LABEL_POOL_SIZE]; +static size_t g_label_count = 0; + +static a64_label* label_new(void) +{ + a64_label* p; + if(g_label_count >= LABEL_POOL_SIZE) return NULL; + p = &g_label_pool[g_label_count++]; + a64_label_reset(p); + return p; +} +static void label_bind(a64_label* lbl) { a64_label_bind(g_cg, lbl); } +static void labels_reset(void) +{ + memset(g_label_pool, 0, sizeof(g_label_pool)); + g_label_count = 0; +} + +/* AND Wd, Wn, #imm with a MOV+AND reg fallback when imm isn't encodable + * as an AArch64 logical immediate. */ +static void emit_and_w_imm_safe(unsigned wd, unsigned wn, uint32_t imm, unsigned scratch) +{ + if(!a64_and_w_imm(g_cg, wd, wn, imm)) + { + a64_mov_w_imm(g_cg, scratch, imm); + a64_and_w_reg(g_cg, wd, wn, scratch); + } +} + +/* ADD Xd, Xn, #imm with a MOV+ADD reg fallback when `imm` doesn't fit + * the AddSubImm encoding (i12 with optional shift-by-12). Defensive + * coverage for DSPS struct offsets -- if a future layout change pushes + * O_PRAM or O_DRAM out of the direct/shifted range, the fallback path + * keeps the JIT compiling instead of asserting. Requires xd != xn: + * the mov_w_imm staging writes the immediate into xd's low 32 bits + * (zero-extended), then add_x_reg reads xn unmolested. 
*/ +static void emit_add_x_imm_safe(unsigned xd, unsigned xn, uint32_t imm) +{ + if(!a64_try_add_x_imm(g_cg, xd, xn, imm)) + { + a64_mov_w_imm(g_cg, xd, imm); + a64_add_x_reg(g_cg, xd, xn, xd); + } +} + +/* --- Stubs / globals --------------------------------------------- */ + +static const void* g_exit_stub_addr = NULL; +static size_t g_post_stub_byte_offset = 0; + +/* + * Looped-slot JIT cache. LPS dispatches the same instruction up to 4096 + * times; without a cache the loop body would run via the templated C + * handler. Keyed by pc, validated by the cached instr (so a PRAM write + * that swaps the body forces lazy recompile on the next LPS). Cleared + * by rewind_locked() because the bump-allocator invalidates every prior + * pointer in the segment. + */ +typedef struct { + void (*entry)(struct DSPS*); + uint32_t instr; +} LoopedSlot; +static LoopedSlot g_looped_cache[256]; + +/* --- Perf jitdump symbol-kind decoder ---------------------------- */ + +#ifdef WANT_DSP_JIT_PERF_DUMP +/* + * The actual writev path lives in jitdump.c (shared with the SCSP + * DSP JIT). This helper only decodes the opcode top-nibble into a + * short string so each slot shows up in `perf report` as + * dsp_<kind>_pc_<slot>. Non-PERF_DUMP builds + * collapse this to "". + */ +static const char* jitdump_kind_str(uint32_t instr) +{ + const unsigned top = (instr >> 28) & 0xFu; + if(top <= 0x3u) return "gen"; + if(top >= 0x8u && top <= 0xBu) return "mvi"; + if(top == 0xCu) return "dma"; + if(top == 0xDu) return "jmp"; + if(top >= 0xEu) return "msc"; + return "unk"; +} +#else /* !WANT_DSP_JIT_PERF_DUMP */ +static inline const char* jitdump_kind_str(uint32_t i) { (void)i; return ""; } +#endif + +/* --- Local helpers ----------------------------------------------- */ + +/* + * Pick the templated C handler that DSP_DecodeInstruction would have + * returned, mirroring the opcode-kind switch in scu_dsp_common.inc. 
+ */ +static void (*pick_c_handler(bool looped, uint32_t instr))(struct DSPS*) +{ + const unsigned li = looped ? 1u : 0u; + const unsigned top = (instr >> 28) & 0xF; + + switch(top) + { + case 0x0: case 0x1: case 0x2: case 0x3: + return DSP_GenFuncTable[li][(instr >> 26) & 0xF][(instr >> 23) & 0x7][(instr >> 17) & 0x7][(instr >> 12) & 0x3]; + + case 0x8: case 0x9: case 0xA: case 0xB: + return DSP_MVIFuncTable[li][(instr >> 26) & 0xF][(instr >> 19) & 0x7F]; + + case 0xC: + return DSP_DMAFuncTable[li][(instr >> 12) & 0x7][(instr >> 8) & 0x7]; + + case 0xD: + return DSP_JMPFuncTable[li][(instr >> 19) & 0x7F]; + + case 0xE: case 0xF: + return DSP_MiscFuncTable[li][(instr >> 27) & 0x3]; + + default: /* top nibble 0x4..0x7: no table of its own, use the general entry at index [0][0][0][0] */ + return DSP_GenFuncTable[li][0][0][0][0]; + } +} + +static void rewind_locked(void) +{ + unsigned i; + if(!g_cg) return; + a64_codegen_set_wptr(g_cg, (char*)g_seg_start + g_post_stub_byte_offset); + labels_reset(); + for(i = 0; i < 256; ++i) g_looped_cache[i].entry = NULL; + + /* The rewind invalidated every JIT pointer cached in + * DSP.ProgRAM[].low32 / DSP.NextInstr.low32 -- those entries now alias + * bytes about to be overwritten by the next compiles. PRAM-write + * callers only refresh the slot(s) they touched, so re-decode every + * slot here using the raw instr cached in its own high32. Bounded + * recursion: 256 slots * SCU_JIT_SLOT_MAX_BYTES (1 KB) = 256 KB into a + * 1 MB segment, so no inner overflow can re-trigger rewind_locked. */ + for(i = 0; i < 256; ++i) + { + const uint32_t instr = (uint32_t)(DSP.ProgRAM[i] >> 32); + DSP.ProgRAM[i] = DSP_DecodeSlotInstruction((uint8_t)i, instr, false); + } + { + const uint32_t instr = (uint32_t)(DSP.NextInstr >> 32); + DSP.NextInstr = DSP_DecodeSlotInstruction(0, instr, false); /* NOTE(review): always re-decoded as slot 0, non-looped, while lps_helper() can leave a looped entry in NextInstr -- confirm a rewind cannot fire while one is live */ + } +} + +/* --- Instruction emitters ---------------------------------------- */ + +/* Compile-time aggregation: x_op, y_op and the d1 alt-source switch + * each contribute to dr_read/ct_inc. 
The final d switch consults the + * accumulated dr_read to skip duplicate DataRAM writes; d in C..F + * folds a byte-clear into ct_inc. */ +typedef struct { + uint32_t dr_read; + uint32_t ct_inc; +} GenMeta; + +static GenMeta compute_meta(unsigned x_op, unsigned y_op, unsigned d1_op, uint32_t instr) +{ + GenMeta m; + m.dr_read = 0u; + m.ct_inc = 0u; + + if(x_op >= 0x3) + { + const unsigned s = (instr >> 20) & 0x7; + const unsigned drw = s & 0x3; + m.dr_read |= 1u << drw; + if(s & 0x4) m.ct_inc |= 1u << (drw * 8); + } + if(y_op >= 0x3) + { + const unsigned s = (instr >> 14) & 0x7; + const unsigned drw = s & 0x3; + m.dr_read |= 1u << drw; + if(s & 0x4) m.ct_inc |= 1u << (drw * 8); + } + if(d1_op & 0x1) + { + const unsigned d = (instr >> 8) & 0xF; + if(d1_op & 0x2) + { + switch(instr & 0xF) + { + case 0x0: m.dr_read |= 0x01; break; + case 0x1: m.dr_read |= 0x02; break; + case 0x2: m.dr_read |= 0x04; break; + case 0x3: m.dr_read |= 0x08; break; + case 0x4: m.dr_read |= 0x01; if(d != 0) m.ct_inc |= 1u << 0; break; + case 0x5: m.dr_read |= 0x02; if(d != 1) m.ct_inc |= 1u << 8; break; + case 0x6: m.dr_read |= 0x04; if(d != 2) m.ct_inc |= 1u << 16; break; + case 0x7: m.dr_read |= 0x08; if(d != 3) m.ct_inc |= 1u << 24; break; + default: break; + } + } + switch(d) + { + case 0x0: if(!(m.dr_read & 0x01)) m.ct_inc |= 1u << 0; break; + case 0x1: if(!(m.dr_read & 0x02)) m.ct_inc |= 1u << 8; break; + case 0x2: if(!(m.dr_read & 0x04)) m.ct_inc |= 1u << 16; break; + case 0x3: if(!(m.dr_read & 0x08)) m.ct_inc |= 1u << 24; break; + case 0xC: m.ct_inc &= ~0x000000FFu; break; + case 0xD: m.ct_inc &= ~0x0000FF00u; break; + case 0xE: m.ct_inc &= ~0x00FF0000u; break; + case 0xF: m.ct_inc &= ~0xFF000000u; break; + default: break; + } + } + return m; +} + +static void emit_instr_pre(bool looped) +{ + if(!looped) + { + /* W27 pin = dsp->PC byte (zero-extended). 
*/ + emit_add_x_imm_safe(X4, X0, O_PRAM); + a64_ldr_x_idx_lsl(g_cg, X5, X4, X27, 3u); + a64_str_x_imm(g_cg, X5, X0, O_NI); + a64_mov_w_reg(g_cg, W25, W5); + a64_add_w_imm(g_cg, W27, W27, 1u); + a64_and_w_imm(g_cg, W27, W27, 0xFFu); + a64_strb_w_imm(g_cg, W27, X0, O_PC); + } + else + { + a64_label* skip_load = label_new(); + a64_cbnz_w(g_cg, W20, skip_load); + emit_add_x_imm_safe(X5, X0, O_PRAM); + a64_ldr_x_idx_lsl(g_cg, X6, X5, X27, 3u); + a64_str_x_imm(g_cg, X6, X0, O_NI); + a64_mov_w_reg(g_cg, W25, W6); + a64_add_w_imm(g_cg, W27, W27, 1u); + a64_and_w_imm(g_cg, W27, W27, 0xFFu); + a64_strb_w_imm(g_cg, W27, X0, O_PC); + label_bind(skip_load); + a64_sub_w_imm(g_cg, W20, W20, 1u); + a64_and_w_imm(g_cg, W20, W20, 0xFFFu); + a64_strh_w_imm(g_cg, W20, X0, O_LOP); + } +} + +/* + * Set FlagS/FlagZ from the most recent flag-setting op. W24 is the + * pinned packed-flags register; byte 0 = FlagZ, byte 1 = FlagS. + */ +static void emit_store_sz(void) +{ + a64_cset_w(g_cg, W7, A64_COND_MI); a64_bfi_w(g_cg, W24, W7, 8, 1); + a64_cset_w(g_cg, W7, A64_COND_EQ); a64_bfi_w(g_cg, W24, W7, 0, 1); +} + +/* + * FlagV |= V from the most recent flag-setting op (byte 2 of W24). + */ +static void emit_or_flagv(void) +{ + a64_cset_w(g_cg, W7, A64_COND_VS); + a64_orr_w_reg_lsl(g_cg, W24, W24, W7, 16u); +} + +/* + * Emit the alu sub-block. X3 holds ALU.T on entry (loaded by caller). 
+ */ +static void emit_alu_op(unsigned alu_op) +{ + switch(alu_op) + { + case 0x01: /* AND */ + a64_ands_w_reg(g_cg, W6, W3, W28); + a64_bfi_w(g_cg, W24, WZR, 24, 1); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x02: /* OR */ + a64_orr_w_reg(g_cg, W6, W3, W28); + a64_bfi_w(g_cg, W24, WZR, 24, 1); + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x03: /* XOR */ + a64_eor_w_reg(g_cg, W6, W3, W28); + a64_bfi_w(g_cg, W24, WZR, 24, 1); + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x04: /* ADD */ + a64_adds_w_reg(g_cg, W6, W3, W28); + a64_cset_w(g_cg, W7, A64_COND_CS); a64_bfi_w(g_cg, W24, W7, 24, 1); + emit_store_sz(); + emit_or_flagv(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x05: /* SUB */ + a64_subs_w_reg(g_cg, W6, W3, W28); + a64_cset_w(g_cg, W7, A64_COND_CC); a64_bfi_w(g_cg, W24, W7, 24, 1); + emit_store_sz(); + emit_or_flagv(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x06: /* AD2 (48-bit add of ALU.T low 48 + P.T low 48) */ + { + const uint64_t mask48 = 0x0000FFFFFFFFFFFFULL; + a64_mov_x_imm(g_cg, X10, mask48); + a64_and_x_reg(g_cg, X11, X3, X10); + a64_and_x_reg(g_cg, X12, X28, X10); + a64_add_x_reg(g_cg, X6, X11, X12); + + /* FlagV |= ((~(a^b)) & (a^tmp)) >> 47 & 1 */ + a64_eor_x_reg(g_cg, X7, X3, X28); + a64_eor_x_reg(g_cg, X8, X3, X6); + a64_bic_x_reg(g_cg, X9, X8, X7); + a64_lsr_x_imm(g_cg, X9, X9, 47); + a64_and_w_imm(g_cg, W9, W9, 0x1u); + a64_orr_w_reg_lsl(g_cg, W24, W24, W9, 16u); + + /* C = (tmp >> 48) & 1 */ + a64_lsr_x_imm(g_cg, X9, X6, 48); + a64_and_w_imm(g_cg, W9, W9, 0x1u); + a64_bfi_w(g_cg, W24, W9, 24, 1); + + /* CalcZS48: val = tmp << 16; FlagS = (int64)val < 0; FlagZ = !val */ + a64_lsl_x_imm(g_cg, X10, X6, 16); + a64_tst_x_reg(g_cg, X10, X10); + emit_store_sz(); + + /* ALU.T = tmp */ + a64_mov_x_reg(g_cg, X3, X6); + break; + } + + case 0x08: /* SR */ + a64_and_w_imm(g_cg, W7, W3, 
0x1u); + a64_bfi_w(g_cg, W24, W7, 24, 1); + a64_asr_w_imm(g_cg, W6, W3, 1); + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x09: /* RR */ + a64_and_w_imm(g_cg, W7, W3, 0x1u); + a64_bfi_w(g_cg, W24, W7, 24, 1); + a64_ror_w_imm(g_cg, W6, W3, 1); + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x0A: /* SL */ + a64_lsr_w_imm(g_cg, W7, W3, 31); + a64_bfi_w(g_cg, W24, W7, 24, 1); + a64_lsl_w_imm(g_cg, W6, W3, 1); + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x0B: /* RL */ + a64_lsr_w_imm(g_cg, W7, W3, 31); + a64_bfi_w(g_cg, W24, W7, 24, 1); + a64_ror_w_imm(g_cg, W6, W3, 31); /* ROR by 31 == ROL by 1 */ + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + case 0x0F: /* RL8 */ + a64_ubfx_w(g_cg, W7, W3, 24, 1); + a64_bfi_w(g_cg, W24, W7, 24, 1); + a64_ror_w_imm(g_cg, W6, W3, 24); /* ROR by 24 == ROL by 8 */ + a64_tst_w_reg(g_cg, W6, W6); + emit_store_sz(); + a64_bfi_x(g_cg, X3, X6, 0, 32); + break; + + default: /* 0x00, 0x07, 0x0C..0x0E -> NOP */ + break; + } +} + +static void emit_x_op(unsigned x_op, uint32_t instr) +{ + if((x_op & 0x3) == 0x2) + { + /* MAC: P = (int64)(int32)RX * (int32)RY */ + a64_ldr_w_imm(g_cg, W7, X0, O_RX); + a64_ldr_w_imm(g_cg, W8, X0, O_RY); + a64_smull(g_cg, X9, W7, W8); + a64_str_x_imm(g_cg, X9, X0, O_P); + a64_mov_x_reg(g_cg, X28, X9); + } + + if(x_op >= 0x3) + { + const unsigned drw = ((instr >> 20) & 0x7) & 0x3; + a64_ubfx_w(g_cg, W4, W23, 8u * drw, 6u); + emit_add_x_imm_safe(X5, X0, O_DRAM + drw * 256u); + a64_ldr_w_idx_lsl(g_cg, W12, X5, X4, 2u); + + if((x_op & 0x3) == 0x3) + { + a64_sxtw(g_cg, X9, W12); + a64_str_x_imm(g_cg, X9, X0, O_P); + a64_mov_x_reg(g_cg, X28, X9); + } + if(x_op & 0x4) + { + a64_str_w_imm(g_cg, W12, X0, O_RX); + } + } +} + +static void emit_y_op(unsigned y_op, uint32_t instr) +{ + if((y_op & 0x3) == 0x1) + { + 
a64_str_x_imm(g_cg, XZR, X0, O_AC); + a64_mov_x_reg(g_cg, X26, XZR); + } + else if((y_op & 0x3) == 0x2) + { + a64_str_x_imm(g_cg, X3, X0, O_AC); + a64_mov_x_reg(g_cg, X26, X3); + } + + if(y_op >= 0x3) + { + const unsigned drw = ((instr >> 14) & 0x7) & 0x3; + a64_ubfx_w(g_cg, W4, W23, 8u * drw, 6u); + emit_add_x_imm_safe(X5, X0, O_DRAM + drw * 256u); + a64_ldr_w_idx_lsl(g_cg, W12, X5, X4, 2u); + + if((y_op & 0x3) == 0x3) + { + a64_sxtw(g_cg, X9, W12); + a64_str_x_imm(g_cg, X9, X0, O_AC); + a64_mov_x_reg(g_cg, X26, X9); + } + if(y_op & 0x4) + { + a64_str_w_imm(g_cg, W12, X0, O_RY); + } + } +} + +static void emit_d1_op(bool looped, unsigned d1_op, uint32_t instr, const GenMeta* meta) +{ + unsigned d; + int32_t imm; + unsigned alt_src; + + if(!(d1_op & 0x1)) return; + + d = (instr >> 8) & 0xF; + imm = (int32_t)(int8_t)(uint8_t)instr; + alt_src = instr & 0xF; + + /* Resolve src_data into W12. */ + if(d1_op & 0x2) + { + switch(alt_src) + { + case 0x0: case 0x1: case 0x2: case 0x3: + case 0x4: case 0x5: case 0x6: case 0x7: + { + const unsigned drw = alt_src & 0x3; + a64_ubfx_w(g_cg, W4, W23, 8u * drw, 6u); + emit_add_x_imm_safe(X5, X0, O_DRAM + drw * 256u); + a64_ldr_w_idx_lsl(g_cg, W12, X5, X4, 2u); + break; + } + case 0x9: + a64_mov_w_reg(g_cg, W12, W3); + break; + case 0xA: + a64_lsr_x_imm(g_cg, X12, X3, 16); + break; + default: /* 0x8, 0xB..0xF -> 0xFFFFFFFF */ + a64_mov_w_imm(g_cg, W12, 0xFFFFFFFFu); + break; + } + } + else + { + /* src = sign-extended (int8)imm into a 32-bit register. */ + a64_mov_w_imm(g_cg, W12, (uint32_t)imm); + } + + /* Apply src_data to destination. 
*/ + switch(d) + { + case 0x0: case 0x1: case 0x2: case 0x3: + if(!(meta->dr_read & (1u << d))) + { + a64_ubfx_w(g_cg, W4, W23, 8u * d, 6u); + emit_add_x_imm_safe(X5, X0, O_DRAM + d * 256u); + a64_str_w_idx_lsl(g_cg, W12, X5, X4, 2u); + } + break; + + case 0x4: + a64_str_w_imm(g_cg, W12, X0, O_RX); + break; + + case 0x5: + a64_sxtw(g_cg, X7, W12); + a64_str_x_imm(g_cg, X7, X0, O_P); + a64_mov_x_reg(g_cg, X28, X7); + break; + + case 0x6: + a64_str_w_imm(g_cg, W12, X0, O_RAO); + break; + + case 0x7: + a64_str_w_imm(g_cg, W12, X0, O_WAO); + break; + + case 0x8: + case 0x9: + break; + + case 0xA: + { + a64_and_w_imm(g_cg, W7, W12, 0xFFFu); + if(!looped) + { + a64_strh_w_imm(g_cg, W7, X0, O_LOP); + a64_mov_w_reg(g_cg, W20, W7); + } + else + { + a64_label* skip = label_new(); + a64_cmp_w_imm(g_cg, W20, 0xFFFu); + a64_b_cond(g_cg, A64_COND_NE, skip); + a64_strh_w_imm(g_cg, W7, X0, O_LOP); + a64_mov_w_reg(g_cg, W20, W7); + label_bind(skip); + } + break; + } + + case 0xB: + a64_and_w_imm(g_cg, W7, W12, 0xFFu); + a64_strb_w_imm(g_cg, W7, X0, O_TOP); + break; + + case 0xC: case 0xD: case 0xE: case 0xF: + { + const int byte_idx = (int)d - 0xC; + a64_bfi_w(g_cg, W23, W12, 8u * (unsigned)byte_idx, 8u); + break; + } + + default: + break; + } +} + +static void emit_ct32_update(unsigned x_op, unsigned y_op, unsigned d1_op, uint32_t ct_inc) +{ + if(!(x_op >= 0x3 || y_op >= 0x3 || (d1_op & 0x1))) return; + + if(ct_inc != 0u) + { + a64_mov_w_imm(g_cg, W9, ct_inc); + a64_add_w_reg(g_cg, W23, W23, W9); + } + emit_and_w_imm_safe(W23, W23, 0x3F3F3F3Fu, W9); + a64_str_w_imm(g_cg, W23, X0, O_CT32); +} + +static void emit_tail_dispatch(void) +{ + a64_label* exit_lbl = label_new(); + + /* Flush packed-flags pin to memory. O_FZ is byte-aligned. */ + a64_stur_w(g_cg, W24, X0, (int)O_FZ); + + /* Decrement pinned CC (2 cycles per slot), flush, branch on <= 0. 
*/ + a64_subs_w_imm(g_cg, W21, W21, 2u); + a64_str_w_imm(g_cg, W21, X0, O_CC); + a64_b_cond(g_cg, A64_COND_LE, exit_lbl); + + /* DSPS_IsRunning() = State > 0 (signed). */ + a64_cmp_w_imm(g_cg, W22, 0u); + a64_b_cond(g_cg, A64_COND_LE, exit_lbl); + + /* W25 = pinned NI.low32; SXTW to recover the signed offset. */ + a64_sxtw(g_cg, X16, W25); + a64_movp2r_pool(g_cg, X17, (const void*)&DSP_Init); + a64_add_x_reg(g_cg, X16, X17, X16); + /* Skip the next slot's 5-LDR prelude (20 bytes). */ + a64_add_x_imm(g_cg, X16, X16, 20u); + a64_br(g_cg, X16); + + label_bind(exit_lbl); + a64_b_addr(g_cg, g_exit_stub_addr); +} + +/* --- Slot preludes ------------------------------------------------ */ +static void emit_load_cc_pin (void) { a64_ldr_w_imm (g_cg, W21, X0, O_CC); } +static void emit_load_ct32_pin (void) { a64_ldr_w_imm (g_cg, W23, X0, O_CT32); } +static void emit_load_flags_pin(void) { a64_ldur_w (g_cg, W24, X0, (int)O_FZ); } +static void emit_load_pc_pin (void) { a64_ldrb_w_imm(g_cg, W27, X0, O_PC); } +static void emit_load_lop_pin (void) { a64_ldrh_w_imm(g_cg, W20, X0, O_LOP); } + +static void emit_gen(bool looped, uint32_t instr) +{ + const unsigned alu_op = (instr >> 26) & 0xF; + const unsigned x_op = (instr >> 23) & 0x7; + const unsigned y_op = (instr >> 17) & 0x7; + const unsigned d1_op = (instr >> 12) & 0x3; + const GenMeta meta = compute_meta(x_op, y_op, d1_op, instr); + + emit_load_cc_pin(); + emit_load_ct32_pin(); + emit_load_flags_pin(); + emit_load_pc_pin(); + emit_load_lop_pin(); + emit_instr_pre(looped); + + /* X3 = ALU.T (mutated in place by alu_op). 
*/ + a64_mov_x_reg(g_cg, X3, X26); + emit_alu_op(alu_op); + + emit_x_op(x_op, instr); + emit_y_op(y_op, instr); + emit_d1_op(looped, d1_op, instr, &meta); + emit_ct32_update(x_op, y_op, d1_op, meta.ct_inc); + emit_tail_dispatch(); +} + +static bool is_general_instr(uint32_t instr) { return ((instr >> 28) & 0xF) <= 0x3; } +static bool is_mvi_instr(uint32_t instr) +{ + const unsigned top = (instr >> 28) & 0xF; + return top >= 0x8 && top <= 0xB; +} +static bool is_jmp_instr(uint32_t instr) { return ((instr >> 28) & 0xF) == 0xD; } +static bool is_misc_instr(uint32_t instr) +{ + const unsigned top = (instr >> 28) & 0xF; + return top == 0xE || top == 0xF; +} + +/* --- Helpers BLR'd from JIT slots --------------------------------- */ +static void lps_helper(struct DSPS* dsp) +{ + const uint32_t instr = dsp->NextInstr >> 32; + const uint8_t pc = (dsp->PC - 1) & 0xFFu; + LoopedSlot* cached = &g_looped_cache[pc]; + + if(MDFN_UNLIKELY(!cached->entry || cached->instr != instr)) + { + void (*entry)(struct DSPS*); + + /* We were BLR'd from a live JIT slot, so LR points into the same + * segment the compile path may rewind+repack. Letting that happen + * here would overwrite our own return target -- crash on RET. When + * the segment is too tight to fit a fresh slot, fall back to the C + * handler for this LPS instance; a later PRAM-write triggers a safe + * rewind and the next LPS dispatch can JIT again. 
*/ + if(a64_codegen_offset(g_cg) + SCU_JIT_SLOT_MAX_BYTES > SCU_JIT_CODE_SEGMENT_SIZE) + { + dsp->NextInstr = DSP_DecodeInstruction(instr, true); + return; + } + + entry = SCU_DSP_JIT_CompileSlot(pc, true, instr); + if(MDFN_UNLIKELY(!entry)) + { + dsp->NextInstr = DSP_DecodeInstruction(instr, true); + return; + } + cached->entry = entry; + cached->instr = instr; + } + + dsp->NextInstr = ((uint64_t)instr << 32) + | (uint32_t)((uintptr_t)cached->entry - DSP_INSTR_BASE_UIPT); +} + +static void misc_end_helper(struct DSPS* dsp, uint32_t is_endi) +{ + if(is_endi) + { + dsp->FlagEnd = true; + SCU_SetInt(SCU_INT_DSP, true); + } + + if(dsp->PRAMDMABufCount) + DSP_FinishPRAMDMA(); + else + { + dsp->State &= ~STATE_MASK_EXECUTE; + dsp->CycleCounter -= DSP_EndCCSubVal; + } +} + +/* + * Emit the DSP_TestCond chain. Branches to skip_label when the test + * fails. + */ +static void emit_test_cond(unsigned cond, a64_label* skip_label) +{ + if(!(cond & 0x40)) return; + + a64_mov_w_imm(g_cg, W7, 0u); + if(cond & 0x1) { a64_ubfx_w(g_cg, W8, W24, 0, 1); a64_orr_w_reg(g_cg, W7, W7, W8); } + if(cond & 0x2) { a64_ubfx_w(g_cg, W8, W24, 8, 1); a64_orr_w_reg(g_cg, W7, W7, W8); } + if(cond & 0x4) { a64_ubfx_w(g_cg, W8, W24, 24, 1); a64_orr_w_reg(g_cg, W7, W7, W8); } + if(cond & 0x8) + { + a64_ldr_w_imm(g_cg, W8, X0, O_T0_Until); + a64_cmp_w_reg(g_cg, W8, W21); + a64_cset_w(g_cg, W8, A64_COND_LT); + a64_orr_w_reg(g_cg, W7, W7, W8); + } + if(cond & 0x20) + a64_cbz_w(g_cg, W7, skip_label); + else + a64_cbnz_w(g_cg, W7, skip_label); +} + +/* + * Emit BLR to a C helper, preserving x0 and the link register across + * the call via the stack. Caller is responsible for setting up arg + * regs before calling this routine. + */ +static void emit_call_helper_addr(const void* helper_addr) +{ + a64_stp_x_pre(g_cg, X0, X30, -16); + a64_movp2r_pool(g_cg, X16, helper_addr); + a64_blr(g_cg, X16); + a64_ldp_x_post(g_cg, X0, X30, 16); + /* Re-sync pinned regs that helpers may have mutated in memory. 
*/ + a64_ldr_w_imm(g_cg, W22, X0, O_State); + a64_ldr_w_imm(g_cg, W21, X0, O_CC); + a64_ldr_w_imm(g_cg, W23, X0, O_CT32); + a64_ldr_w_imm(g_cg, W25, X0, O_NI); + a64_ldrb_w_imm(g_cg, W27, X0, O_PC); +} + +static void emit_mvi(bool looped, uint32_t instr) +{ + const unsigned dest = (instr >> 26) & 0xF; + const unsigned cond = (instr >> 19) & 0x7F; + const int32_t imm = (cond & 0x40) + ? sign_x_to_s32(19, instr) + : sign_x_to_s32(25, instr); + a64_label* skip; + + emit_load_cc_pin(); + emit_load_ct32_pin(); + emit_load_flags_pin(); + emit_load_pc_pin(); + emit_load_lop_pin(); + emit_instr_pre(looped); + + skip = label_new(); + emit_test_cond(cond, skip); + + if(dest == 0x6 || dest == 0x7) + { + a64_label* nodma = label_new(); + a64_ldr_w_imm(g_cg, W8, X0, O_PRAMDMACt); + a64_cbz_w(g_cg, W8, nodma); + a64_sub_w_imm(g_cg, W9, W27, 1u); + a64_strb_w_imm(g_cg, W9, X0, O_PC); + emit_call_helper_addr((const void*)&DSP_FinishPRAMDMA); + label_bind(nodma); + } + + switch(dest) + { + case 0x0: case 0x1: case 0x2: case 0x3: + a64_ubfx_w(g_cg, W4, W23, 8u * dest, 6u); + a64_mov_w_imm(g_cg, W12, (uint32_t)imm); + emit_add_x_imm_safe(X5, X0, O_DRAM + dest * 256u); + a64_str_w_idx_lsl(g_cg, W12, X5, X4, 2u); + a64_add_w_imm(g_cg, W4, W4, 1u); + a64_and_w_imm(g_cg, W4, W4, 0x3Fu); + a64_bfi_w(g_cg, W23, W4, 8u * dest, 8u); + a64_str_w_imm(g_cg, W23, X0, O_CT32); + break; + + case 0x4: + a64_mov_w_imm(g_cg, W12, (uint32_t)imm); + a64_str_w_imm(g_cg, W12, X0, O_RX); + break; + + case 0x5: + /* P.T = (int64)(int32)imm -- sign-extended into 64-bit slot. 
*/ + a64_mov_x_imm(g_cg, X12, (uint64_t)(int64_t)imm); + a64_str_x_imm(g_cg, X12, X0, O_P); + a64_mov_x_reg(g_cg, X28, X12); + break; + + case 0x6: + a64_mov_w_imm(g_cg, W12, (uint32_t)imm); + a64_str_w_imm(g_cg, W12, X0, O_RAO); + break; + + case 0x7: + a64_mov_w_imm(g_cg, W12, (uint32_t)imm); + a64_str_w_imm(g_cg, W12, X0, O_WAO); + break; + + case 0xA: + { + const uint32_t lop_val = (uint32_t)imm & 0xFFFu; + if(!looped) + { + a64_mov_w_imm(g_cg, W12, lop_val); + a64_strh_w_imm(g_cg, W12, X0, O_LOP); + a64_mov_w_reg(g_cg, W20, W12); + } + else + { + a64_label* sk = label_new(); + a64_cmp_w_imm(g_cg, W20, 0xFFFu); + a64_b_cond(g_cg, A64_COND_NE, sk); + a64_mov_w_imm(g_cg, W12, lop_val); + a64_strh_w_imm(g_cg, W12, X0, O_LOP); + a64_mov_w_reg(g_cg, W20, W12); + label_bind(sk); + } + break; + } + + case 0xC: + { + a64_label* nodma2 = label_new(); + a64_sub_w_imm(g_cg, W8, W27, 1u); + a64_strb_w_imm(g_cg, W8, X0, O_TOP); + a64_mov_w_imm(g_cg, W9, (uint32_t)imm & 0xFFu); + a64_strb_w_imm(g_cg, W9, X0, O_PC); + a64_mov_w_reg(g_cg, W27, W9); + a64_ldr_w_imm(g_cg, W10, X0, O_PRAMDMACt); + a64_cbz_w(g_cg, W10, nodma2); + emit_call_helper_addr((const void*)&DSP_FinishPRAMDMA); + label_bind(nodma2); + break; + } + + default: + /* dest = 0x8, 0x9, 0xB, 0xD, 0xE, 0xF -> no commit */ + break; + } + + label_bind(skip); + emit_tail_dispatch(); +} + +static void emit_jmp(bool looped, uint32_t instr) +{ + const unsigned cond = (instr >> 19) & 0x7F; + const uint8_t target = (uint8_t)instr; + a64_label* skip; + + emit_load_cc_pin(); + emit_load_ct32_pin(); + emit_load_flags_pin(); + emit_load_pc_pin(); + emit_load_lop_pin(); + emit_instr_pre(looped); + + skip = label_new(); + emit_test_cond(cond, skip); + + a64_mov_w_imm(g_cg, W12, (uint32_t)target); + a64_strb_w_imm(g_cg, W12, X0, O_PC); + a64_mov_w_reg(g_cg, W27, W12); + + label_bind(skip); + emit_tail_dispatch(); +} + +static void emit_misc(bool looped, uint32_t instr) +{ + const unsigned op = (instr >> 27) & 0x3; + + 
emit_load_cc_pin(); + emit_load_ct32_pin(); + emit_load_flags_pin(); + emit_load_pc_pin(); + emit_load_lop_pin(); + emit_instr_pre(looped); + + if(op == 2 || op == 3) /* END / ENDI */ + { + a64_mov_w_imm(g_cg, W1, (uint32_t)(op & 0x1)); + emit_call_helper_addr((const void*)&misc_end_helper); + } + else if(op == 0) /* BTM */ + { + a64_label* skip = label_new(); + a64_cbz_w(g_cg, W20, skip); + a64_ldrb_w_imm(g_cg, W5, X0, O_TOP); + a64_strb_w_imm(g_cg, W5, X0, O_PC); + a64_mov_w_reg(g_cg, W27, W5); + label_bind(skip); + a64_sub_w_imm(g_cg, W20, W20, 1u); + a64_and_w_imm(g_cg, W20, W20, 0xFFFu); + a64_strh_w_imm(g_cg, W20, X0, O_LOP); + } + else if(op == 1) /* LPS */ + { + emit_call_helper_addr((const void*)&lps_helper); + } + + emit_tail_dispatch(); +} + +/* --- Entry / exit stubs ------------------------------------------ */ + +/* + * Entry stub: called from SCU_UpdateDSP with x0 = DSPS*. Sets up an + * AAPCS-conformant frame, loads pinned State/AC.T/P.T/NI.low32, then + * BLR's to the first handler. 
+ */ +static void emit_entry_stub(void) +{ + a64_stp_x_pre(g_cg, X29, X30, -96); + a64_stp_x_off(g_cg, X19, X20, SP_REG, 16); + a64_stp_x_off(g_cg, X21, X22, SP_REG, 32); + a64_stp_x_off(g_cg, X25, X26, SP_REG, 48); + a64_stp_x_off(g_cg, X23, X24, SP_REG, 64); + a64_stp_x_off(g_cg, X27, X28, SP_REG, 80); + + a64_ldr_w_imm(g_cg, W22, X0, O_State); + a64_ldr_x_imm(g_cg, X26, X0, O_AC); + a64_ldr_x_imm(g_cg, X28, X0, O_P); + + a64_ldrsw_x_imm(g_cg, X16, X0, O_NI); + a64_movp2r_pool(g_cg, X17, (const void*)&DSP_Init); + a64_add_x_reg(g_cg, X16, X17, X16); + a64_ldr_w_imm(g_cg, W25, X0, O_NI); + a64_blr(g_cg, X16); + + a64_ldp_x_off(g_cg, X27, X28, SP_REG, 80); + a64_ldp_x_off(g_cg, X23, X24, SP_REG, 64); + a64_ldp_x_off(g_cg, X25, X26, SP_REG, 48); + a64_ldp_x_off(g_cg, X21, X22, SP_REG, 32); + a64_ldp_x_off(g_cg, X19, X20, SP_REG, 16); + a64_ldp_x_post(g_cg, X29, X30, 96); + a64_ret(g_cg); +} + +static void emit_exit_stub(void) +{ + a64_ret(g_cg); +} + +/* --- Public API --------------------------------------------------- */ + +void SCU_DSP_JIT_Init(void) +{ + void* stubs_start; + void* entry_addr; + void* exit_addr; + void* post_stub_ptr; + + if(!g_cg) + { + g_cg = a64_codegen_create(SCU_JIT_CODE_SEGMENT_SIZE); + if(!g_cg) return; + g_seg_start = a64_codegen_wptr(g_cg); + + stubs_start = a64_codegen_wptr(g_cg); + + entry_addr = a64_codegen_wptr(g_cg); + emit_entry_stub(); + SCU_DSP_JIT_Entry = (void (*)(struct DSPS*))entry_addr; + + exit_addr = a64_codegen_wptr(g_cg); + emit_exit_stub(); + g_exit_stub_addr = exit_addr; + + /* Resolve the entry stub's pooled DSP_Init pointer (and any other + * stub-time pool refs). The pool data lives past the exit stub's + * RET, so it's unreachable -- but it sits below g_post_stub_byte_offset + * so rewind_locked() won't trample it. 
*/ + a64_pool_flush(g_cg); + + post_stub_ptr = a64_codegen_wptr(g_cg); + g_post_stub_byte_offset = (size_t)((uintptr_t)post_stub_ptr - (uintptr_t)stubs_start); + a64_codegen_invalidate(g_cg, stubs_start, + (size_t)((uintptr_t)post_stub_ptr - (uintptr_t)stubs_start)); + + SS_JitDump_Open(); + SS_JitDump_Emit("dsp_entry_stub", entry_addr, + (size_t)((uintptr_t)exit_addr - (uintptr_t)entry_addr)); + SS_JitDump_Emit("dsp_exit_stub", exit_addr, + (size_t)((uintptr_t)post_stub_ptr - (uintptr_t)exit_addr)); + } + rewind_locked(); +} + +void SCU_DSP_JIT_Reset(void) +{ + if(!g_cg) + SCU_DSP_JIT_Init(); + else + rewind_locked(); +} + +void (*SCU_DSP_JIT_CompileSlot(uint8_t pc, bool looped, uint32_t instr))(struct DSPS*) +{ + void* start; + void* end; + typedef void (*EmitFn)(bool, uint32_t); + EmitFn emit_inline = NULL; + +#ifndef WANT_DSP_JIT_PERF_DUMP + (void)pc; +#endif + + if(!g_cg) + SCU_DSP_JIT_Init(); + if(!g_cg) + return NULL; + + if(a64_codegen_offset(g_cg) + SCU_JIT_SLOT_MAX_BYTES > SCU_JIT_CODE_SEGMENT_SIZE) + rewind_locked(); + + start = a64_codegen_wptr(g_cg); + + if(is_general_instr(instr)) emit_inline = &emit_gen; + else if(is_mvi_instr(instr)) emit_inline = &emit_mvi; + else if(is_jmp_instr(instr)) emit_inline = &emit_jmp; + else if(is_misc_instr(instr)) emit_inline = &emit_misc; + + if(emit_inline) + { + emit_inline(looped, instr); + } + else + { + /* DMA: tail-jump straight to the templated C handler. 5 leading + * NOPs are the skip-safe prelude (matches the 5-LDR slot prelude; + * JIT tail_dispatch BR's to target+20). */ + void (* const c_handler)(struct DSPS*) = pick_c_handler(looped, instr); + a64_nop(g_cg); + a64_nop(g_cg); + a64_nop(g_cg); + a64_nop(g_cg); + a64_nop(g_cg); + a64_movp2r_pool(g_cg, X16, (const void*)c_handler); + a64_br(g_cg, X16); + } + + /* Drain pool refs queued by this slot. 
Every code path above ends in + * an unconditional terminator (B/BR/RET via emit_tail_dispatch's exit + * branch, or the DMA fallback's BR X16), so the pool data emitted + * here is unreachable. */ + a64_pool_flush(g_cg); + + end = a64_codegen_wptr(g_cg); + a64_codegen_invalidate(g_cg, start, + (size_t)((uintptr_t)end - (uintptr_t)start)); + +#ifdef WANT_DSP_JIT_PERF_DUMP + { + char nm[40]; + snprintf(nm, sizeof(nm), "dsp_%c_pc%02x_%s", + looped ? 'l' : 'n', (unsigned)pc, + jitdump_kind_str(instr)); + SS_JitDump_Emit(nm, start, (size_t)((uintptr_t)end - (uintptr_t)start)); + } +#endif + + /* Labels are scoped to one Compile -- reset for the next call. */ + labels_reset(); + + return (void (*)(struct DSPS*))start; +} + +#else /* non-aarch64: stub everything */ + +void SCU_DSP_JIT_Init(void) {} +void SCU_DSP_JIT_Reset(void) {} + +void (*SCU_DSP_JIT_CompileSlot(uint8_t pc, bool looped, uint32_t instr))(struct DSPS*) +{ + (void)pc; (void)looped; (void)instr; + return NULL; +} + +#endif diff --git a/mednafen/ss/scu_dsp_jit.h b/mednafen/ss/scu_dsp_jit.h new file mode 100644 index 00000000..687096c5 --- /dev/null +++ b/mednafen/ss/scu_dsp_jit.h @@ -0,0 +1,38 @@ +/******************************************************************************/ +/* Mednafen Sega Saturn Emulation Module */ +/******************************************************************************/ +/* scu_dsp_jit.h - SCU DSP JIT (aarch64 backend) public interface +** Copyright (C) 2026 pstef +*/ + +#ifndef __MDFN_SS_SCU_DSP_JIT_H +#define __MDFN_SS_SCU_DSP_JIT_H + +#include +#ifndef __cplusplus +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct DSPS; + +void SCU_DSP_JIT_Init(void); +void SCU_DSP_JIT_Reset(void); + +/* Entry stub: sets up the callee-saved frame, loads pinned regs from + * DSPS, BLRs the handler at dsp->NextInstr.low32, then flushes pinned + * regs back on return. NULL on non-aarch64 builds or before init. 
*/ +extern void (*SCU_DSP_JIT_Entry)(struct DSPS*); + +/* Returns NULL when JIT is not available; the caller must then use + * the templated handler returned by DSP_DecodeInstruction. */ +void (*SCU_DSP_JIT_CompileSlot(uint8_t pc, bool looped, uint32_t instr))(struct DSPS*); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/mednafen/ss/scu_dsp_jmp.c b/mednafen/ss/scu_dsp_jmp.c index 206094ef..139d3b99 100644 --- a/mednafen/ss/scu_dsp_jmp.c +++ b/mednafen/ss/scu_dsp_jmp.c @@ -135,7 +135,7 @@ DJI_FOR_EACH_COND(DEFINE_JMPInstr, 1) #undef DEFINE_JMPInstr #undef DJI_FOR_EACH_COND -MDFN_HIDE extern void (*const DSP_JMPFuncTable[2][128])(struct DSPS*) = +MDFN_HIDE void (*const DSP_JMPFuncTable[2][128])(struct DSPS*) = { #include "scu_dsp_jmptab.inc" }; diff --git a/mednafen/ss/scu_dsp_misc.c b/mednafen/ss/scu_dsp_misc.c index 52e67f4f..ce4670a4 100644 --- a/mednafen/ss/scu_dsp_misc.c +++ b/mednafen/ss/scu_dsp_misc.c @@ -84,7 +84,7 @@ DMI_FOR_EACH_OP(DEFINE_MiscInstr, 1) #undef DEFINE_MiscInstr #undef DMI_FOR_EACH_OP -MDFN_HIDE extern void (*const DSP_MiscFuncTable[2][4])(struct DSPS*) = +MDFN_HIDE void (*const DSP_MiscFuncTable[2][4])(struct DSPS*) = { #include "scu_dsp_misctab.inc" }; diff --git a/mednafen/ss/scu_dsp_mvi.c b/mednafen/ss/scu_dsp_mvi.c index 47bcf28e..410cc3c6 100644 --- a/mednafen/ss/scu_dsp_mvi.c +++ b/mednafen/ss/scu_dsp_mvi.c @@ -193,7 +193,7 @@ DMVI_FOR_EACH_DEST(DEFINE_MVIInstr, 1) #undef DMVI_FOR_EACH_DEST #undef DMVI_FOR_EACH_COND -MDFN_HIDE extern void (*const DSP_MVIFuncTable[2][16][128])(struct DSPS*) = +MDFN_HIDE void (*const DSP_MVIFuncTable[2][16][128])(struct DSPS*) = { #include "scu_dsp_mvitab.inc" }; diff --git a/mednafen/ss/sh7095.h b/mednafen/ss/sh7095.h index 22809a64..3c453614 100644 --- a/mednafen/ss/sh7095.h +++ b/mednafen/ss/sh7095.h @@ -22,7 +22,7 @@ #ifndef __MDFN_SH7095_H #define __MDFN_SH7095_H -#include +#include "../state.h" /* Phase-9b: class -> struct. See Phase-9a comment in scsp.h * for rationale. 
The `final` keyword is preserved (allowed on @@ -338,21 +338,20 @@ struct SH7095 * The 8 concrete callers (DMA paths) become direct named calls; * the 2 T-parametric callers (ExtBusRead_INLINE / ExtBusWrite_INLINE * bodies) become a sizeof(T) ladder that folds when those - * outer templates instantiate. */ - void INLINE BSC_BusWrite_u8 (uint32_t A, uint8_t V, const bool BurstHax, int32_t* SH2DMAHax); - void INLINE BSC_BusWrite_u16(uint32_t A, uint16_t V, const bool BurstHax, int32_t* SH2DMAHax); - void INLINE BSC_BusWrite_u32(uint32_t A, uint32_t V, const bool BurstHax, int32_t* SH2DMAHax); - - uint8_t INLINE BSC_BusRead_u8 (uint32_t A, const bool BurstHax, int32_t* SH2DMAHax); - uint16_t INLINE BSC_BusRead_u16(uint32_t A, const bool BurstHax, int32_t* SH2DMAHax); - uint32_t INLINE BSC_BusRead_u32(uint32_t A, const bool BurstHax, int32_t* SH2DMAHax); + * outer templates instantiate. + * + * Phase-9 follow-up: the six method-style forward decls that used + * to live here (`void INLINE BSC_BusWrite_u8(...)` etc.) declared + * class members that nothing ever defined -- the real free + * functions are static `SH7095_BSC_BusWrite_u8(SH7095* z, ...)` + * in sh7095.inc, called with explicit z at every site. Deleted. */ uint32_t UCRead_IF_Kludge; // // Exit/Resume stuff for slave CPU with icache emulation(RunSlaveUntil()) // - const void* ResumePoint; + uint16_t resume_id; SH7095_CacheEntry* Resume_cent; uint32_t Resume_instr; int Resume_way_match; @@ -507,11 +506,11 @@ struct SH7095 * `EmulateICache` as a local constexpr bool so the macro * expansions (FetchIF / DoID reference EmulateICache by name) * resolve cleanly without the previous template-parameter - * name lookup. */ - NO_INLINE void DoIDIF_NI_C0_I0(void) MDFN_HOT; - NO_INLINE void DoIDIF_NI_C0_I1(void) MDFN_HOT; - NO_INLINE void DoIDIF_NI_C1_I0(void) MDFN_HOT; - NO_INLINE void DoIDIF_NI_C1_I1(void) MDFN_HOT; + * name lookup. 
+ * + * Phase-9 follow-up: dead member-style decls deleted (real fns + * are `SH7095_DoIDIF_NI_C0_I0(SH7095*)` and friends, defined in + * sh7095.inc and called with explicit z). */ /* Phase-8p: ExtBus*_INLINE retired into 12+6 named per-(SP, T, BH) * variants. See sh7095.inc body comments for source-fold @@ -525,10 +524,11 @@ struct SH7095 * stream the previous per-T template instantiations did. * The MemReadRT / MemWriteRT macro callsites dispatch by * sizeof(T) at template-instantiation time; the OnChipRegRead_NI - * forwarders (phase 8j) hard-code to the matching named variant. */ - NO_INLINE void OnChipRegWrite_u8 (uint32_t A, uint32_t V) MDFN_HOT; - NO_INLINE void OnChipRegWrite_u16(uint32_t A, uint32_t V) MDFN_HOT; - NO_INLINE void OnChipRegWrite_u32(uint32_t A, uint32_t V) MDFN_HOT; + * forwarders (phase 8j) hard-code to the matching named variant. + * + * Phase-9 follow-up: dead OnChipRegWrite_u{8,16,32} member-style + * decls deleted (real fns are `SH7095_OnChipRegWrite_u8(SH7095*, + * uint32_t, uint32_t)` and friends). */ // // @@ -544,7 +544,6 @@ struct SH7095 bool DM_Setting; uint32_t PC_IF, PC_ID; // Debug-related variables. const char* cpu_name; - const void*const* ResumeTableP[2]; }; /* Phase-9 step 4: SH7095 public API as free functions. 
*/ diff --git a/mednafen/ss/sh7095.inc b/mednafen/ss/sh7095.inc index 8bd867a9..d68729f3 100644 --- a/mednafen/ss/sh7095.inc +++ b/mednafen/ss/sh7095.inc @@ -106,6 +106,7 @@ static void SH7095_WDT_StandbyReset(SH7095* z); static void SH7095_FRT_Reset(SH7095* z); static void SH7095_FRT_CheckOCR(SH7095* z); static void SH7095_FRT_ClockFRC(SH7095* z); +static INLINE void SH7095_FRT_TickFRC(SH7095* z, uint32_t N); static void SH7095_Cache_AssocPurge(SH7095* z, const uint32_t A); static int SH7095_Cache_FindWay(SH7095* z, SH7095_CacheEntry* const cent, const uint32_t ATM); static uint8_t SH7095_BSC_BusRead_u8 (SH7095* z, uint32_t A, const bool BurstHax, int32_t* SH2DMAHax); @@ -207,26 +208,13 @@ void SH7095_Construct(SH7095* z, const char* const name_arg, const unsigned even if(z == &CPU[1]) { - for(unsigned dm = 0; dm < 2; dm++) - { - /* Very fragile, be careful. */ - z->ResumePoint = NULL; - z->ResumeTableP[dm] = NULL; - z->timestamp = -1; - z->FRT_WDT_NextTS = 0x7FFFFFFF; - z->Pipe_ID = 0xFEU << 24; - - if(dm) - SH7095_RunSlaveUntil_Debug(z, 0); - else - SH7095_RunSlaveUntil(z, 0); - - assert(z->ResumePoint == NULL); - assert(z->ResumeTableP[dm] != NULL); - assert(z->timestamp == -1); - assert(z->FRT_WDT_NextTS == 0x7FFFFFFF); - assert(z->Pipe_ID == (0xFEU << 24)); - } + /* Slave CPU resume state: no runtime initialization needed any + * more. Phase-9 conversion replaced the GCC `&&Resume_NNNN` + * label-address table with a portable `switch (z->resume_id)` + * dispatch, so the one-time `SH7095_RunSlaveUntil(z, 0)` call + * with bound_timestamp=0 (whose only purpose was to walk into + * PSEUDO_DMABURST and populate z->ResumeTableP[]) is gone. 
*/ + z->resume_id = 0; } SH7095_Init(z, false, false); @@ -374,7 +362,7 @@ void SH7095_Init(SH7095* z, const bool EmulateICache, const bool CacheBypassHack z->ExtHalt = false; z->ExtHaltDMA = false; - z->ResumePoint = NULL; + z->resume_id = 0; SH7095_TruePowerOn(z); } @@ -422,7 +410,7 @@ void SH7095_TruePowerOn(SH7095* z) // // // - z->ResumePoint = NULL; + z->resume_id = 0; z->Resume_cent = NULL; z->Resume_instr = 0; z->Resume_way_match = 0; @@ -478,7 +466,7 @@ void SH7095_TruePowerOn(SH7095* z) // // // - z->ResumePoint = NULL; + z->resume_id = 0; // // // @@ -914,6 +902,158 @@ static INLINE void SH7095_FRT_ClockFRC(SH7095* z) SH7095_FRT_CheckOCR(z); } +// +// Closed-form equivalent of calling FRT_ClockFRC() N times in a row. +// Produces the same end state for FRT.FRC, sticky OVF/OCFA/OCFB bits +// in FTCSR/FTCSRM, and at most one RecalcPendingIntPEX() invocation +// (the iterative version coalesces internally with no observer between +// calls, so collapsing them is observationally equivalent). +// +// Per-tick semantics, replicated here: +// FRC++ +// if FRC == 0: OVF bit (if newly set, RPI) +// if FRC == OCRA: if CCLRA (FTCSR&1): FRC = 0; OCFA bit +// if FRC == OCRB: OCFB bit // reads post-reset FRC +// +static INLINE void SH7095_FRT_TickFRC(SH7095* z, uint32_t N) +{ + if(!N) + return; + + const uint16_t OCRA = z->FRT.OCR[0]; + const uint16_t OCRB = z->FRT.OCR[1]; + const uint16_t FRC0 = z->FRT.FRC; + const bool cclra = (z->FRT.FTCSR & 0x0001) && (OCRA != 0); + + bool ovf_hit = false; + bool ocfa_hit = false; + bool ocfb_hit = false; + uint16_t FRC_end; + + if(!cclra) + { + // Free-running mod-0x10000 counter (OCRA does not reset FRC). + // OCRA==0 + CCLRA collapses here too: the reset would be a no-op, + // and OCFA still fires whenever FRC wraps to 0. + const uint32_t end_lin = (uint32_t)FRC0 + N; + FRC_end = (uint16_t)end_lin; + ovf_hit = (end_lin >> 16) != 0; + + // First i in [1, 0x10000] where (FRC0 + i) & 0xFFFF == target. 
+ const uint32_t first_a = (uint32_t)(uint16_t)(OCRA - FRC0 - 1) + 1u; + const uint32_t first_b = (uint32_t)(uint16_t)(OCRB - FRC0 - 1) + 1u; + ocfa_hit = (N >= first_a); + ocfb_hit = (N >= first_b); + } + else + { + // CCLRA on, OCRA != 0. FRC oscillates [0..OCRA-1] with a momentary + // OCRA value at the reset tick (immediately cleared before the OCRB + // check). Pre-state may have FRC0 >= OCRA, in which case we first + // free-run up through 0xFFFF -> 0 (one OVF, possibly one OCRB hit + // along the way), then settle into the steady oscillation. + uint32_t remaining = N; + uint16_t frc = FRC0; + + if(frc >= OCRA) + { + const uint32_t ticks_to_wrap = 0x10000u - frc; + if(remaining < ticks_to_wrap) + { + FRC_end = (uint16_t)(frc + remaining); + // OCRB hit at i = OCRB - frc when OCRB > frc and within span. + if(OCRB > frc && (uint32_t)(OCRB - frc) <= remaining) + ocfb_hit = true; + goto apply; + } + ovf_hit = true; + // Along the way (i = OCRB - frc) and at the wrap (i = ticks_to_wrap, + // FRC = 0) we can pick up an OCRB match. OCRA cannot match in this + // span: OCRA != 0 and FRC only visits frc+1..0xFFFF, 0. + if(OCRB > frc || OCRB == 0) + ocfb_hit = true; + remaining -= ticks_to_wrap; + frc = 0; + } + + if(remaining > 0) + { + // Steady state: frc in [0, OCRA). First OCRA match at i = OCRA - frc. + const uint32_t to_first_ocra = (uint32_t)OCRA - frc; + if(remaining < to_first_ocra) + { + FRC_end = (uint16_t)(frc + remaining); + if(OCRB > frc && (uint32_t)(OCRB - frc) <= remaining) + ocfb_hit = true; + goto apply; + } + ocfa_hit = true; + // First partial run reaches OCRA, then resets to 0. Within ticks + // frc+1 .. OCRA-1, OCFB fires iff OCRB lies strictly between frc and + // OCRA. At the reset tick OCFB sees FRC=0, firing iff OCRB == 0. + if(OCRB > frc && OCRB < OCRA) + ocfb_hit = true; + if(OCRB == 0) + ocfb_hit = true; + remaining -= to_first_ocra; + // + // Now frc = 0; subsequent full periods of length OCRA each end with + // a reset. 
The first-partial OCRB check above started from FRC0, + // which may be > 0 and thus excludes some values OCRB could hit when + // a period starts from 0 -- so re-evaluate the full-period condition. + // + const uint32_t period = OCRA; + const uint32_t full_periods = remaining / period; + const uint32_t tail = remaining % period; + if(full_periods > 0) + { + if(OCRB == 0 || (OCRB > 0 && OCRB < OCRA)) + ocfb_hit = true; + } + if(tail > 0) + { + FRC_end = (uint16_t)tail; + if(OCRB > 0 && OCRB <= tail) + ocfb_hit = true; + } + else + { + FRC_end = 0; + } + } + else + { + FRC_end = frc; + } + } + +apply: + z->FRT.FRC = FRC_end; + + bool need_rpi = false; + if(ovf_hit && !(z->FRT.FTCSR & 0x02)) + { + z->FRT.FTCSR |= 0x02; + z->FRT.FTCSRM |= 0x02; + need_rpi = true; + } + if(ocfa_hit && !(z->FRT.FTCSR & 0x08)) + { + z->FRT.FTCSR |= 0x08; + z->FRT.FTCSRM |= 0x08; + need_rpi = true; + } + if(ocfb_hit && !(z->FRT.FTCSR & 0x04)) + { + z->FRT.FTCSR |= 0x04; + z->FRT.FTCSRM |= 0x04; + need_rpi = true; + } + if(need_rpi) + SH7095_RecalcPendingIntPEX(z); +} + + static const uint8_t wdt_cstab[8] = { 1, /**/ 6, 7, 8, 9, 10, /**/ 12, 13 }; // @@ -970,12 +1110,9 @@ static void SH7095_FRT_WDT_Update(SH7095* z) if((z->FRT.TCR & 0x3) != 0x3) // when == 3, count on rising edge of external clock(not handled here). 
{ const uint32_t frt_clockshift = 3 + ((z->FRT.TCR & 0x3) << 1); // /8, /32, /128, count at falling edge - uint32_t divided_clocks = (z->FRT_WDT_ClockDivider >> frt_clockshift) - (PreAddCD >> frt_clockshift); + const uint32_t divided_clocks = (z->FRT_WDT_ClockDivider >> frt_clockshift) - (PreAddCD >> frt_clockshift); - while(divided_clocks-- > 0) - { - SH7095_FRT_ClockFRC(z); - } + SH7095_FRT_TickFRC(z, divided_clocks); } // z->WDT: @@ -3127,7 +3264,6 @@ static INLINE uint32_t SH7095_OnChipRegRead_INLINE_u32(SH7095* z, uint32_t A) else { const unsigned Am = (uint8_t)A; - const unsigned shift = ((((A & 1) ^ 1) << 3)); uint16_t ret = 0; if(Am < 0x20) @@ -3167,7 +3303,7 @@ static INLINE void SH7095_BSC_BusWrite_u8(SH7095* z, uint32_t A, uint8_t V, cons const uint32_t mask = 0xFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS0_u8_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u8_W1(A, &DB, BurstHax, SH2DMAHax); } else if(A >= 0x06000000) // CS3; 32-bit { @@ -3175,7 +3311,7 @@ static INLINE void SH7095_BSC_BusWrite_u8(SH7095* z, uint32_t A, uint8_t V, cons const uint32_t mask = 0xFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS3_u8_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u8_W1(A, &DB, BurstHax, SH2DMAHax); if(!BurstHax) { @@ -3197,7 +3333,7 @@ static INLINE void SH7095_BSC_BusWrite_u8(SH7095* z, uint32_t A, uint8_t V, cons const uint32_t mask = 0xFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS12_u8_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS12_u8_W1(A, &DB, BurstHax, SH2DMAHax); } SH7095_DB = DB; @@ -3224,7 +3360,7 @@ static INLINE void SH7095_BSC_BusWrite_u16(SH7095* z, uint32_t A, uint16_t V, co const uint32_t mask = 0xFFFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS0_u16_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W1(A, &DB, BurstHax, SH2DMAHax); } else if(A >= 0x06000000) // CS3; 32-bit { @@ -3232,7 +3368,7 @@ static INLINE void 
SH7095_BSC_BusWrite_u16(SH7095* z, uint32_t A, uint16_t V, co const uint32_t mask = 0xFFFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS3_u16_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u16_W1(A, &DB, BurstHax, SH2DMAHax); if(!BurstHax) { @@ -3254,7 +3390,7 @@ static INLINE void SH7095_BSC_BusWrite_u16(SH7095* z, uint32_t A, uint16_t V, co const uint32_t mask = 0xFFFFu << shift; DB = (DB & ~mask) | ((uint32_t)V << shift); - BusRW_DB_CS12_u16_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS12_u16_W1(A, &DB, BurstHax, SH2DMAHax); } SH7095_DB = DB; @@ -3281,10 +3417,10 @@ static INLINE void SH7095_BSC_BusWrite_u32(SH7095* z, uint32_t A, uint32_t V, co // SH7095_BusLock++; DB = (DB & 0xFFFF0000) | (V >> 16); - BusRW_DB_CS0_u16_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W1(A, &DB, BurstHax, SH2DMAHax); DB = (DB & 0xFFFF0000) | (uint16_t)V; - BusRW_DB_CS0_u16_W1(A | 2, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W1(A | 2, &DB, BurstHax, SH2DMAHax); //if(!SH2DMAHax) // SH7095_BusLock--; @@ -3293,7 +3429,7 @@ static INLINE void SH7095_BSC_BusWrite_u32(SH7095* z, uint32_t A, uint32_t V, co { /* sizeof(uint32_t) == 4: shift = 0, mask = 0xFFFFFFFFu */ DB = V; - BusRW_DB_CS3_u32_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u32_W1(A, &DB, BurstHax, SH2DMAHax); if(!BurstHax) { @@ -3312,7 +3448,7 @@ static INLINE void SH7095_BSC_BusWrite_u32(SH7095* z, uint32_t A, uint32_t V, co else // CS1, CS2; 32-bit { DB = V; - BusRW_DB_CS12_u32_W1(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS12_u32_W1(A, &DB, BurstHax, SH2DMAHax); } SH7095_DB = DB; @@ -3338,12 +3474,12 @@ static INLINE uint8_t SH7095_BSC_BusRead_u8(SH7095* z, uint32_t A, const bool Bu if(A < 0x02000000) // CS0, configured as 16-bit { - BusRW_DB_CS0_u8_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u8_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB >> (((A & 1) ^ 1) << 3); } else if(A >= 0x06000000) // CS3; 32-bit { - BusRW_DB_CS3_u8_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u8_W0(A, &DB, 
BurstHax, SH2DMAHax); ret = DB >> (((A & 3) ^ 3) << 3); if(!BurstHax) @@ -3361,7 +3497,7 @@ static INLINE uint8_t SH7095_BSC_BusRead_u8(SH7095* z, uint32_t A, const bool Bu } else // CS1, CS2; 32-bit { - BusRW_DB_CS12_u8_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS12_u8_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB >> (((A & 3) ^ 3) << 3); } @@ -3387,12 +3523,12 @@ static INLINE uint16_t SH7095_BSC_BusRead_u16(SH7095* z, uint32_t A, const bool if(A < 0x02000000) // CS0, configured as 16-bit { - BusRW_DB_CS0_u16_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB >> ((A & 1) << 3); } else if(A >= 0x06000000) // CS3; 32-bit { - BusRW_DB_CS3_u16_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u16_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB >> (((A & 3) ^ 2) << 3); if(!BurstHax) @@ -3410,7 +3546,7 @@ static INLINE uint16_t SH7095_BSC_BusRead_u16(SH7095* z, uint32_t A, const bool } else // CS1, CS2; 32-bit { - BusRW_DB_CS12_u16_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS12_u16_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB >> (((A & 3) ^ 2) << 3); } @@ -3440,10 +3576,10 @@ static INLINE uint32_t SH7095_BSC_BusRead_u32(SH7095* z, uint32_t A, const bool //if(!SH2DMAHax) // SH7095_BusLock++; - BusRW_DB_CS0_u16_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB << 16; - BusRW_DB_CS0_u16_W0(A | 2, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS0_u16_W0(A | 2, &DB, BurstHax, SH2DMAHax); ret |= (uint16_t)DB; //if(!SH2DMAHax) @@ -3451,7 +3587,7 @@ static INLINE uint32_t SH7095_BSC_BusRead_u32(SH7095* z, uint32_t A, const bool } else if(A >= 0x06000000) // CS3; 32-bit { - BusRW_DB_CS3_u32_W0(A, DB, BurstHax, SH2DMAHax); + BusRW_DB_CS3_u32_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB; /* shift folds to 0 for sizeof(T)==4 */ if(!BurstHax) @@ -3469,7 +3605,7 @@ static INLINE uint32_t SH7095_BSC_BusRead_u32(SH7095* z, uint32_t A, const bool } else // CS1, CS2; 32-bit { - BusRW_DB_CS12_u32_W0(A, DB, BurstHax, SH2DMAHax); 
+ BusRW_DB_CS12_u32_W0(A, &DB, BurstHax, SH2DMAHax); ret = DB; } @@ -4021,7 +4157,7 @@ static INLINE uint32_t cmov_eq_thing(const uint32_t reg_compval, const uint32_t static INLINE void SH7095_Cache_AssocPurge(SH7095* z, const uint32_t A) { const uint32_t ATM = A & (0x7FFFF << 10); - auto* cent = &z->Cache[(A >> 4) & 0x3F]; + SH7095_CacheEntry* cent = &z->Cache[(A >> 4) & 0x3F]; // Ignore two-way-mode bit in z->CCR here. cent->Tag[0] |= (ATM == cent->Tag[0]); // Set invalid bit to 1. @@ -4532,7 +4668,7 @@ NO_INLINE void SH7095_Reset(SH7095* z, bool power_on_reset, bool from_internal_w z->EPending = 0; SH7095_SetPEX(z, power_on_reset ? SH7095_PEX_POWERON : SH7095_PEX_RESET); z->Pipe_ID = z->EPending; - z->ResumePoint = NULL; + z->resume_id = 0; } static NO_INLINE void SH7095_INTC_Reset(SH7095* z) @@ -5022,25 +5158,25 @@ do { * the macro's `if(EmulateICache)` arm per-variant. */ static INLINE void SH7095_DoIDIF_INLINE_C0_I0(SH7095* z) { - constexpr bool EmulateICache = false; + const bool EmulateICache = false; DoIDIF_MACRO(false); } static INLINE void SH7095_DoIDIF_INLINE_C0_I1(SH7095* z) { - constexpr bool EmulateICache = false; + const bool EmulateICache = false; DoIDIF_MACRO(true); } static INLINE void SH7095_DoIDIF_INLINE_C1_I0(SH7095* z) { - constexpr bool EmulateICache = true; + const bool EmulateICache = true; DoIDIF_MACRO(false); } static INLINE void SH7095_DoIDIF_INLINE_C1_I1(SH7095* z) { - constexpr bool EmulateICache = true; + const bool EmulateICache = true; DoIDIF_MACRO(true); } @@ -5087,8 +5223,8 @@ static NO_INLINE MDFN_HOT void SH7095_DoIDIF_NI_C1_I1(SH7095* z) * previous template form. 
*/ FORCE_INLINE void SH7095_Step_w0_C0(SH7095* z) { - constexpr unsigned which = 0; - constexpr bool EmulateICache = false; + const unsigned which = 0; + const bool EmulateICache = false; // // Ideally, we would place SPEPRecover: after the z->FRT event check, but doing // so causes gcc(multiple versions) to produce inconceivably awful code under certain conditions @@ -5115,8 +5251,8 @@ FORCE_INLINE void SH7095_Step_w0_C0(SH7095* z) FORCE_INLINE void SH7095_Step_w0_C1(SH7095* z) { - constexpr unsigned which = 0; - constexpr bool EmulateICache = true; + const unsigned which = 0; + const bool EmulateICache = true; SPEPRecover:; if(MDFN_UNLIKELY(z->timestamp >= z->FRT_WDT_NextTS)) @@ -5136,8 +5272,8 @@ FORCE_INLINE void SH7095_Step_w0_C1(SH7095* z) FORCE_INLINE void SH7095_Step_w1_C0(SH7095* z) { - constexpr unsigned which = 1; - constexpr bool EmulateICache = false; + const unsigned which = 1; + const bool EmulateICache = false; SPEPRecover:; if(MDFN_UNLIKELY(z->timestamp >= z->FRT_WDT_NextTS)) @@ -5214,7 +5350,7 @@ do { \ { \ if(z->timestamp >= bound_timestamp) \ { \ - z->ResumePoint = &&Resume_ ## n; \ + z->resume_id = (n); \ return; \ } \ Resume_ ## n:; \ @@ -5239,16 +5375,23 @@ do { \ #define CONST_VAR(T, n) T n; n /* RESUME_VAR(T, n) -- compile-time check that the declared type T * matches the actual type of `n` (which is z->Resume_ - * once the opexec_* macros have expanded). Two language paths: + * once the opexec_* macros have expanded). Three language paths: * * C++ -- std::is_same::value * (decltype is C++11, std::is_same lives in * which mednafen-types.h already pulls in for C++ TUs). * - * C -- __builtin_types_compatible_p(T, __typeof__(n)) + * C (GCC / clang) -- __builtin_types_compatible_p(T, __typeof__(n)) * (both are GCC extensions, available since gcc 2.x, also * accepted by clang -- no C11 / C23 dependency). * + * C (other, e.g. MSVC) -- sizeof(T) == sizeof(n) + * Partial fallback. 
Catches size mismatches (int8 vs int32) + * but not signedness or unrelated-types-with-same-size + * mismatches. Better than no check; preserves MSVC C89 + * buildability without dropping the assertion entirely on + * GCC / clang where the real check is available. + * * The condition expression is wrapped in an extra set of parens so * the comma inside `std::is_same<T, decltype(n)>` / inside * `__builtin_types_compatible_p(T, __typeof__(n))` is not seen by @@ -5258,8 +5401,10 @@ do { \ * is the macro's problem, not ours. */ #ifdef __cplusplus #define RESUME_VAR(T, n) MDFN_STATIC_ASSERT((std::is_same<T, decltype(n)>::value), "Resume variable type mismatch.") -#else +#elif defined(__GNUC__) || defined(__clang__) #define RESUME_VAR(T, n) MDFN_STATIC_ASSERT((__builtin_types_compatible_p(T, __typeof__(n))), "Resume variable type mismatch.") +#else + #define RESUME_VAR(T, n) MDFN_STATIC_ASSERT(sizeof(T) == sizeof(n), "Resume variable size mismatch (full type check unavailable on this compiler).") #endif #define SH7095_NEED_RESUME_TABLE_INIT 1 @@ -5474,20 +5619,26 @@ static void SH7095_StateAction_SlaveResume(SH7095* z, StateMem* sm, const unsign int32_t ResumePointI = -1; uint32_t Resume_cent_I = 0; + /* Savestate format compatibility: the persisted field is still + * `ResumePointI` (int32_t), holding the slot index that the + * pre-conversion code used as a lookup into the now-deleted + * `ResumeTable[512]` array. Its `#if __COUNTER__ >= 5514` guards + * dropped the 120 never-defined labels, so slot N (0..391) held + * `&&Resume_(top - N)`, `top` = 5392 (non-debug) or 10392 (debug, + * assuming the _dm table mirrors it). With ids now stored directly, + * translate to/from the legacy index form on save/load: + * + * ResumePointI = top - resume_id (save) + * resume_id = top - ResumePointI (load) + * + * with -1 / 0 standing in for "no resume pending". Old saves + * load to the same instruction-handler resume point as before. */ + const uint16_t top = z->DM_Setting ? 
10392 : 5392; + if(!load) { - if(z->ResumePoint) - { - for(uint32_t i = 0; i < 512; i++) - { - if(z->ResumeTableP[z->DM_Setting][i] == z->ResumePoint) - { - ResumePointI = i; - break; - } - } - assert(ResumePointI >= 0); - } + if(z->resume_id) + ResumePointI = (int32_t)top - (int32_t)z->resume_id; if(z->Resume_cent) { @@ -5535,13 +5686,9 @@ static void SH7095_StateAction_SlaveResume(SH7095* z, StateMem* sm, const unsign if(load) { - z->ResumePoint = NULL; - - if(ResumePointI >= 0) - { - z->ResumePoint = z->ResumeTableP[z->DM_Setting][ResumePointI & 511]; - assert(z->ResumePoint != NULL); - } + z->resume_id = (ResumePointI >= 0) + ? (uint16_t)((int32_t)top - (ResumePointI & 511)) + : 0; z->Resume_uint16_A &= ~1; z->Resume_uint32_A &= ~3; z->Resume_cent = &z->Cache[Resume_cent_I & 0x3F]; diff --git a/mednafen/ss/sh7095_ops.inc b/mednafen/ss/sh7095_ops.inc index 85029a25..4bf31588 100644 --- a/mednafen/ss/sh7095_ops.inc +++ b/mednafen/ss/sh7095_ops.inc @@ -2053,27 +2053,6 @@ switch(z->Pipe_ID >> 24) END_OP BEGIN_OP_DLYIDIF(PSEUDO_DMABURST) -#ifdef SH7095_NEED_RESUME_TABLE_INIT - if(MDFN_UNLIKELY(!z->ResumeTableP[DebugMode])) - { - MDFN_STATIC_ASSERT(__COUNTER__ == ((DebugMode ? 10000 : 5000) + 393), "Unexpected __COUNTER__"); - - static const void* const ResumeTable[512] = - { -#if SH7095_DEBUG_MODE - #include "sh7095s_ctable_dm.inc" -#else - #include "sh7095s_ctable.inc" -#endif - }; - //static_assert(__COUNTER__ <= 11025, "Unexpected __COUNTER__"); - - assert(z == &CPU[1]); - - z->ResumeTableP[DebugMode] = ResumeTable; - return; - } -#endif if(MDFN_LIKELY(SH7095_DMA_InBurst(z) || z->ExtHalt)) { z->timestamp += 7; @@ -2104,7 +2083,7 @@ z->PC += 2; SkipPCInc:; #ifdef SH7095_NEED_RESUME_TABLE_INIT -MDFN_STATIC_ASSERT(__COUNTER__ == ((DebugMode ? 
10000 : 5000) + 393), "Unexpected __COUNTER__"); #endif #undef PART_OP_NORMIDIF diff --git a/mednafen/ss/sh7095s_ctable.inc b/mednafen/ss/sh7095s_ctable.inc index df2bcdf4..da459654 100644 --- a/mednafen/ss/sh7095s_ctable.inc +++ b/mednafen/ss/sh7095s_ctable.inc @@ -1,1536 +1,401 @@ -#if __COUNTER__ >= 5514 - &&Resume_5512, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5511, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5510, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5509, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5508, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5507, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5506, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5505, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5504, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5503, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5502, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5501, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5500, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5499, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5498, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5497, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5496, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5495, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5494, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5493, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5492, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5491, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5490, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5489, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5488, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5487, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5486, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5485, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5484, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5483, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5482, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5481, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5480, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5479, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5478, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5477, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5476, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5475, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5474, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5473, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5472, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5471, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5470, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5469, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5468, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5467, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5466, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5465, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5464, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5463, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5462, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5461, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5460, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5459, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5458, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5457, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5456, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5455, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5454, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5453, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5452, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5451, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5450, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5449, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5448, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5447, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5446, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5445, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5444, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5443, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5442, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5441, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5440, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5439, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5438, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5437, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5436, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5435, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5434, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5433, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5432, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5431, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5430, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5429, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5428, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5427, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5426, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5425, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5424, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5423, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5422, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5421, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5420, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5419, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5418, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5417, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5416, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5415, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5414, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5413, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5412, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5411, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5410, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5409, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5408, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5407, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5406, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5405, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5404, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5403, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5402, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5401, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5400, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5399, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5398, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5397, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5396, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5395, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5394, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5393, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5392, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5391, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5390, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5389, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5388, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5387, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5386, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5385, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5384, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5383, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5382, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5381, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5380, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5379, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5378, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5377, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5376, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5375, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5374, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5373, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5372, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5371, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5370, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5369, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5368, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5367, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5366, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5365, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5364, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5363, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5362, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5361, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5360, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5359, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5358, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5357, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5356, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5355, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5354, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5353, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5352, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5351, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5350, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5349, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5348, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5347, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5346, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5345, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5344, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5343, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5342, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5341, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5340, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5339, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5338, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5337, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5336, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5335, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5334, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5333, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5332, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5331, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5330, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5329, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5328, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5327, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5326, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5325, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5324, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5323, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5322, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5321, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5320, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5319, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5318, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5317, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5316, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5315, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5314, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5313, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5312, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5311, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5310, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5309, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5308, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5307, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5306, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5305, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5304, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5303, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5302, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5301, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5300, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5299, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5298, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5297, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5296, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5295, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5294, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5293, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5292, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5291, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5290, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5289, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5288, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5287, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5286, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5285, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5284, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5283, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5282, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5281, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5280, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5279, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5278, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5277, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5276, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5275, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5274, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5273, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5272, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5271, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5270, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5269, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5268, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5267, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5266, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5265, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5264, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5263, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5262, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5261, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5260, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5259, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5258, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5257, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5256, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5255, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5254, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5253, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5252, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5251, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5250, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5249, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5248, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5247, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5246, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5245, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5244, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5243, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5242, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5241, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5240, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5239, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5238, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5237, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5236, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5235, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5234, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5233, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5232, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5231, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5230, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5229, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5228, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5227, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5226, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5225, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5224, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5223, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5222, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5221, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5220, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5219, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5218, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5217, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5216, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5215, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5214, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5213, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5212, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5211, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5210, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5209, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5208, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5207, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5206, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5205, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5204, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5203, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5202, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5201, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5200, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5199, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5198, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5197, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5196, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5195, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5194, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5193, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5192, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5191, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5190, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5189, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5188, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5187, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5186, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5185, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5184, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5183, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5182, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5181, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5180, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5179, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5178, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5177, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5176, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5175, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5174, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5173, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5172, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5171, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5170, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5169, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5168, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5167, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5166, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5165, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5164, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5163, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5162, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5161, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5160, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5159, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5158, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5157, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5156, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5155, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5154, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5153, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5152, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5151, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5150, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5149, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5148, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5147, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5146, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5145, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5144, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5143, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5142, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5141, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5140, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5139, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5138, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5137, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5136, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5135, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5134, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5133, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5132, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5131, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5130, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5129, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5128, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5127, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5126, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5125, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5124, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5123, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5122, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5121, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5120, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5119, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5118, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5117, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5116, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5115, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5114, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5113, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5112, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5111, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5110, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5109, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5108, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5107, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5106, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5105, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5104, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5103, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5102, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5101, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5100, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5099, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5098, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5097, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5096, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5095, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5094, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5093, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5092, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5091, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5090, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5089, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5088, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5087, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5086, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5085, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5084, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5083, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5082, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5081, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5080, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5079, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5078, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5077, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5076, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5075, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5074, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5073, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5072, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5071, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5070, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5069, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5068, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5067, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5066, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5065, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5064, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5063, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5062, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5061, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5060, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5059, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5058, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5057, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5056, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5055, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5054, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5053, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5052, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5051, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5050, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5049, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5048, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5047, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5046, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5045, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5044, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5043, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5042, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5041, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5040, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5039, -#endif -#if 
__COUNTER__ >= 5514 - &&Resume_5038, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5037, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5036, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5035, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5034, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5033, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5032, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5031, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5030, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5029, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5028, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5027, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5026, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5025, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5024, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5023, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5022, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5021, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5020, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5019, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5018, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5017, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5016, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5015, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5014, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5013, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5012, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5011, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5010, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5009, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5008, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5007, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5006, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5005, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5004, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5003, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5002, -#endif -#if __COUNTER__ >= 5514 - &&Resume_5001, -#endif +/* Switch-case dispatch entries for the non-debug SH7095_RunSlaveUntil + * resume path. 
Numbered to match the __COUNTER__ values that + * CHECK_EXIT_RESUME() expansions assign as `Resume_NNNN:` labels. + * + * Range: 5001 .. 5392 (392 entries). Regenerate via + * notes/build_sh7095s_ctable.c + * + * Consumes ZERO __COUNTER__ values; the resume-id integers are + * compile-time constants in each `case` label. */ + case 5001: goto Resume_5001; + case 5002: goto Resume_5002; + case 5003: goto Resume_5003; + case 5004: goto Resume_5004; + case 5005: goto Resume_5005; + case 5006: goto Resume_5006; + case 5007: goto Resume_5007; + case 5008: goto Resume_5008; + case 5009: goto Resume_5009; + case 5010: goto Resume_5010; + case 5011: goto Resume_5011; + case 5012: goto Resume_5012; + case 5013: goto Resume_5013; + case 5014: goto Resume_5014; + case 5015: goto Resume_5015; + case 5016: goto Resume_5016; + case 5017: goto Resume_5017; + case 5018: goto Resume_5018; + case 5019: goto Resume_5019; + case 5020: goto Resume_5020; + case 5021: goto Resume_5021; + case 5022: goto Resume_5022; + case 5023: goto Resume_5023; + case 5024: goto Resume_5024; + case 5025: goto Resume_5025; + case 5026: goto Resume_5026; + case 5027: goto Resume_5027; + case 5028: goto Resume_5028; + case 5029: goto Resume_5029; + case 5030: goto Resume_5030; + case 5031: goto Resume_5031; + case 5032: goto Resume_5032; + case 5033: goto Resume_5033; + case 5034: goto Resume_5034; + case 5035: goto Resume_5035; + case 5036: goto Resume_5036; + case 5037: goto Resume_5037; + case 5038: goto Resume_5038; + case 5039: goto Resume_5039; + case 5040: goto Resume_5040; + case 5041: goto Resume_5041; + case 5042: goto Resume_5042; + case 5043: goto Resume_5043; + case 5044: goto Resume_5044; + case 5045: goto Resume_5045; + case 5046: goto Resume_5046; + case 5047: goto Resume_5047; + case 5048: goto Resume_5048; + case 5049: goto Resume_5049; + case 5050: goto Resume_5050; + case 5051: goto Resume_5051; + case 5052: goto Resume_5052; + case 5053: goto Resume_5053; + case 5054: goto Resume_5054; 
+ case 5055: goto Resume_5055; + case 5056: goto Resume_5056; + case 5057: goto Resume_5057; + case 5058: goto Resume_5058; + case 5059: goto Resume_5059; + case 5060: goto Resume_5060; + case 5061: goto Resume_5061; + case 5062: goto Resume_5062; + case 5063: goto Resume_5063; + case 5064: goto Resume_5064; + case 5065: goto Resume_5065; + case 5066: goto Resume_5066; + case 5067: goto Resume_5067; + case 5068: goto Resume_5068; + case 5069: goto Resume_5069; + case 5070: goto Resume_5070; + case 5071: goto Resume_5071; + case 5072: goto Resume_5072; + case 5073: goto Resume_5073; + case 5074: goto Resume_5074; + case 5075: goto Resume_5075; + case 5076: goto Resume_5076; + case 5077: goto Resume_5077; + case 5078: goto Resume_5078; + case 5079: goto Resume_5079; + case 5080: goto Resume_5080; + case 5081: goto Resume_5081; + case 5082: goto Resume_5082; + case 5083: goto Resume_5083; + case 5084: goto Resume_5084; + case 5085: goto Resume_5085; + case 5086: goto Resume_5086; + case 5087: goto Resume_5087; + case 5088: goto Resume_5088; + case 5089: goto Resume_5089; + case 5090: goto Resume_5090; + case 5091: goto Resume_5091; + case 5092: goto Resume_5092; + case 5093: goto Resume_5093; + case 5094: goto Resume_5094; + case 5095: goto Resume_5095; + case 5096: goto Resume_5096; + case 5097: goto Resume_5097; + case 5098: goto Resume_5098; + case 5099: goto Resume_5099; + case 5100: goto Resume_5100; + case 5101: goto Resume_5101; + case 5102: goto Resume_5102; + case 5103: goto Resume_5103; + case 5104: goto Resume_5104; + case 5105: goto Resume_5105; + case 5106: goto Resume_5106; + case 5107: goto Resume_5107; + case 5108: goto Resume_5108; + case 5109: goto Resume_5109; + case 5110: goto Resume_5110; + case 5111: goto Resume_5111; + case 5112: goto Resume_5112; + case 5113: goto Resume_5113; + case 5114: goto Resume_5114; + case 5115: goto Resume_5115; + case 5116: goto Resume_5116; + case 5117: goto Resume_5117; + case 5118: goto Resume_5118; + case 5119: 
goto Resume_5119; + case 5120: goto Resume_5120; + case 5121: goto Resume_5121; + case 5122: goto Resume_5122; + case 5123: goto Resume_5123; + case 5124: goto Resume_5124; + case 5125: goto Resume_5125; + case 5126: goto Resume_5126; + case 5127: goto Resume_5127; + case 5128: goto Resume_5128; + case 5129: goto Resume_5129; + case 5130: goto Resume_5130; + case 5131: goto Resume_5131; + case 5132: goto Resume_5132; + case 5133: goto Resume_5133; + case 5134: goto Resume_5134; + case 5135: goto Resume_5135; + case 5136: goto Resume_5136; + case 5137: goto Resume_5137; + case 5138: goto Resume_5138; + case 5139: goto Resume_5139; + case 5140: goto Resume_5140; + case 5141: goto Resume_5141; + case 5142: goto Resume_5142; + case 5143: goto Resume_5143; + case 5144: goto Resume_5144; + case 5145: goto Resume_5145; + case 5146: goto Resume_5146; + case 5147: goto Resume_5147; + case 5148: goto Resume_5148; + case 5149: goto Resume_5149; + case 5150: goto Resume_5150; + case 5151: goto Resume_5151; + case 5152: goto Resume_5152; + case 5153: goto Resume_5153; + case 5154: goto Resume_5154; + case 5155: goto Resume_5155; + case 5156: goto Resume_5156; + case 5157: goto Resume_5157; + case 5158: goto Resume_5158; + case 5159: goto Resume_5159; + case 5160: goto Resume_5160; + case 5161: goto Resume_5161; + case 5162: goto Resume_5162; + case 5163: goto Resume_5163; + case 5164: goto Resume_5164; + case 5165: goto Resume_5165; + case 5166: goto Resume_5166; + case 5167: goto Resume_5167; + case 5168: goto Resume_5168; + case 5169: goto Resume_5169; + case 5170: goto Resume_5170; + case 5171: goto Resume_5171; + case 5172: goto Resume_5172; + case 5173: goto Resume_5173; + case 5174: goto Resume_5174; + case 5175: goto Resume_5175; + case 5176: goto Resume_5176; + case 5177: goto Resume_5177; + case 5178: goto Resume_5178; + case 5179: goto Resume_5179; + case 5180: goto Resume_5180; + case 5181: goto Resume_5181; + case 5182: goto Resume_5182; + case 5183: goto 
Resume_5183; + case 5184: goto Resume_5184; + case 5185: goto Resume_5185; + case 5186: goto Resume_5186; + case 5187: goto Resume_5187; + case 5188: goto Resume_5188; + case 5189: goto Resume_5189; + case 5190: goto Resume_5190; + case 5191: goto Resume_5191; + case 5192: goto Resume_5192; + case 5193: goto Resume_5193; + case 5194: goto Resume_5194; + case 5195: goto Resume_5195; + case 5196: goto Resume_5196; + case 5197: goto Resume_5197; + case 5198: goto Resume_5198; + case 5199: goto Resume_5199; + case 5200: goto Resume_5200; + case 5201: goto Resume_5201; + case 5202: goto Resume_5202; + case 5203: goto Resume_5203; + case 5204: goto Resume_5204; + case 5205: goto Resume_5205; + case 5206: goto Resume_5206; + case 5207: goto Resume_5207; + case 5208: goto Resume_5208; + case 5209: goto Resume_5209; + case 5210: goto Resume_5210; + case 5211: goto Resume_5211; + case 5212: goto Resume_5212; + case 5213: goto Resume_5213; + case 5214: goto Resume_5214; + case 5215: goto Resume_5215; + case 5216: goto Resume_5216; + case 5217: goto Resume_5217; + case 5218: goto Resume_5218; + case 5219: goto Resume_5219; + case 5220: goto Resume_5220; + case 5221: goto Resume_5221; + case 5222: goto Resume_5222; + case 5223: goto Resume_5223; + case 5224: goto Resume_5224; + case 5225: goto Resume_5225; + case 5226: goto Resume_5226; + case 5227: goto Resume_5227; + case 5228: goto Resume_5228; + case 5229: goto Resume_5229; + case 5230: goto Resume_5230; + case 5231: goto Resume_5231; + case 5232: goto Resume_5232; + case 5233: goto Resume_5233; + case 5234: goto Resume_5234; + case 5235: goto Resume_5235; + case 5236: goto Resume_5236; + case 5237: goto Resume_5237; + case 5238: goto Resume_5238; + case 5239: goto Resume_5239; + case 5240: goto Resume_5240; + case 5241: goto Resume_5241; + case 5242: goto Resume_5242; + case 5243: goto Resume_5243; + case 5244: goto Resume_5244; + case 5245: goto Resume_5245; + case 5246: goto Resume_5246; + case 5247: goto Resume_5247; + 
case 5248: goto Resume_5248; + case 5249: goto Resume_5249; + case 5250: goto Resume_5250; + case 5251: goto Resume_5251; + case 5252: goto Resume_5252; + case 5253: goto Resume_5253; + case 5254: goto Resume_5254; + case 5255: goto Resume_5255; + case 5256: goto Resume_5256; + case 5257: goto Resume_5257; + case 5258: goto Resume_5258; + case 5259: goto Resume_5259; + case 5260: goto Resume_5260; + case 5261: goto Resume_5261; + case 5262: goto Resume_5262; + case 5263: goto Resume_5263; + case 5264: goto Resume_5264; + case 5265: goto Resume_5265; + case 5266: goto Resume_5266; + case 5267: goto Resume_5267; + case 5268: goto Resume_5268; + case 5269: goto Resume_5269; + case 5270: goto Resume_5270; + case 5271: goto Resume_5271; + case 5272: goto Resume_5272; + case 5273: goto Resume_5273; + case 5274: goto Resume_5274; + case 5275: goto Resume_5275; + case 5276: goto Resume_5276; + case 5277: goto Resume_5277; + case 5278: goto Resume_5278; + case 5279: goto Resume_5279; + case 5280: goto Resume_5280; + case 5281: goto Resume_5281; + case 5282: goto Resume_5282; + case 5283: goto Resume_5283; + case 5284: goto Resume_5284; + case 5285: goto Resume_5285; + case 5286: goto Resume_5286; + case 5287: goto Resume_5287; + case 5288: goto Resume_5288; + case 5289: goto Resume_5289; + case 5290: goto Resume_5290; + case 5291: goto Resume_5291; + case 5292: goto Resume_5292; + case 5293: goto Resume_5293; + case 5294: goto Resume_5294; + case 5295: goto Resume_5295; + case 5296: goto Resume_5296; + case 5297: goto Resume_5297; + case 5298: goto Resume_5298; + case 5299: goto Resume_5299; + case 5300: goto Resume_5300; + case 5301: goto Resume_5301; + case 5302: goto Resume_5302; + case 5303: goto Resume_5303; + case 5304: goto Resume_5304; + case 5305: goto Resume_5305; + case 5306: goto Resume_5306; + case 5307: goto Resume_5307; + case 5308: goto Resume_5308; + case 5309: goto Resume_5309; + case 5310: goto Resume_5310; + case 5311: goto Resume_5311; + case 5312: goto 
Resume_5312; + case 5313: goto Resume_5313; + case 5314: goto Resume_5314; + case 5315: goto Resume_5315; + case 5316: goto Resume_5316; + case 5317: goto Resume_5317; + case 5318: goto Resume_5318; + case 5319: goto Resume_5319; + case 5320: goto Resume_5320; + case 5321: goto Resume_5321; + case 5322: goto Resume_5322; + case 5323: goto Resume_5323; + case 5324: goto Resume_5324; + case 5325: goto Resume_5325; + case 5326: goto Resume_5326; + case 5327: goto Resume_5327; + case 5328: goto Resume_5328; + case 5329: goto Resume_5329; + case 5330: goto Resume_5330; + case 5331: goto Resume_5331; + case 5332: goto Resume_5332; + case 5333: goto Resume_5333; + case 5334: goto Resume_5334; + case 5335: goto Resume_5335; + case 5336: goto Resume_5336; + case 5337: goto Resume_5337; + case 5338: goto Resume_5338; + case 5339: goto Resume_5339; + case 5340: goto Resume_5340; + case 5341: goto Resume_5341; + case 5342: goto Resume_5342; + case 5343: goto Resume_5343; + case 5344: goto Resume_5344; + case 5345: goto Resume_5345; + case 5346: goto Resume_5346; + case 5347: goto Resume_5347; + case 5348: goto Resume_5348; + case 5349: goto Resume_5349; + case 5350: goto Resume_5350; + case 5351: goto Resume_5351; + case 5352: goto Resume_5352; + case 5353: goto Resume_5353; + case 5354: goto Resume_5354; + case 5355: goto Resume_5355; + case 5356: goto Resume_5356; + case 5357: goto Resume_5357; + case 5358: goto Resume_5358; + case 5359: goto Resume_5359; + case 5360: goto Resume_5360; + case 5361: goto Resume_5361; + case 5362: goto Resume_5362; + case 5363: goto Resume_5363; + case 5364: goto Resume_5364; + case 5365: goto Resume_5365; + case 5366: goto Resume_5366; + case 5367: goto Resume_5367; + case 5368: goto Resume_5368; + case 5369: goto Resume_5369; + case 5370: goto Resume_5370; + case 5371: goto Resume_5371; + case 5372: goto Resume_5372; + case 5373: goto Resume_5373; + case 5374: goto Resume_5374; + case 5375: goto Resume_5375; + case 5376: goto Resume_5376; + 
case 5377: goto Resume_5377; + case 5378: goto Resume_5378; + case 5379: goto Resume_5379; + case 5380: goto Resume_5380; + case 5381: goto Resume_5381; + case 5382: goto Resume_5382; + case 5383: goto Resume_5383; + case 5384: goto Resume_5384; + case 5385: goto Resume_5385; + case 5386: goto Resume_5386; + case 5387: goto Resume_5387; + case 5388: goto Resume_5388; + case 5389: goto Resume_5389; + case 5390: goto Resume_5390; + case 5391: goto Resume_5391; + case 5392: goto Resume_5392; diff --git a/mednafen/ss/sh7095s_ctable_dm.inc b/mednafen/ss/sh7095s_ctable_dm.inc index 195ea570..d8a4eaf6 100644 --- a/mednafen/ss/sh7095s_ctable_dm.inc +++ b/mednafen/ss/sh7095s_ctable_dm.inc @@ -1,1536 +1,401 @@ -#if __COUNTER__ >= 10514 - &&Resume_10512, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10511, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10510, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10509, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10508, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10507, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10506, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10505, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10504, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10503, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10502, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10501, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10500, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10499, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10498, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10497, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10496, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10495, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10494, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10493, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10492, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10491, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10490, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10489, -#endif -#if __COUNTER__ >= 10514 - 
&&Resume_10488, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10487, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10486, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10485, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10484, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10483, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10482, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10481, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10480, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10479, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10478, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10477, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10476, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10475, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10474, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10473, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10472, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10471, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10470, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10469, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10468, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10467, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10466, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10465, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10464, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10463, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10462, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10461, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10460, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10459, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10458, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10457, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10456, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10455, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10454, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10453, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10452, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10451, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10450, -#endif 
-#if __COUNTER__ >= 10514 - &&Resume_10449, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10448, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10447, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10446, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10445, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10444, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10443, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10442, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10441, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10440, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10439, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10438, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10437, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10436, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10435, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10434, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10433, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10432, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10431, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10430, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10429, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10428, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10427, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10426, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10425, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10424, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10423, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10422, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10421, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10420, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10419, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10418, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10417, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10416, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10415, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10414, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10413, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10412, -#endif -#if __COUNTER__ >= 
10514 - &&Resume_10411, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10410, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10409, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10408, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10407, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10406, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10405, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10404, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10403, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10402, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10401, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10400, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10399, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10398, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10397, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10396, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10395, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10394, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10393, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10392, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10391, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10390, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10389, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10388, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10387, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10386, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10385, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10384, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10383, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10382, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10381, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10380, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10379, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10378, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10377, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10376, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10375, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10374, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10373, 
-#endif -#if __COUNTER__ >= 10514 - &&Resume_10372, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10371, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10370, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10369, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10368, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10367, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10366, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10365, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10364, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10363, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10362, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10361, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10360, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10359, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10358, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10357, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10356, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10355, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10354, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10353, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10352, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10351, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10350, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10349, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10348, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10347, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10346, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10345, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10344, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10343, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10342, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10341, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10340, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10339, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10338, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10337, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10336, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10335, -#endif -#if 
__COUNTER__ >= 10514 - &&Resume_10334, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10333, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10332, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10331, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10330, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10329, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10328, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10327, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10326, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10325, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10324, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10323, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10322, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10321, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10320, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10319, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10318, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10317, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10316, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10315, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10314, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10313, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10312, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10311, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10310, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10309, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10308, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10307, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10306, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10305, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10304, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10303, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10302, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10301, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10300, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10299, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10298, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10297, -#endif -#if __COUNTER__ >= 10514 - 
&&Resume_10296, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10295, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10294, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10293, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10292, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10291, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10290, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10289, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10288, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10287, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10286, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10285, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10284, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10283, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10282, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10281, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10280, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10279, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10278, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10277, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10276, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10275, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10274, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10273, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10272, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10271, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10270, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10269, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10268, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10267, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10266, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10265, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10264, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10263, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10262, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10261, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10260, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10259, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10258, -#endif 
-#if __COUNTER__ >= 10514 - &&Resume_10257, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10256, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10255, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10254, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10253, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10252, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10251, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10250, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10249, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10248, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10247, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10246, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10245, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10244, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10243, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10242, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10241, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10240, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10239, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10238, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10237, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10236, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10235, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10234, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10233, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10232, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10231, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10230, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10229, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10228, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10227, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10226, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10225, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10224, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10223, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10222, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10221, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10220, -#endif -#if __COUNTER__ >= 
10514 - &&Resume_10219, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10218, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10217, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10216, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10215, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10214, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10213, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10212, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10211, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10210, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10209, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10208, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10207, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10206, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10205, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10204, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10203, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10202, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10201, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10200, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10199, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10198, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10197, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10196, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10195, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10194, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10193, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10192, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10191, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10190, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10189, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10188, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10187, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10186, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10185, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10184, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10183, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10182, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10181, 
-#endif -#if __COUNTER__ >= 10514 - &&Resume_10180, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10179, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10178, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10177, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10176, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10175, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10174, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10173, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10172, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10171, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10170, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10169, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10168, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10167, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10166, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10165, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10164, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10163, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10162, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10161, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10160, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10159, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10158, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10157, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10156, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10155, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10154, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10153, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10152, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10151, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10150, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10149, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10148, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10147, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10146, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10145, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10144, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10143, -#endif -#if 
__COUNTER__ >= 10514 - &&Resume_10142, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10141, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10140, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10139, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10138, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10137, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10136, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10135, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10134, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10133, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10132, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10131, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10130, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10129, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10128, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10127, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10126, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10125, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10124, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10123, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10122, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10121, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10120, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10119, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10118, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10117, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10116, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10115, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10114, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10113, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10112, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10111, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10110, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10109, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10108, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10107, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10106, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10105, -#endif -#if __COUNTER__ >= 10514 - 
&&Resume_10104, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10103, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10102, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10101, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10100, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10099, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10098, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10097, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10096, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10095, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10094, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10093, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10092, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10091, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10090, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10089, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10088, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10087, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10086, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10085, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10084, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10083, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10082, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10081, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10080, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10079, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10078, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10077, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10076, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10075, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10074, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10073, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10072, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10071, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10070, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10069, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10068, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10067, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10066, -#endif 
-#if __COUNTER__ >= 10514 - &&Resume_10065, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10064, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10063, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10062, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10061, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10060, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10059, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10058, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10057, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10056, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10055, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10054, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10053, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10052, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10051, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10050, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10049, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10048, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10047, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10046, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10045, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10044, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10043, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10042, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10041, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10040, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10039, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10038, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10037, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10036, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10035, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10034, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10033, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10032, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10031, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10030, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10029, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10028, -#endif -#if __COUNTER__ >= 
10514 - &&Resume_10027, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10026, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10025, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10024, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10023, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10022, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10021, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10020, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10019, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10018, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10017, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10016, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10015, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10014, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10013, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10012, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10011, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10010, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10009, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10008, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10007, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10006, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10005, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10004, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10003, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10002, -#endif -#if __COUNTER__ >= 10514 - &&Resume_10001, -#endif +/* Switch-case dispatch entries for the debug SH7095_RunSlaveUntil_Debug + * resume path. Numbered to match the __COUNTER__ values that + * CHECK_EXIT_RESUME() expansions assign as `Resume_NNNN:` labels. + * + * Range: 10001 .. 10392 (392 entries). Regenerate via + * notes/build_sh7095s_ctable.c debug + * + * Consumes ZERO __COUNTER__ values; the resume-id integers are + * compile-time constants in each `case` label. 
*/ + case 10001: goto Resume_10001; + case 10002: goto Resume_10002; + case 10003: goto Resume_10003; + case 10004: goto Resume_10004; + case 10005: goto Resume_10005; + case 10006: goto Resume_10006; + case 10007: goto Resume_10007; + case 10008: goto Resume_10008; + case 10009: goto Resume_10009; + case 10010: goto Resume_10010; + case 10011: goto Resume_10011; + case 10012: goto Resume_10012; + case 10013: goto Resume_10013; + case 10014: goto Resume_10014; + case 10015: goto Resume_10015; + case 10016: goto Resume_10016; + case 10017: goto Resume_10017; + case 10018: goto Resume_10018; + case 10019: goto Resume_10019; + case 10020: goto Resume_10020; + case 10021: goto Resume_10021; + case 10022: goto Resume_10022; + case 10023: goto Resume_10023; + case 10024: goto Resume_10024; + case 10025: goto Resume_10025; + case 10026: goto Resume_10026; + case 10027: goto Resume_10027; + case 10028: goto Resume_10028; + case 10029: goto Resume_10029; + case 10030: goto Resume_10030; + case 10031: goto Resume_10031; + case 10032: goto Resume_10032; + case 10033: goto Resume_10033; + case 10034: goto Resume_10034; + case 10035: goto Resume_10035; + case 10036: goto Resume_10036; + case 10037: goto Resume_10037; + case 10038: goto Resume_10038; + case 10039: goto Resume_10039; + case 10040: goto Resume_10040; + case 10041: goto Resume_10041; + case 10042: goto Resume_10042; + case 10043: goto Resume_10043; + case 10044: goto Resume_10044; + case 10045: goto Resume_10045; + case 10046: goto Resume_10046; + case 10047: goto Resume_10047; + case 10048: goto Resume_10048; + case 10049: goto Resume_10049; + case 10050: goto Resume_10050; + case 10051: goto Resume_10051; + case 10052: goto Resume_10052; + case 10053: goto Resume_10053; + case 10054: goto Resume_10054; + case 10055: goto Resume_10055; + case 10056: goto Resume_10056; + case 10057: goto Resume_10057; + case 10058: goto Resume_10058; + case 10059: goto Resume_10059; + case 10060: goto Resume_10060; + case 10061: 
goto Resume_10061; + case 10062: goto Resume_10062; + case 10063: goto Resume_10063; + case 10064: goto Resume_10064; + case 10065: goto Resume_10065; + case 10066: goto Resume_10066; + case 10067: goto Resume_10067; + case 10068: goto Resume_10068; + case 10069: goto Resume_10069; + case 10070: goto Resume_10070; + case 10071: goto Resume_10071; + case 10072: goto Resume_10072; + case 10073: goto Resume_10073; + case 10074: goto Resume_10074; + case 10075: goto Resume_10075; + case 10076: goto Resume_10076; + case 10077: goto Resume_10077; + case 10078: goto Resume_10078; + case 10079: goto Resume_10079; + case 10080: goto Resume_10080; + case 10081: goto Resume_10081; + case 10082: goto Resume_10082; + case 10083: goto Resume_10083; + case 10084: goto Resume_10084; + case 10085: goto Resume_10085; + case 10086: goto Resume_10086; + case 10087: goto Resume_10087; + case 10088: goto Resume_10088; + case 10089: goto Resume_10089; + case 10090: goto Resume_10090; + case 10091: goto Resume_10091; + case 10092: goto Resume_10092; + case 10093: goto Resume_10093; + case 10094: goto Resume_10094; + case 10095: goto Resume_10095; + case 10096: goto Resume_10096; + case 10097: goto Resume_10097; + case 10098: goto Resume_10098; + case 10099: goto Resume_10099; + case 10100: goto Resume_10100; + case 10101: goto Resume_10101; + case 10102: goto Resume_10102; + case 10103: goto Resume_10103; + case 10104: goto Resume_10104; + case 10105: goto Resume_10105; + case 10106: goto Resume_10106; + case 10107: goto Resume_10107; + case 10108: goto Resume_10108; + case 10109: goto Resume_10109; + case 10110: goto Resume_10110; + case 10111: goto Resume_10111; + case 10112: goto Resume_10112; + case 10113: goto Resume_10113; + case 10114: goto Resume_10114; + case 10115: goto Resume_10115; + case 10116: goto Resume_10116; + case 10117: goto Resume_10117; + case 10118: goto Resume_10118; + case 10119: goto Resume_10119; + case 10120: goto Resume_10120; + case 10121: goto Resume_10121; 
+ case 10122: goto Resume_10122; + case 10123: goto Resume_10123; + case 10124: goto Resume_10124; + case 10125: goto Resume_10125; + case 10126: goto Resume_10126; + case 10127: goto Resume_10127; + case 10128: goto Resume_10128; + case 10129: goto Resume_10129; + case 10130: goto Resume_10130; + case 10131: goto Resume_10131; + case 10132: goto Resume_10132; + case 10133: goto Resume_10133; + case 10134: goto Resume_10134; + case 10135: goto Resume_10135; + case 10136: goto Resume_10136; + case 10137: goto Resume_10137; + case 10138: goto Resume_10138; + case 10139: goto Resume_10139; + case 10140: goto Resume_10140; + case 10141: goto Resume_10141; + case 10142: goto Resume_10142; + case 10143: goto Resume_10143; + case 10144: goto Resume_10144; + case 10145: goto Resume_10145; + case 10146: goto Resume_10146; + case 10147: goto Resume_10147; + case 10148: goto Resume_10148; + case 10149: goto Resume_10149; + case 10150: goto Resume_10150; + case 10151: goto Resume_10151; + case 10152: goto Resume_10152; + case 10153: goto Resume_10153; + case 10154: goto Resume_10154; + case 10155: goto Resume_10155; + case 10156: goto Resume_10156; + case 10157: goto Resume_10157; + case 10158: goto Resume_10158; + case 10159: goto Resume_10159; + case 10160: goto Resume_10160; + case 10161: goto Resume_10161; + case 10162: goto Resume_10162; + case 10163: goto Resume_10163; + case 10164: goto Resume_10164; + case 10165: goto Resume_10165; + case 10166: goto Resume_10166; + case 10167: goto Resume_10167; + case 10168: goto Resume_10168; + case 10169: goto Resume_10169; + case 10170: goto Resume_10170; + case 10171: goto Resume_10171; + case 10172: goto Resume_10172; + case 10173: goto Resume_10173; + case 10174: goto Resume_10174; + case 10175: goto Resume_10175; + case 10176: goto Resume_10176; + case 10177: goto Resume_10177; + case 10178: goto Resume_10178; + case 10179: goto Resume_10179; + case 10180: goto Resume_10180; + case 10181: goto Resume_10181; + case 10182: goto 
Resume_10182; + case 10183: goto Resume_10183; + case 10184: goto Resume_10184; + case 10185: goto Resume_10185; + case 10186: goto Resume_10186; + case 10187: goto Resume_10187; + case 10188: goto Resume_10188; + case 10189: goto Resume_10189; + case 10190: goto Resume_10190; + case 10191: goto Resume_10191; + case 10192: goto Resume_10192; + case 10193: goto Resume_10193; + case 10194: goto Resume_10194; + case 10195: goto Resume_10195; + case 10196: goto Resume_10196; + case 10197: goto Resume_10197; + case 10198: goto Resume_10198; + case 10199: goto Resume_10199; + case 10200: goto Resume_10200; + case 10201: goto Resume_10201; + case 10202: goto Resume_10202; + case 10203: goto Resume_10203; + case 10204: goto Resume_10204; + case 10205: goto Resume_10205; + case 10206: goto Resume_10206; + case 10207: goto Resume_10207; + case 10208: goto Resume_10208; + case 10209: goto Resume_10209; + case 10210: goto Resume_10210; + case 10211: goto Resume_10211; + case 10212: goto Resume_10212; + case 10213: goto Resume_10213; + case 10214: goto Resume_10214; + case 10215: goto Resume_10215; + case 10216: goto Resume_10216; + case 10217: goto Resume_10217; + case 10218: goto Resume_10218; + case 10219: goto Resume_10219; + case 10220: goto Resume_10220; + case 10221: goto Resume_10221; + case 10222: goto Resume_10222; + case 10223: goto Resume_10223; + case 10224: goto Resume_10224; + case 10225: goto Resume_10225; + case 10226: goto Resume_10226; + case 10227: goto Resume_10227; + case 10228: goto Resume_10228; + case 10229: goto Resume_10229; + case 10230: goto Resume_10230; + case 10231: goto Resume_10231; + case 10232: goto Resume_10232; + case 10233: goto Resume_10233; + case 10234: goto Resume_10234; + case 10235: goto Resume_10235; + case 10236: goto Resume_10236; + case 10237: goto Resume_10237; + case 10238: goto Resume_10238; + case 10239: goto Resume_10239; + case 10240: goto Resume_10240; + case 10241: goto Resume_10241; + case 10242: goto Resume_10242; + 
case 10243: goto Resume_10243; + case 10244: goto Resume_10244; + case 10245: goto Resume_10245; + case 10246: goto Resume_10246; + case 10247: goto Resume_10247; + case 10248: goto Resume_10248; + case 10249: goto Resume_10249; + case 10250: goto Resume_10250; + case 10251: goto Resume_10251; + case 10252: goto Resume_10252; + case 10253: goto Resume_10253; + case 10254: goto Resume_10254; + case 10255: goto Resume_10255; + case 10256: goto Resume_10256; + case 10257: goto Resume_10257; + case 10258: goto Resume_10258; + case 10259: goto Resume_10259; + case 10260: goto Resume_10260; + case 10261: goto Resume_10261; + case 10262: goto Resume_10262; + case 10263: goto Resume_10263; + case 10264: goto Resume_10264; + case 10265: goto Resume_10265; + case 10266: goto Resume_10266; + case 10267: goto Resume_10267; + case 10268: goto Resume_10268; + case 10269: goto Resume_10269; + case 10270: goto Resume_10270; + case 10271: goto Resume_10271; + case 10272: goto Resume_10272; + case 10273: goto Resume_10273; + case 10274: goto Resume_10274; + case 10275: goto Resume_10275; + case 10276: goto Resume_10276; + case 10277: goto Resume_10277; + case 10278: goto Resume_10278; + case 10279: goto Resume_10279; + case 10280: goto Resume_10280; + case 10281: goto Resume_10281; + case 10282: goto Resume_10282; + case 10283: goto Resume_10283; + case 10284: goto Resume_10284; + case 10285: goto Resume_10285; + case 10286: goto Resume_10286; + case 10287: goto Resume_10287; + case 10288: goto Resume_10288; + case 10289: goto Resume_10289; + case 10290: goto Resume_10290; + case 10291: goto Resume_10291; + case 10292: goto Resume_10292; + case 10293: goto Resume_10293; + case 10294: goto Resume_10294; + case 10295: goto Resume_10295; + case 10296: goto Resume_10296; + case 10297: goto Resume_10297; + case 10298: goto Resume_10298; + case 10299: goto Resume_10299; + case 10300: goto Resume_10300; + case 10301: goto Resume_10301; + case 10302: goto Resume_10302; + case 10303: goto 
Resume_10303; + case 10304: goto Resume_10304; + case 10305: goto Resume_10305; + case 10306: goto Resume_10306; + case 10307: goto Resume_10307; + case 10308: goto Resume_10308; + case 10309: goto Resume_10309; + case 10310: goto Resume_10310; + case 10311: goto Resume_10311; + case 10312: goto Resume_10312; + case 10313: goto Resume_10313; + case 10314: goto Resume_10314; + case 10315: goto Resume_10315; + case 10316: goto Resume_10316; + case 10317: goto Resume_10317; + case 10318: goto Resume_10318; + case 10319: goto Resume_10319; + case 10320: goto Resume_10320; + case 10321: goto Resume_10321; + case 10322: goto Resume_10322; + case 10323: goto Resume_10323; + case 10324: goto Resume_10324; + case 10325: goto Resume_10325; + case 10326: goto Resume_10326; + case 10327: goto Resume_10327; + case 10328: goto Resume_10328; + case 10329: goto Resume_10329; + case 10330: goto Resume_10330; + case 10331: goto Resume_10331; + case 10332: goto Resume_10332; + case 10333: goto Resume_10333; + case 10334: goto Resume_10334; + case 10335: goto Resume_10335; + case 10336: goto Resume_10336; + case 10337: goto Resume_10337; + case 10338: goto Resume_10338; + case 10339: goto Resume_10339; + case 10340: goto Resume_10340; + case 10341: goto Resume_10341; + case 10342: goto Resume_10342; + case 10343: goto Resume_10343; + case 10344: goto Resume_10344; + case 10345: goto Resume_10345; + case 10346: goto Resume_10346; + case 10347: goto Resume_10347; + case 10348: goto Resume_10348; + case 10349: goto Resume_10349; + case 10350: goto Resume_10350; + case 10351: goto Resume_10351; + case 10352: goto Resume_10352; + case 10353: goto Resume_10353; + case 10354: goto Resume_10354; + case 10355: goto Resume_10355; + case 10356: goto Resume_10356; + case 10357: goto Resume_10357; + case 10358: goto Resume_10358; + case 10359: goto Resume_10359; + case 10360: goto Resume_10360; + case 10361: goto Resume_10361; + case 10362: goto Resume_10362; + case 10363: goto Resume_10363; + 
case 10364: goto Resume_10364; + case 10365: goto Resume_10365; + case 10366: goto Resume_10366; + case 10367: goto Resume_10367; + case 10368: goto Resume_10368; + case 10369: goto Resume_10369; + case 10370: goto Resume_10370; + case 10371: goto Resume_10371; + case 10372: goto Resume_10372; + case 10373: goto Resume_10373; + case 10374: goto Resume_10374; + case 10375: goto Resume_10375; + case 10376: goto Resume_10376; + case 10377: goto Resume_10377; + case 10378: goto Resume_10378; + case 10379: goto Resume_10379; + case 10380: goto Resume_10380; + case 10381: goto Resume_10381; + case 10382: goto Resume_10382; + case 10383: goto Resume_10383; + case 10384: goto Resume_10384; + case 10385: goto Resume_10385; + case 10386: goto Resume_10386; + case 10387: goto Resume_10387; + case 10388: goto Resume_10388; + case 10389: goto Resume_10389; + case 10390: goto Resume_10390; + case 10391: goto Resume_10391; + case 10392: goto Resume_10392; diff --git a/mednafen/ss/sh7095s_rsu.inc b/mednafen/ss/sh7095s_rsu.inc index 92774a1a..be78a143 100644 --- a/mednafen/ss/sh7095s_rsu.inc +++ b/mednafen/ss/sh7095s_rsu.inc @@ -1,16 +1,31 @@ - enum : unsigned { which = 1 }; - enum : bool { EmulateICache = true }; - enum : bool { DebugMode = SH7095_DEBUG_MODE }; - enum : bool { CacheBypassHack = false }; + /* Plain enums (not C++11 `enum : type {}`). The latter is rejected + * by MSVC C89 mode and by strict ISO C. enum constants are always + * `int` in C; the bool / unsigned typed-enum forms in C++11 only + * affect the constant's underlying storage type, never its value -- + * so the downstream `if(EmulateICache)` / `case which:` / + * `if(DebugMode)` macro expansions get the same compile-time fold + * either way. 
*/ + enum { which = 1 }; + enum { EmulateICache = 1 /* true */ }; + enum { DebugMode = SH7095_DEBUG_MODE }; + enum { CacheBypassHack = 0 /* false */ }; if(MDFN_UNLIKELY(z->timestamp >= bound_timestamp)) return; - if(z->ResumePoint) + if(z->resume_id) { - const void* const tmp = z->ResumePoint; - z->ResumePoint = NULL; - goto *tmp; + const uint16_t id = z->resume_id; + z->resume_id = 0; + switch (id) + { +#if SH7095_DEBUG_MODE + #include "sh7095s_ctable_dm.inc" +#else + #include "sh7095s_ctable.inc" +#endif + default: MDFN_UNREACHABLE; + } } do diff --git a/mednafen/ss/smpc.c b/mednafen/ss/smpc.c index e86d4a13..dc68f5b6 100644 --- a/mednafen/ss/smpc.c +++ b/mednafen/ss/smpc.c @@ -50,8 +50,8 @@ * methods it needs (SetActive / SetNMI) via the matching extern "C" * proxies in ss.cpp (which is where the CPU[2] global lives). Local * forward decls here cover them. */ -extern void SH7095_SetActive(int cpu, bool active); -extern void SH7095_SetNMI(int cpu, bool level); +extern void SH7095_S_SetActive(bool active); +extern void SH7095_M_SetNMI(bool level); enum { @@ -576,7 +576,7 @@ void SMPC_Reset(bool powering_up) { SlaveSH2Pending = 0; SlaveSH2On = false; - SH7095_SetActive(1, SlaveSH2On); + SH7095_S_SetActive(SlaveSH2On); // TurnSoundCPUOff(); CDOn = true; // ? false; @@ -584,7 +584,7 @@ void SMPC_Reset(bool powering_up) ResetButtonCount = 0; ResetNMIEnable = false; // or only on powering_up? 
- SH7095_SetNMI(0, true); + SH7095_M_SetNMI(true); memset(IREG, 0, sizeof(IREG)); memset(OREG, 0, sizeof(OREG)); @@ -778,7 +778,7 @@ void SMPC_ProcessSlaveOffOn(void) if(SlaveSH2Pending) { SlaveSH2On = (SlaveSH2Pending > 0); - SH7095_SetActive(1, SlaveSH2On); + SH7095_S_SetActive(SlaveSH2On); SlaveSH2Pending = 0; // } @@ -1168,8 +1168,8 @@ sscpu_timestamp_t SMPC_Update(sscpu_timestamp_t timestamp) if(ResetNMIEnable) { - SH7095_SetNMI(0, false); - SH7095_SetNMI(0, true); + SH7095_M_SetNMI(false); + SH7095_M_SetNMI(true); ResetButtonCount = -1; } @@ -1259,8 +1259,8 @@ sscpu_timestamp_t SMPC_Update(sscpu_timestamp_t timestamp) SMPC_WAIT_UNTIL_COND(vsync); // Send NMI to master SH-2 - SH7095_SetNMI(0, false); - SH7095_SetNMI(0, true); + SH7095_M_SetNMI(false); + SH7095_M_SetNMI(true); } else if(ExecutingCommand == CMD_INTBACK) { @@ -1582,8 +1582,8 @@ sscpu_timestamp_t SMPC_Update(sscpu_timestamp_t timestamp) } else if(ExecutingCommand == CMD_NMIREQ) { - SH7095_SetNMI(0, false); - SH7095_SetNMI(0, true); + SH7095_M_SetNMI(false); + SH7095_M_SetNMI(true); } else if(ExecutingCommand == CMD_RESENAB) { diff --git a/mednafen/ss/smpc.h b/mednafen/ss/smpc.h index 441e617f..1a2b3d02 100644 --- a/mednafen/ss/smpc.h +++ b/mednafen/ss/smpc.h @@ -24,18 +24,14 @@ #ifndef __MDFN_SS_SMPC_H #define __MDFN_SS_SMPC_H -#include +#include "../state.h" /* MDFN_COLD / MDFN_HOT attribute macros. Existing C++ TUs got * these transitively (via ss.h / mednafen.h); for C consumers the * header needs to be self-contained. */ -#include +#include "../mednafen-types.h" #include -/* C++ has 'bool' built in; C inclusion (future C-converted modules) - * needs the stdbool keyword macros. */ -#ifndef __cplusplus -#include -#endif +#include #include "../cdstream.h" diff --git a/mednafen/ss/smpc_iodevice.c b/mednafen/ss/smpc_iodevice.c index 12c4fdbe..2fdb24a2 100644 --- a/mednafen/ss/smpc_iodevice.c +++ b/mednafen/ss/smpc_iodevice.c @@ -35,22 +35,22 @@ wheel, mission, gun, keyboard, jpkeyboard, multitap. 
*/ #include -#include #include #include #include #include #include +#include #include -#include -#include +#include "libretro_settings.h" #include "smpc_iodevice.h" #include "../video/surface.h" #include "../math_ops.h" /* MDFN_lzcount64, for the keyboard device */ -#include "libretro_settings.h" +#include "../state.h" +#include "../mdfn_gameinfo.h" /* ss.h is a C++ header (class SH7095, default args, ...), so it cannot be included here. Cross-boundary constants come from the diff --git a/mednafen/ss/smpc_iodevice.h b/mednafen/ss/smpc_iodevice.h index 3df51e8c..653663c1 100644 --- a/mednafen/ss/smpc_iodevice.h +++ b/mednafen/ss/smpc_iodevice.h @@ -23,12 +23,10 @@ #define __MDFN_SS_SMPC_IODEVICE_H #include -#ifndef __cplusplus -#include -#endif +#include -#include -#include +#include "../state.h" +#include "../video/surface.h" /* Formerly a C++ class hierarchy: `class IODevice` plus nine derived device classes, each in its own input/.{h,cpp}. Converted diff --git a/mednafen/ss/sound.c b/mednafen/ss/sound.c index b946d8d4..322afc70 100644 --- a/mednafen/ss/sound.c +++ b/mednafen/ss/sound.c @@ -44,7 +44,7 @@ #include "sound_internal.h" #include -#include +#include "../state.h" /* 32.32 fixed-point cycle accumulator. run_until_time tracks the * 68K cycle target derived from the SH-2 timestamp + clock ratio; diff --git a/mednafen/ss/sound.h b/mednafen/ss/sound.h index faf1390e..212f7bc5c 100644 --- a/mednafen/ss/sound.h +++ b/mednafen/ss/sound.h @@ -22,18 +22,14 @@ #ifndef __MDFN_SS_SOUND_H #define __MDFN_SS_SOUND_H -#include +#include "../state.h" /* MDFN_COLD / MDFN_HOT attribute macros. Existing C++ TUs got * these transitively (via ss.h / mednafen.h); for C consumers * include them explicitly so this header is self-contained. */ -#include +#include "../mednafen-types.h" #include -/* C++ has 'bool' built in; C inclusion (future C-converted modules) - * needs the stdbool keyword macros. 
*/ -#ifndef __cplusplus -#include -#endif +#include #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/sound_glue.cpp b/mednafen/ss/sound_glue.c similarity index 80% rename from mednafen/ss/sound_glue.cpp rename to mednafen/ss/sound_glue.c index 6c74fa1e..7d8402c4 100644 --- a/mednafen/ss/sound_glue.cpp +++ b/mednafen/ss/sound_glue.c @@ -1,12 +1,14 @@ /******************************************************************************/ /* Mednafen Sega Saturn Emulation Module */ /******************************************************************************/ -/* sound_glue.cpp - C++ side of the Saturn sound module. Phase-6c split out -** from sound.cpp so the orchestration half can become C -** (see sound.c); this file keeps the SS_SCSP / M68K class -** instances, the M68K bus callbacks (which need C++-side -** access to the class globals), and exposes everything -** the C side needs through extern "C" SoundGlue_* wrappers. +/* sound_glue.c - Saturn sound-module glue (Phase-6c split from +** sound.cpp, Phase-9 renamed from sound_glue.cpp +** once SS_SCSP and M68K both shed their C++ class +** surface). Keeps the SS_SCSP / M68K struct +** instances, the eight M68K bus callbacks (which +** need access to those file-static globals), and +** exposes everything sound.c needs through plain +** SoundGlue_* C-linkage wrappers. ** ** Copyright (C) 2015-2021 Mednafen Team ** @@ -36,13 +38,19 @@ #include "cdb.h" #include "scsp.h" - -/* The two C++ class instances that drive the Saturn sound module. - * Both are file-static here; sound.c never sees the class types - * directly -- it only reaches them through the extern "C" wrappers - * below. */ +#include "scsp_dsp_jit.h" + +/* The two file-static struct instances that drive the Saturn + * sound module. 
Both are zero-initialised at program load + * (file scope -> implicit zero); SoundGlue_Init() finishes the + * setup by calling M68K_Construct() on SoundCPU (in lieu of the + * C++ ctor-call this used to spell as `static M68K SoundCPU + * (true);`) and SS_SCSP_Reset(&SCSP, true) (in lieu of what + * SS_SCSP::SS_SCSP() used to do implicitly). sound.c never + * sees these struct types directly -- it only reaches them + * through the SoundGlue_* wrappers below. */ static SS_SCSP SCSP; -static M68K SoundCPU(true); +static M68K SoundCPU; /* SCSP IRQ-line and main-CPU-int callbacks. These get pulled in * by scsp.inc and called from the SCSP state machine; they touch @@ -59,6 +67,20 @@ static INLINE void SCSP_MainIntChanged(SS_SCSP* s, bool state) #include "scsp.inc" +#ifdef WANT_JIT +/* Trampolines into scsp.inc's INLINE bodies; placed here so LTO + * can inline the body straight into the trampoline. */ +void SCSP_DSP_run_step(SS_SCSP* scsp, unsigned step) +{ + SS_SCSP_RunDSPStep(scsp, step); +} + +void SCSP_DSP_run_interpreter(SS_SCSP* scsp) +{ + SS_SCSP_RunDSPInterpreter(scsp); +} +#endif + /* =================================================================== * M68K SoundCPU bus callbacks * @@ -67,12 +89,13 @@ static INLINE void SCSP_MainIntChanged(SS_SCSP* s, bool state) * fields from SoundGlue_Init(). M68K execution dispatches into these * for every external memory access. * - * They live on the C++ side because the bodies reach SCSP.RW_* (member - * call, needs class visibility) and the global SoundCPU and SCSP - * instances. Their function-pointer addresses are stored in M68K's - * fields; the calling code (M68K::Run, deep in m68k.cpp) only sees - * the pointer values, so the calling convention is the only ABI - * constraint -- MDFN_FASTCALL on both sides. + * They live in this TU because the bodies reach SS_SCSP_RW_* (need + * scsp.h's SS_SCSP type visible -- it lives in this file via scsp.inc) + * and the file-static SoundCPU and SCSP instances. 
Their function- + * pointer addresses are stored in M68K's fields; the calling code + * (the M68K core's run loop) only sees the pointer values, so + * the calling convention is the only ABI constraint -- MDFN_FASTCALL + * on both sides. * * The bus-access bodies need three pieces of cross-TU state owned * by sound.c: SOUND_next_scsp_time (the SCSP-sample boundary timer @@ -210,7 +233,7 @@ static MDFN_FASTCALL unsigned SoundCPU_BusIntAck(uint8_t level) { SoundCPU.timestamp += 10; - return M68K::BUS_INT_ACK_AUTO; + return M68K_BUS_INT_ACK_AUTO; } static MDFN_FASTCALL void SoundCPU_BusRESET(bool state) @@ -220,17 +243,34 @@ static MDFN_FASTCALL void SoundCPU_BusRESET(bool state) } /* =================================================================== - * extern "C" wrappers exposed to sound.c + * SoundGlue_* wrappers exposed to sound.c + * + * No `extern "C" { ... }` block any more -- this file is now C + * (post Phase-9 rename from sound_glue.cpp to sound_glue.c). + * sound_internal.h still wraps the matching declarations in + * `#ifdef __cplusplus extern "C" { ... } #endif` so any future + * C++ consumer would see the C-linkage names; for this TU plain + * C linkage is the default and matches what sound.c expects. * =================================================================== */ -extern "C" { - void SoundGlue_Init(void) { - /* Phase-9: replace what SS_SCSP::SS_SCSP() used to do implicitly - * at program load: zero the dummy half of RAM (so out-of-range - * playback reads return 0) and reset SS_SCSP state. The ctor/dtor - * thunks have been dropped now that the struct is pure data. */ + /* Phase-9: replace what M68K::M68K(true) and SS_SCSP::SS_SCSP() + * used to do implicitly at program load. M68K_Construct does + * what the M68K(rev_e=true) constructor did: stash Revision_E, + * null the 7 bus-callback slots, install Dummy_BusRESET as + * BusRESET's default, zero timestamp/XPending/IPL, then power- + * on Reset.
The 8 Bus-callback slots below overwrite the + * nulls that M68K_Construct just installed (BusRESET overwrites + * Dummy_BusRESET, which was just a stop-gap default for code + * paths that fire before SoundGlue_Init -- there are no such + * paths in practice). */ + M68K_Construct(&SoundCPU, true); + + /* Zero the dummy half of RAM (so out-of-range playback reads + * return 0) and reset SS_SCSP state -- what SS_SCSP::SS_SCSP() + * used to do. The struct itself is zero-initialised at program + * load via the file-scope `static SS_SCSP SCSP;` declaration. */ memset(SS_SCSP_GetRAMPtr(&SCSP) + 0x40000, 0x00, 0x40000 * sizeof(uint16_t)); SS_SCSP_Reset(&SCSP, true); @@ -345,5 +385,3 @@ void SOUND_RunSCSP(void) IBufferCount = (IBufferCount + 1) & 1023; SOUND_next_scsp_time += 256; } - -} /* extern "C" */ diff --git a/mednafen/ss/sound_internal.h b/mednafen/ss/sound_internal.h index 0bad274c..32307f2d 100644 --- a/mednafen/ss/sound_internal.h +++ b/mednafen/ss/sound_internal.h @@ -25,12 +25,10 @@ #define __MDFN_SS_SOUND_INTERNAL_H #include -#ifndef __cplusplus -#include -#endif +#include -#include -#include +#include "../mednafen-types.h" +#include "../state.h" #include "../jump.h" #ifdef __cplusplus diff --git a/mednafen/ss/ss.cpp b/mednafen/ss/ss.c similarity index 83% rename from mednafen/ss/ss.cpp rename to mednafen/ss/ss.c index f2f8431d..689aa206 100644 --- a/mednafen/ss/ss.cpp +++ b/mednafen/ss/ss.c @@ -27,8 +27,12 @@ * already pure C in everything except the #include surface. The * MDFNGI typedef ss.cpp needs for `extern MDFNGI EmulatedSS;` lives * in mdfn_gameinfo.h which is C-clean (factored out of git.h - * specifically so C TUs can include it). */ + * specifically so C TUs can include it). EmulateSpecStruct lives + * in emuspec.h for the same reason -- typedef'd at file scope so + * both C and C++ TUs can name the type without the `struct` + * keyword. 
*/ #include "../mdfn_gameinfo.h" +#include "../emuspec.h" #include "../general.h" #include "../cdrom/cdromif.h" #include "../cdstream.h" @@ -103,7 +107,7 @@ SH7095 CPU[2]; * (static storage duration) and the once-only per-CPU init that the * ctor used to do moves into SH7095_ConstructAll below. Called from * InitCommon() before either CPU is touched. */ -extern "C" MDFN_COLD void SH7095_ConstructAll(void) +MDFN_COLD void SH7095_ConstructAll(void) { SH7095_Construct(&CPU[0], "SH2-M", SS_EVENT_SH2_M_DMA, SCU_MSH2VectorFetch); SH7095_Construct(&CPU[1], "SH2-S", SS_EVENT_SH2_S_DMA, SCU_SSH2VectorFetch); @@ -121,21 +125,33 @@ extern "C" MDFN_COLD void SH7095_ConstructAll(void) * C++-only (it exposes the class, and there is no current C TU that * needs anything beyond these two methods); when more SH7095 * operations need C-callable proxies they should be added here. */ -extern "C" void SH7095_SetActive(int cpu, bool active) +/* Phase-9 follow-up: these C-callable proxies used to shadow the + * SH7095*-primary `SH7095_SetActive` / `SH7095_SetNMI` decls in + * sh7095.h via C++ overloading (same name, different signature, + * different linkage namespace). Once sh7095.h became C-parseable + * the overload collapsed to a redefinition: C has no overloading. + * + * Both wrappers were always called with hard-coded CPU indices + * (SetActive only ever with 1 = slave, SetNMI only ever with + * 0 = master), so the right shape is `SH7095_M_*` / `SH7095_S_*` + * matching the existing SH7095_M_Init / SH7095_M_Reset naming + * convention -- drop the int parameter, encode the CPU in the + * function name. 
*/ +void SH7095_S_SetActive(bool active) { - SH7095_SetActive(&CPU[cpu], active); + SH7095_SetActive(&CPU[1], active); } -extern "C" void SH7095_SetNMI(int cpu, bool level) +void SH7095_M_SetNMI(bool level) { - SH7095_SetNMI(&CPU[cpu], level); + SH7095_SetNMI(&CPU[0], level); } /* Used by vdp2.c (converted from C++) for the HORRIBLEHACK_NOSH2DMA- * LINE106 path -- vdp2's CPU loop iterates CPU[0..1] once per scanline * advance and sets the kludge flag. Matches the SetActive / SetNMI * proxies above; cpu index picks master (0) / slave (1). */ -extern "C" void SH7095_SetExtHaltDMAKludge(int cpu, bool state) +void SH7095_SetExtHaltDMAKludge(int cpu, bool state) { SH7095_SetExtHaltDMAKludgeFromVDP2(&CPU[cpu], state); } @@ -221,7 +237,7 @@ int ActiveCartType; // Used in save states. * via source-fold. Only (u8/u16) x (W0/W1) tuples are * invoked by callers in sh7095.inc; no u32 CS0 access. */ -static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -254,7 +270,7 @@ static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool * write width. Compiler folds away the dead branches per * template instantiation. 
*/ const uint32_t boff_ = A & 0xFFFFF; - const uint8_t val_ = DB >> (((A & 1) ^ (2 - 1)) << 3); + const uint8_t val_ = *DB >> (((A & 1) ^ (2 - 1)) << 3); { #ifdef MSB_FIRST ((uint8_t*)WorkRAML)[boff_] = val_; @@ -295,7 +311,7 @@ static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool { if(false || (A & 1)) - SMPC_Write(SH7095_mem_timestamp, SMPC_A, DB); + SMPC_Write(SH7095_mem_timestamp, SMPC_A, *DB); } return; @@ -316,9 +332,9 @@ static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool { uint8_t* const brp = &BackupRAM[(A >> 1) & 0x7FFF]; - if(*brp != (uint8_t)DB) + if(*brp != (uint8_t)*DB) { - *brp = (uint8_t)DB; + *brp = (uint8_t)*DB; BackupRAM_Dirty = true; } } @@ -363,7 +379,7 @@ static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool { if(false || (A & 1)) - STVIO_WriteIOGA(SH7095_mem_timestamp, IOGA_A, (uint8_t)DB); + STVIO_WriteIOGA(SH7095_mem_timestamp, IOGA_A, (uint8_t)*DB); } return; @@ -378,7 +394,7 @@ static INLINE void BusRW_DB_CS0_u8_W1(const uint32_t A, uint32_t& DB, const bool *SH2DMAHax += 4; } -static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -411,7 +427,7 @@ static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const boo * write width. Compiler folds away the dead branches per * template instantiation. 
*/ const uint32_t boff_ = A & 0xFFFFF; - const uint16_t val_ = DB >> (((A & 1) ^ (2 - 2)) << 3); + const uint16_t val_ = *DB >> (((A & 1) ^ (2 - 2)) << 3); WorkRAML[boff_ >> 1] = val_; } @@ -446,7 +462,7 @@ static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const boo { if(true || (A & 1)) - SMPC_Write(SH7095_mem_timestamp, SMPC_A, DB); + SMPC_Write(SH7095_mem_timestamp, SMPC_A, *DB); } return; @@ -467,9 +483,9 @@ static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const boo { uint8_t* const brp = &BackupRAM[(A >> 1) & 0x7FFF]; - if(*brp != (uint8_t)DB) + if(*brp != (uint8_t)*DB) { - *brp = (uint8_t)DB; + *brp = (uint8_t)*DB; BackupRAM_Dirty = true; } } @@ -519,7 +535,7 @@ static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const boo { if(true || (A & 1)) - STVIO_WriteIOGA(SH7095_mem_timestamp, IOGA_A, (uint8_t)DB); + STVIO_WriteIOGA(SH7095_mem_timestamp, IOGA_A, (uint8_t)*DB); } return; @@ -534,7 +550,7 @@ static INLINE void BusRW_DB_CS0_u16_W1(const uint32_t A, uint32_t& DB, const boo *SH2DMAHax += 4; } -static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -557,7 +573,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool // if(MDFN_UNLIKELY(A & 0x100000)) { - DB = DB | 0xFFFF; + *DB = *DB | 0xFFFF; return; } @@ -565,7 +581,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool { /* ne16_rbo_be(WorkRAML, byte_off): aligned u16 read * — host-endian-stored slot, direct index. 
*/ - DB = (DB & 0xFFFF0000) | WorkRAML[(A & 0xFFFFE) >> 1]; + *DB = (*DB & 0xFFFF0000) | WorkRAML[(A & 0xFFFFE) >> 1]; } return; @@ -581,7 +597,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool else *SH2DMAHax += 8; - DB = (DB & 0xFFFF0000) | BIOSROM[(A & 0x7FFFE) >> 1]; + *DB = (*DB & 0xFFFF0000) | BIOSROM[(A & 0x7FFFE) >> 1]; return; } @@ -599,7 +615,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool CheckEventsByMemTS(); } - DB = (DB & 0xFFFF0000) | 0xFF00 | SMPC_Read(SH7095_mem_timestamp, SMPC_A); + *DB = (*DB & 0xFFFF0000) | 0xFF00 | SMPC_Read(SH7095_mem_timestamp, SMPC_A); return; } @@ -614,7 +630,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool else *SH2DMAHax += 8; - DB = (DB & 0xFFFF0000) | 0xFF00 | BackupRAM[(A >> 1) & 0x7FFF]; + *DB = (*DB & 0xFFFF0000) | 0xFF00 | BackupRAM[(A >> 1) & 0x7FFF]; return; } @@ -650,7 +666,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool const uint8_t IOGA_A = (A >> 1) & 0x3F; - DB = (DB & 0xFFFF0000) | 0xFF00 | STVIO_ReadIOGA(SH7095_mem_timestamp, IOGA_A); + *DB = (*DB & 0xFFFF0000) | 0xFF00 | STVIO_ReadIOGA(SH7095_mem_timestamp, IOGA_A); return; } @@ -664,7 +680,7 @@ static INLINE void BusRW_DB_CS0_u8_W0(const uint32_t A, uint32_t& DB, const bool *SH2DMAHax += 4; } -static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -687,7 +703,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo // if(MDFN_UNLIKELY(A & 0x100000)) { - DB = DB | 0xFFFF; + *DB = *DB | 0xFFFF; return; } @@ -695,7 +711,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo { /* ne16_rbo_be(WorkRAML, byte_off): aligned u16 read * — host-endian-stored slot, direct index. 
*/ - DB = (DB & 0xFFFF0000) | WorkRAML[(A & 0xFFFFE) >> 1]; + *DB = (*DB & 0xFFFF0000) | WorkRAML[(A & 0xFFFFE) >> 1]; } return; @@ -711,7 +727,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo else *SH2DMAHax += 8; - DB = (DB & 0xFFFF0000) | BIOSROM[(A & 0x7FFFE) >> 1]; + *DB = (*DB & 0xFFFF0000) | BIOSROM[(A & 0x7FFFE) >> 1]; return; } @@ -729,7 +745,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo CheckEventsByMemTS(); } - DB = (DB & 0xFFFF0000) | 0xFF00 | SMPC_Read(SH7095_mem_timestamp, SMPC_A); + *DB = (*DB & 0xFFFF0000) | 0xFF00 | SMPC_Read(SH7095_mem_timestamp, SMPC_A); return; } @@ -744,7 +760,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo else *SH2DMAHax += 8; - DB = (DB & 0xFFFF0000) | 0xFF00 | BackupRAM[(A >> 1) & 0x7FFF]; + *DB = (*DB & 0xFFFF0000) | 0xFF00 | BackupRAM[(A >> 1) & 0x7FFF]; return; } @@ -780,7 +796,7 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo const uint8_t IOGA_A = (A >> 1) & 0x3F; - DB = (DB & 0xFFFF0000) | 0xFF00 | STVIO_ReadIOGA(SH7095_mem_timestamp, IOGA_A); + *DB = (*DB & 0xFFFF0000) | 0xFF00 | STVIO_ReadIOGA(SH7095_mem_timestamp, IOGA_A); return; } @@ -800,22 +816,22 @@ static INLINE void BusRW_DB_CS0_u16_W0(const uint32_t A, uint32_t& DB, const boo * ladder to SCU_FromSH2_BusRW_DB_* collapses to one * direct named call per variant. */ -static INLINE void BusRW_DB_CS12_u8_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u8_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // // CS1 and CS2: SCU // - DB = 0; + *DB = 0; /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. 
*/ { - SCU_FromSH2_BusRW_DB_u8_W0 (A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u8_W0 (A, DB, SH2DMAHax); } } -static INLINE void BusRW_DB_CS12_u8_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u8_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -825,26 +841,26 @@ static INLINE void BusRW_DB_CS12_u8_W1(const uint32_t A, uint32_t& DB, const boo /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. */ { - SCU_FromSH2_BusRW_DB_u8_W1 (A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u8_W1 (A, DB, SH2DMAHax); } } -static INLINE void BusRW_DB_CS12_u16_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u16_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // // CS1 and CS2: SCU // - DB = 0; + *DB = 0; /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. */ { - SCU_FromSH2_BusRW_DB_u16_W0(A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u16_W0(A, DB, SH2DMAHax); } } -static INLINE void BusRW_DB_CS12_u16_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u16_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -854,26 +870,26 @@ static INLINE void BusRW_DB_CS12_u16_W1(const uint32_t A, uint32_t& DB, const bo /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. */ { - SCU_FromSH2_BusRW_DB_u16_W1(A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u16_W1(A, DB, SH2DMAHax); } } -static INLINE void BusRW_DB_CS12_u32_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u32_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // // CS1 and CS2: SCU // - DB = 0; + *DB = 0; /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. 
*/ { - SCU_FromSH2_BusRW_DB_u32_W0(A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u32_W0(A, DB, SH2DMAHax); } } -static INLINE void BusRW_DB_CS12_u32_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS12_u32_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -883,7 +899,7 @@ static INLINE void BusRW_DB_CS12_u32_W1(const uint32_t A, uint32_t& DB, const bo /* Phase-8q3: sizeof(T) + IsWrite fold at BusRW_DB_CS12 * template instantiation. */ { - SCU_FromSH2_BusRW_DB_u32_W1(A, &DB, SH2DMAHax); + SCU_FromSH2_BusRW_DB_u32_W1(A, DB, SH2DMAHax); } } @@ -891,7 +907,7 @@ static INLINE void BusRW_DB_CS12_u32_W1(const uint32_t A, uint32_t& DB, const bo /* Phase-8r2: BusRW_DB_CS3 retired into 6 named variants * via source-fold. */ -static INLINE void BusRW_DB_CS3_u8_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u8_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -900,17 +916,17 @@ static INLINE void BusRW_DB_CS3_u8_W0(const uint32_t A, uint32_t& DB, const bool // Timing is handled in BSC_BusWrite() and BSC_BusRead() in sh7095.inc // { - /* ne16_rwbo_be(WorkRAMH, byte_off, &DB) folded: + /* ne16_rwbo_be(WorkRAMH, byte_off, DB) folded: * aligned uint32_t BE bus read or write over uint16_t array. Two * uint16_t halves: upper at index, lower at index+1. Same on * BE and LE hosts (host-endian uint16s combined in MSB-first * order). 
*/ const uint32_t idx_ = (A & 0xFFFFC) >> 1; - DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; + *DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; } } -static INLINE void BusRW_DB_CS3_u8_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u8_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -922,7 +938,7 @@ static INLINE void BusRW_DB_CS3_u8_W1(const uint32_t A, uint32_t& DB, const bool /* ne16_wbo_be(WorkRAMH, byte_off, val) folded. T is uint8_t * or uint16_t here (uint32_t caught above). */ const uint32_t boff_ = A & 0xFFFFF; - const uint8_t val_ = DB >> (((A & 3) ^ (4 - 1)) << 3); + const uint8_t val_ = *DB >> (((A & 3) ^ (4 - 1)) << 3); { #ifdef MSB_FIRST ((uint8_t*)WorkRAMH)[boff_] = val_; @@ -933,7 +949,7 @@ static INLINE void BusRW_DB_CS3_u8_W1(const uint32_t A, uint32_t& DB, const bool } } -static INLINE void BusRW_DB_CS3_u16_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u16_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -942,17 +958,17 @@ static INLINE void BusRW_DB_CS3_u16_W0(const uint32_t A, uint32_t& DB, const boo // Timing is handled in BSC_BusWrite() and BSC_BusRead() in sh7095.inc // { - /* ne16_rwbo_be(WorkRAMH, byte_off, &DB) folded: + /* ne16_rwbo_be(WorkRAMH, byte_off, DB) folded: * aligned uint32_t BE bus read or write over uint16_t array. Two * uint16_t halves: upper at index, lower at index+1. Same on * BE and LE hosts (host-endian uint16s combined in MSB-first * order). 
*/ const uint32_t idx_ = (A & 0xFFFFC) >> 1; - DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; + *DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; } } -static INLINE void BusRW_DB_CS3_u16_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u16_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -964,12 +980,12 @@ static INLINE void BusRW_DB_CS3_u16_W1(const uint32_t A, uint32_t& DB, const boo /* ne16_wbo_be(WorkRAMH, byte_off, val) folded. T is uint8_t * or uint16_t here (uint32_t caught above). */ const uint32_t boff_ = A & 0xFFFFF; - const uint16_t val_ = DB >> (((A & 3) ^ (4 - 2)) << 3); + const uint16_t val_ = *DB >> (((A & 3) ^ (4 - 2)) << 3); WorkRAMH[boff_ >> 1] = val_; } } -static INLINE void BusRW_DB_CS3_u32_W0(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u32_W0(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -978,17 +994,17 @@ static INLINE void BusRW_DB_CS3_u32_W0(const uint32_t A, uint32_t& DB, const boo // Timing is handled in BSC_BusWrite() and BSC_BusRead() in sh7095.inc // { - /* ne16_rwbo_be(WorkRAMH, byte_off, &DB) folded: + /* ne16_rwbo_be(WorkRAMH, byte_off, DB) folded: * aligned uint32_t BE bus read or write over uint16_t array. Two * uint16_t halves: upper at index, lower at index+1. Same on * BE and LE hosts (host-endian uint16s combined in MSB-first * order). 
*/ const uint32_t idx_ = (A & 0xFFFFC) >> 1; - DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; + *DB = ((uint32_t)WorkRAMH[idx_] << 16) | WorkRAMH[idx_ + 1]; } } -static INLINE void BusRW_DB_CS3_u32_W1(const uint32_t A, uint32_t& DB, const bool BurstHax, int32_t* SH2DMAHax) +static INLINE void BusRW_DB_CS3_u32_W1(const uint32_t A, uint32_t* DB, const bool BurstHax, int32_t* SH2DMAHax) { // @@ -997,15 +1013,15 @@ static INLINE void BusRW_DB_CS3_u32_W1(const uint32_t A, uint32_t& DB, const boo // Timing is handled in BSC_BusWrite() and BSC_BusRead() in sh7095.inc // { - /* ne16_rwbo_be(WorkRAMH, byte_off, &DB) folded: + /* ne16_rwbo_be(WorkRAMH, byte_off, DB) folded: * aligned uint32_t BE bus read or write over uint16_t array. Two * uint16_t halves: upper at index, lower at index+1. Same on * BE and LE hosts (host-endian uint16s combined in MSB-first * order). */ const uint32_t idx_ = (A & 0xFFFFC) >> 1; { - WorkRAMH[idx_ + 0] = DB >> 16; - WorkRAMH[idx_ + 1] = DB; + WorkRAMH[idx_ + 0] = *DB >> 16; + WorkRAMH[idx_ + 1] = *DB; } } } @@ -1060,8 +1076,8 @@ static MDFN_COLD void CheatMemWrite(uint32_t A, uint8_t V) * extern "C" SH7095_{M,S}_DMA_Update helpers below that wrap the * C++-only SH7095_DMA_Update(&CPU[c], et) method dispatch. */ -extern "C" int32_t SH7095_M_DMA_Update(int32_t et) { return SH7095_DMA_Update(&CPU[0], et); } -extern "C" int32_t SH7095_S_DMA_Update(int32_t et) { return SH7095_DMA_Update(&CPU[1], et); } +int32_t SH7095_M_DMA_Update(int32_t et) { return SH7095_DMA_Update(&CPU[0], et); } +int32_t SH7095_S_DMA_Update(int32_t et) { return SH7095_DMA_Update(&CPU[1], et); } /* ForceEventUpdates stays in ss.cpp -- the first loop dispatches into * SH7095_ForceInternalEventUpdates(&CPU[c]), which is an SH7095 class method @@ -1193,22 +1209,22 @@ static NO_INLINE MDFN_HOT int32_t RunLoop_NoICache(EmulateSpecStruct* espec) * first loop calls CPU[c].ForceInternalEventUpdates (an SH7095 class * method); SH7095_{M,S}_AdjustTS wraps CPU[0/1].AdjustTS. 
All four * retire once the SH7095 class becomes a C struct. */ -extern "C" int32_t SS_RunLoop_ICache(EmulateSpecStruct* espec) { return RunLoop_ICache(espec); } -extern "C" int32_t SS_RunLoop_NoICache(EmulateSpecStruct* espec) { return RunLoop_NoICache(espec); } -extern "C" void SS_ForceEventUpdates(int32_t timestamp) { ForceEventUpdates(timestamp); } -extern "C" void SH7095_M_AdjustTS(int32_t delta) { SH7095_AdjustTS(&CPU[0], delta); } -extern "C" void SH7095_S_AdjustTS(int32_t delta) { SH7095_AdjustTS(&CPU[1], delta); } +int32_t SS_RunLoop_ICache(EmulateSpecStruct* espec) { return RunLoop_ICache(espec); } +int32_t SS_RunLoop_NoICache(EmulateSpecStruct* espec) { return RunLoop_NoICache(espec); } +void SS_ForceEventUpdates(int32_t timestamp) { ForceEventUpdates(timestamp); } +void SH7095_M_AdjustTS(int32_t delta) { SH7095_AdjustTS(&CPU[0], delta); } +void SH7095_S_AdjustTS(int32_t delta) { SH7095_AdjustTS(&CPU[1], delta); } /* Phase-7f: SH7095 wrappers used by InitCommon (Init / SetMD5 / * TruePowerOn) and SS_Reset (TruePowerOn / Reset). Retires when * SH7095 becomes a C struct. 
*/ -extern "C" MDFN_COLD void SH7095_M_Init(const bool emumode_full, const bool emumode_cb_only) { SH7095_Init(&CPU[0], emumode_full, emumode_cb_only); } -extern "C" MDFN_COLD void SH7095_S_Init(const bool emumode_full, const bool emumode_cb_only) { SH7095_Init(&CPU[1], emumode_full, emumode_cb_only); } -extern "C" void SH7095_M_SetMD5(bool level) { SH7095_SetMD5(&CPU[0], level); } -extern "C" void SH7095_S_SetMD5(bool level) { SH7095_SetMD5(&CPU[1], level); } -extern "C" MDFN_COLD void SH7095_M_TruePowerOn(void) { SH7095_TruePowerOn(&CPU[0]); } -extern "C" MDFN_COLD void SH7095_S_TruePowerOn(void) { SH7095_TruePowerOn(&CPU[1]); } -extern "C" MDFN_COLD void SH7095_M_Reset(bool power_on_reset) { SH7095_Reset(&CPU[0], power_on_reset, false); } +MDFN_COLD void SH7095_M_Init(const bool emumode_full, const bool emumode_cb_only) { SH7095_Init(&CPU[0], emumode_full, emumode_cb_only); } +MDFN_COLD void SH7095_S_Init(const bool emumode_full, const bool emumode_cb_only) { SH7095_Init(&CPU[1], emumode_full, emumode_cb_only); } +void SH7095_M_SetMD5(bool level) { SH7095_SetMD5(&CPU[0], level); } +void SH7095_S_SetMD5(bool level) { SH7095_SetMD5(&CPU[1], level); } +MDFN_COLD void SH7095_M_TruePowerOn(void) { SH7095_TruePowerOn(&CPU[0]); } +MDFN_COLD void SH7095_S_TruePowerOn(void) { SH7095_TruePowerOn(&CPU[1]); } +MDFN_COLD void SH7095_M_Reset(bool power_on_reset) { SH7095_Reset(&CPU[0], power_on_reset, false); } // @@ -1238,22 +1254,22 @@ extern "C" MDFN_COLD void SH7095_M_Reset(bool power_on_reset) * through extern "C" wrappers below; those wrappers retire * when the SH7095 class becomes a C struct. 
*/ -extern "C" void SH7095_M_StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname) +void SH7095_M_StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname) { SH7095_StateAction(&CPU[0], sm, load, data_only, sname); } -extern "C" void SH7095_S_StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname) +void SH7095_S_StateAction(StateMem* sm, const unsigned load, const bool data_only, const char* sname) { SH7095_StateAction(&CPU[1], sm, load, data_only, sname); } -extern "C" void SH7095_M_PostStateLoad(const unsigned load, bool prev_NeedEmuICache, bool current_NeedEmuICache) +void SH7095_M_PostStateLoad(const unsigned load, bool prev_NeedEmuICache, bool current_NeedEmuICache) { SH7095_PostStateLoad(&CPU[0], load, prev_NeedEmuICache, current_NeedEmuICache); } -extern "C" void SH7095_S_PostStateLoad(const unsigned load, bool prev_NeedEmuICache, bool current_NeedEmuICache) +void SH7095_S_PostStateLoad(const unsigned load, bool prev_NeedEmuICache, bool current_NeedEmuICache) { SH7095_PostStateLoad(&CPU[1], load, prev_NeedEmuICache, current_NeedEmuICache); } diff --git a/mednafen/ss/ss.h b/mednafen/ss/ss.h index e5d18bd2..eb6c4d26 100644 --- a/mednafen/ss/ss.h +++ b/mednafen/ss/ss.h @@ -26,11 +26,9 @@ #include "../math_ops.h" #include #include -/* C inclusion (for future C-converted SS modules) needs the stdbool +/* C inclusion (for future C-converted SS modules) needs the boolean * keyword macros; C++ has `bool` built in. */ -#ifndef __cplusplus -#include -#endif +#include /* SS_EVENT_*, HORRIBLEHACK_*, event_list_entry: shared verbatim with the C-converted modules (vdp1.c, ...). Single source of truth -- see header. 
*/ diff --git a/mednafen/ss/ss_init.c b/mednafen/ss/ss_init.c index 711b790f..a839186b 100644 --- a/mednafen/ss/ss_init.c +++ b/mednafen/ss/ss_init.c @@ -32,12 +32,12 @@ */ #include -#include #include #include #include +#include -#include +#include "../mednafen-types.h" /* mednafen.h pulls in git.h which #includes -- C++-only. * The only thing ss_init.c needs from mednafen.h is the _() identity * macro for translation strings. Define it directly. */ @@ -62,8 +62,8 @@ #include "../mempatcher.h" #include "../settings.h" #include "../hash/sha256.h" -#include -#include +#include "../emuspec.h" +#include "../mdfn_gameinfo.h" #include "../general.h" /* MDFN_MidSync, log_cb (via cdstream.h) */ #include "../../libretro_settings.h" /* setting_midsync, setting_multitap_port*, * retro_base_directory */ diff --git a/mednafen/ss/ss_init.h b/mednafen/ss/ss_init.h index ca3d836a..b8dc8555 100644 --- a/mednafen/ss/ss_init.h +++ b/mednafen/ss/ss_init.h @@ -28,11 +28,9 @@ #define __MDFN_SS_INIT_H #include -#ifndef __cplusplus -#include -#endif +#include -#include +#include "../mednafen-types.h" #include #include diff --git a/mednafen/ss/ss_state.c b/mednafen/ss/ss_state.c index 3f5c4631..6d76fcd4 100644 --- a/mednafen/ss/ss_state.c +++ b/mednafen/ss/ss_state.c @@ -26,13 +26,13 @@ */ #include -#include #include #include +#include -#include -#include -#include +#include "../mednafen-types.h" +#include "../state.h" +#include "../hash/sha256.h" #include "ss.h" #include "ss_state.h" #include "ss_init.h" /* events[], next_event_ts, InitEvents */ diff --git a/mednafen/ss/ss_state.h b/mednafen/ss/ss_state.h index d912c9ef..7f2151f4 100644 --- a/mednafen/ss/ss_state.h +++ b/mednafen/ss/ss_state.h @@ -28,7 +28,7 @@ #define __MDFN_SS_STATE_H #include -#include +#include "../mednafen-types.h" #ifdef __cplusplus extern "C" { diff --git a/mednafen/ss/stvio.c b/mednafen/ss/stvio.c index 88ef6ca2..d236e703 100644 --- a/mednafen/ss/stvio.c +++ b/mednafen/ss/stvio.c @@ -20,14 +20,14 @@ */ #include 
-#include #include #include #include #include +#include -#include /* MDFN_HOT, MDFN_COLD, MDFN_HIDE */ -#include /* crc16_ccitt */ +#include "../mednafen-types.h" /* MDFN_HOT, MDFN_COLD, MDFN_HIDE */ +#include "../hash/crc.h" /* crc16_ccitt */ #include "ak93c45.h" diff --git a/mednafen/ss/stvio.h b/mednafen/ss/stvio.h index 9b5c0a9f..4e466975 100644 --- a/mednafen/ss/stvio.h +++ b/mednafen/ss/stvio.h @@ -23,17 +23,14 @@ #define __MDFN_SS_STVIO_H #include -#ifndef __cplusplus -#include -#endif - -#include +#include +#include "../state.h" #include "../cdstream.h" #include "smpc_iodevice.h" /* stvio only needs STVGameInfo and STV_* enums; both live in - * db_stv.h which is pure C (stdint+stdbool). db.h is C-clean too + * db_stv.h which is pure C (stdint+boolean). db.h is C-clean too * (was made so when the std::-using DB_GetHHDescriptions / * DB_GetInternalDB functions were dropped as dead code), but * db_stv.h is the narrower include matching actual usage. */ diff --git a/mednafen/ss/vdp1.h b/mednafen/ss/vdp1.h index 6d8955ee..dd9f2672 100644 --- a/mednafen/ss/vdp1.h +++ b/mednafen/ss/vdp1.h @@ -23,13 +23,11 @@ #define __MDFN_SS_VDP1_H #include -#ifndef __cplusplus -#include -#endif +#include -#include #include -#include +#include "../mednafen-types.h" +#include "../state.h" /* Formerly `namespace VDP1`. Converted to C: the namespace is removed and every exported symbol gets a VDP1_ prefix. sscpu_timestamp_t diff --git a/mednafen/ss/vdp1_common.h b/mednafen/ss/vdp1_common.h index 7899bac6..3155dfcd 100644 --- a/mednafen/ss/vdp1_common.h +++ b/mednafen/ss/vdp1_common.h @@ -15,11 +15,9 @@ #include #include -#ifndef __cplusplus -#include -#endif -#include +#include #include +#include "../mednafen-types.h" #include "vdp1.h" /* Internal shorthand: map unqualified names to VDP1_-prefixed globals. 
diff --git a/mednafen/ss/vdp1_line.c b/mednafen/ss/vdp1_line.c index b834ccc7..6797929b 100644 --- a/mednafen/ss/vdp1_line.c +++ b/mednafen/ss/vdp1_line.c @@ -20,7 +20,7 @@ */ #include "vdp1_common.h" -#include +#include "../math_ops.h" /* Line: AA=0, Textured=0, ECD=0, HalfFGEn=(c&0x2) This matches the original DrawLine instantiation. */ diff --git a/mednafen/ss/vdp1_poly.c b/mednafen/ss/vdp1_poly.c index af250f5d..2a80957f 100644 --- a/mednafen/ss/vdp1_poly.c +++ b/mednafen/ss/vdp1_poly.c @@ -20,7 +20,7 @@ */ #include "vdp1_common.h" -#include +#include "../math_ops.h" /* Polygon: AA=1, Textured=0, ECD=0, HalfFGEn=(c&0x2) */ #define VDP1_DL_POLY_GEN(die, bpp8, b, c) \ diff --git a/mednafen/ss/vdp1_sprite.c b/mednafen/ss/vdp1_sprite.c index 68f7b7cd..edcb4bf7 100644 --- a/mednafen/ss/vdp1_sprite.c +++ b/mednafen/ss/vdp1_sprite.c @@ -20,7 +20,7 @@ */ #include "vdp1_common.h" -#include +#include "../math_ops.h" //#pragma GCC optimize("Os,no-crossjumping") diff --git a/mednafen/ss/vdp2.h b/mednafen/ss/vdp2.h index 49edc537..b5ebcdad 100644 --- a/mednafen/ss/vdp2.h +++ b/mednafen/ss/vdp2.h @@ -22,7 +22,7 @@ #ifndef __MDFN_SS_VDP2_H #define __MDFN_SS_VDP2_H -#include +#include "../state.h" #include "ss.h" /* sscpu_timestamp_t, events[], SS_SetEventNT, SS_EVENT_VDP2 */ /* MDFNGI / EmulateSpecStruct: forward-declared rather than pulling diff --git a/mednafen/ss/vdp2_render.c b/mednafen/ss/vdp2_render.c index bca3da75..ed71e5a5 100644 --- a/mednafen/ss/vdp2_render.c +++ b/mednafen/ss/vdp2_render.c @@ -26,8 +26,8 @@ #include "ss.h" #include "ss_memory.h" -#include -#include +#include "../emuspec.h" +#include "../mdfn_gameinfo.h" #include "vdp2_common.h" #include "vdp2_render.h" @@ -2055,20 +2055,23 @@ static MDFN_FORCE_INLINE void DrawCell8_BPP4(uint64_t* out, \ if(((ZMCTL >> (n << 3)) & 0x3) && VCSEn) \ { \ + /* CACHE FIX (#71): tile_vrb depends on (celly = iy & 7) in tile mode and \ + * varies bit-wise on (ix, iy) in bitmap mode, neither captured by \ + * (celli, cellj) 
alone. Key extended to full `iy` (catches iy & 7); bitmap \ + * mode bypassed via compile-time `(BMEN) ||` in the predicate. */ \ uint32_t prev_celli = ~0u; \ - uint32_t prev_cellj = ~0u; \ + uint32_t prev_iy = ~0u; \ \ for(unsigned i = 0; MDFN_LIKELY(i < w); i++) \ { \ const uint32_t ix = xc >> 8; \ iy = LB.vcscr[n][i >> 3]; \ const uint32_t celli = ix >> 3; \ - const uint32_t cellj = iy >> 3; \ \ - if(celli != prev_celli || cellj != prev_cellj) \ + if((BMEN) || celli != prev_celli || iy != prev_iy) \ { \ prev_celli = celli; \ - prev_cellj = cellj; \ + prev_iy = iy; \ TF_NR_FETCH(&tf, BPP, (BMEN), ix, iy); \ } \ /* */ \ @@ -2752,9 +2755,11 @@ static void SetupRotVars(const struct VDP2Rend_RotVars* rs, const unsigned rbg_w \ MAKE_SFCODE_LUT((PMODE), (CCMODE), (rn ? 0 : 4), sfcode_lut); \ \ + /* CACHE FIX (#71): key cache on full `iy` (tile-mode safe; catches iy & 7); bypass \ + * entirely in bitmap mode via `(BMEN) ||` in the predicate. */ \ unsigned prev_ab = ~0u; \ uint32_t prev_celli = ~0u; \ - uint32_t prev_cellj = ~0u; \ + uint32_t prev_iy = ~0u; \ bool prev_rot_tp_f = false; \ \ for(unsigned i = 0; MDFN_LIKELY(i < w); i++) \ @@ -2788,13 +2793,12 @@ static void SetupRotVars(const struct VDP2Rend_RotVars* rs, const unsigned rbg_w const uint32_t iy = (r->Yp + (uint32_t)(((int64_t)ky * (int32_t)(r->Ysp + (r->dY * i))) >> 16)) >> 10; \ \ const uint32_t celli = ix >> 3; \ - const uint32_t cellj = iy >> 3; \ \ - if(ab != prev_ab || celli != prev_celli || cellj != prev_cellj) \ + if((BMEN) || ab != prev_ab || celli != prev_celli || iy != prev_iy) \ { \ prev_ab = ab; \ prev_celli = celli; \ - prev_cellj = cellj; \ + prev_iy = iy; \ prev_rot_tp_f = TF_ROT_FETCH(tf, BPP, (BMEN), ix, iy); \ } \ rot_tp |= prev_rot_tp_f; \ @@ -2929,8 +2933,9 @@ static void (*DrawRBG[2 /*bitmap enable*/][5/*col mode*/][2/*igntp*/][3/*priomod const uint32_t r_base_c = r->base_coeff; \ const uint8_t ktctl_md = (KTCTL[const_ab] >> 2) & 0x3; \ \ + /* CACHE FIX (#71): see SetupRotVars sibling 
above. */ \ uint32_t prev_celli = ~0u; \ - uint32_t prev_cellj = ~0u; \ + uint32_t prev_iy = ~0u; \ bool prev_rot_tp_f = false; \ \ /* Strength-reduce the per-pixel (r_dX * i) / (r_dY * i) into running \ @@ -2977,12 +2982,11 @@ static void (*DrawRBG[2 /*bitmap enable*/][5/*col mode*/][2/*igntp*/][3/*priomod arg_y_u += r_dY_u; \ \ const uint32_t celli = ix >> 3; \ - const uint32_t cellj = iy >> 3; \ \ - if(celli != prev_celli || cellj != prev_cellj) \ + if(BMEN || celli != prev_celli || iy != prev_iy) \ { \ prev_celli = celli; \ - prev_cellj = cellj; \ + prev_iy = iy; \ prev_rot_tp_f = TF_ROT_FETCH(tf, BPP, BMEN, ix, iy); \ } \ rot_tp |= prev_rot_tp_f; \ diff --git a/mednafen/ss/vdp2_render.h b/mednafen/ss/vdp2_render.h index 6f1326a4..4e610790 100644 --- a/mednafen/ss/vdp2_render.h +++ b/mednafen/ss/vdp2_render.h @@ -22,7 +22,7 @@ #ifndef __MDFN_SS_VDP2_RENDER_H #define __MDFN_SS_VDP2_RENDER_H -#include +#include "../state.h" /* git.h is C++-only (CheatFormatStruct's std::exception, * GameDB_Entry's std::vector, etc.). This header now needs to * parse as C because vdp2.c (formerly vdp2.cpp) includes it. diff --git a/mednafen/state.h b/mednafen/state.h index 2cd2a4fb..16129d96 100644 --- a/mednafen/state.h +++ b/mednafen/state.h @@ -25,9 +25,7 @@ #include #include #include -#ifndef __cplusplus -#include -#endif +#include typedef struct {