From 14585f82138922a2d237dc22a6098093565dd397 Mon Sep 17 00:00:00 2001 From: toxieainc Date: Sun, 29 Sep 2024 21:33:45 +0200 Subject: [PATCH 1/3] improve SuperSleepUntil implementation: 1) as found in lots of experiments done for the VPX and PinMAME projects, Sleep() on Windows can oversleep by more than 1ms, especially when doing Sleep(>1) 2) thus loop Sleep(1) and stop once no more than 2ms remain 3) in the spin that waits for the rest of the time, insert a yield (=_mm_pause), or the arm64 equivalent determined by the Rust devs. This actually gets rid of micro-stutter on my AMD-based mini PC, e.g. in Daytona2. Also use the same Sleep() implementation in the network code (to avoid potential side effects between the 2 implementations) --- Src/Network/SimNetBoard.cpp | 8 +++----- Src/Network/TCPReceive.cpp | 12 ++++-------- Src/OSD/SDL/Main.cpp | 39 ++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/Src/Network/SimNetBoard.cpp b/Src/Network/SimNetBoard.cpp index 36635139..26cacd08 100644 --- a/Src/Network/SimNetBoard.cpp +++ b/Src/Network/SimNetBoard.cpp @@ -20,10 +20,10 @@ ** with Supermodel. If not, see . 
**/ -#include #include #include "Supermodel.h" #include "SimNetBoard.h" +#include // these make 16-bit read/writes much neater #define RAM16 *(uint16_t*)&RAM @@ -527,8 +527,6 @@ void CSimNetBoard::GetGame(const Game& gameInfo) void CSimNetBoard::ConnectProc(void) { - using namespace std::chrono_literals; - if (m_connected) return; @@ -546,7 +544,7 @@ void CSimNetBoard::ConnectProc(void) { if (m_quit) return; - std::this_thread::sleep_for(1ms); + CThread::Sleep(1); } printf("Successfully connected.\n"); @@ -624,4 +622,4 @@ void CSimNetBoard::WriteIORegister(unsigned reg, uint16_t data) default: ErrorLog("write to unknown IO register 0x%02x", reg); } -} \ No newline at end of file +} diff --git a/Src/Network/TCPReceive.cpp b/Src/Network/TCPReceive.cpp index 6e235c65..4b3dcdc4 100644 --- a/Src/Network/TCPReceive.cpp +++ b/Src/Network/TCPReceive.cpp @@ -22,9 +22,7 @@ #include "TCPReceive.h" #include "OSD/Logger.h" -#include - -using namespace std::chrono_literals; +#include "OSD/Thread.h" #if defined(_DEBUG) #include @@ -98,9 +96,7 @@ std::vector& TCPReceive::Receive() } int size = 0; - int result = 0; - - result = SDLNet_TCP_Recv(m_receiveSocket, &size, sizeof(int)); + int result = SDLNet_TCP_Recv(m_receiveSocket, &size, sizeof(int)); DPRINTF("Received %i bytes\n", result); if (result <= 0) { SDLNet_TCP_Close(m_receiveSocket); @@ -130,7 +126,7 @@ void TCPReceive::ListenFunc() { while (m_running) { - std::this_thread::sleep_for(16ms); + CThread::Sleep(16); if (m_receiveSocket) continue; auto socket = SDLNet_TCP_Accept(m_listenSocket); @@ -156,4 +152,4 @@ void TCPReceive::ListenFunc() bool TCPReceive::Connected() { return (m_receiveSocket != 0); -} \ No newline at end of file +} diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp index 021df00c..73f47380 100644 --- a/Src/OSD/SDL/Main.cpp +++ b/Src/OSD/SDL/Main.cpp @@ -102,6 +102,10 @@ #include "Crosshair.h" +#if (defined(_M_X64) || defined(__x86_64__)) +#include +#endif + 
/****************************************************************************** Global Run-time Config ******************************************************************************/ @@ -873,33 +877,40 @@ static uint64_t GetDesiredRefreshRateMilliHz() return refreshRateMilliHz; } -static void SuperSleepUntil(uint64_t target) +static void SuperSleepUntil(const uint64_t target) { uint64_t time = SDL_GetPerformanceCounter(); // If we're ahead of the target, we're done - if (time > target) + if (time >= target) { return; } - // Compute the whole number of millis to sleep. Because OS sleep is not accurate, - // we actually sleep for one less and will spin-wait for the final millisecond. - int32_t numWholeMillisToSleep = int32_t((target - time) * 1000 / s_perfCounterFrequency); - numWholeMillisToSleep -= 1; - if (numWholeMillisToSleep > 0) + // Because OS sleep is not accurate, + // we actually sleep until a maximum of 2 milliseconds are left. + while (int64_t(target - time) * 1000 > 2 * int64_t(s_perfCounterFrequency)) { - SDL_Delay(numWholeMillisToSleep); + SDL_Delay(1); + time = SDL_GetPerformanceCounter(); } // Spin until requested time - volatile uint64_t now; - int32_t remain; + int64_t remain; do { - now = SDL_GetPerformanceCounter(); - remain = int32_t((target - now)); - } while (remain>0); + // according to all available processor documentation for x86 and arm, + // spinning should pause the processor for a short while for better + // power efficiency and (surprisingly) overall faster system performance +#ifdef _WIN32 + YieldProcessor(); +#elif (defined(_M_X64) || defined(__x86_64__)) + _mm_pause(); +#elif (defined(_M_ARM64) || defined(__aarch64__)) + __asm__ __volatile__("isb\n"); // as researched by Rust devs +#endif + remain = target - SDL_GetPerformanceCounter(); + } while (remain > 0); } @@ -977,7 +988,7 @@ int Supermodel(const Game &game, ROMSet *rom_set, IEmulator *Model3, CInputs *In if (gameHasLightguns) videoInputs = Inputs; else - videoInputs = NULL; 
+ videoInputs = nullptr; // Attach the inputs to the emulator Model3->AttachInputs(Inputs); From ae293fe38604f1dc886b262c82137c9defe1d00c Mon Sep 17 00:00:00 2001 From: toxieainc Date: Mon, 30 Sep 2024 08:27:49 +0200 Subject: [PATCH 2/3] use SDL_CPUPauseInstruction --- Src/OSD/SDL/Main.cpp | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp index 73f47380..11aed8d1 100644 --- a/Src/OSD/SDL/Main.cpp +++ b/Src/OSD/SDL/Main.cpp @@ -102,10 +102,6 @@ #include "Crosshair.h" -#if (defined(_M_X64) || defined(__x86_64__)) -#include -#endif - /****************************************************************************** Global Run-time Config ******************************************************************************/ @@ -902,13 +898,7 @@ static void SuperSleepUntil(const uint64_t target) // according to all available processor documentation for x86 and arm, // spinning should pause the processor for a short while for better // power efficiency and (surprisingly) overall faster system performance -#ifdef _WIN32 - YieldProcessor(); -#elif (defined(_M_X64) || defined(__x86_64__)) - _mm_pause(); -#elif (defined(_M_ARM64) || defined(__aarch64__)) - __asm__ __volatile__("isb\n"); // as researched by Rust devs -#endif + SDL_CPUPauseInstruction(); remain = target - SDL_GetPerformanceCounter(); } while (remain > 0); } From 518e3df4dd48dfedcc499c5d1bbbb3b894e0dea7 Mon Sep 17 00:00:00 2001 From: toxieainc Date: Mon, 30 Sep 2024 22:28:58 +0200 Subject: [PATCH 3/3] only use SDL_CPUPauseInstruction if it exists --- Src/OSD/SDL/Main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp index 11aed8d1..bdaa34f5 100644 --- a/Src/OSD/SDL/Main.cpp +++ b/Src/OSD/SDL/Main.cpp @@ -898,7 +898,9 @@ static void SuperSleepUntil(const uint64_t target) // according to all available processor documentation for x86 and arm, // spinning should pause the processor for a short while for 
better // power efficiency and (surprisingly) overall faster system performance + #ifdef SDL_CPUPauseInstruction SDL_CPUPauseInstruction(); + #endif remain = target - SDL_GetPerformanceCounter(); } while (remain > 0); }