Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Optimization] Experimental: LV2: Add cpu_flag::unmem #12664

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 33 additions & 13 deletions rpcs3/Emu/CPU/CPUThread.cpp
Original file line number Diff line number Diff line change
@@ -46,6 +46,7 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
case cpu_flag::stop: return "STOP";
case cpu_flag::exit: return "EXIT";
case cpu_flag::wait: return "w";
case cpu_flag::unmem: return "un";
case cpu_flag::temp: return "t";
case cpu_flag::pause: return "p";
case cpu_flag::suspend: return "s";
@@ -498,10 +499,10 @@ void cpu_thread::operator()()
cpu_thread* _cpu = get_current_cpu_thread();

// Wait flag isn't set asynchronously so this should be thread-safe
if (progress == 0 && _cpu->state.none_of(cpu_flag::wait + cpu_flag::temp))
if (progress == 0 && _cpu->state.none_of(cpu_flag::unmem + cpu_flag::temp + cpu_flag::wait))
{
// Operation just started and syscall is imminent
_cpu->state += cpu_flag::wait + cpu_flag::temp;
_cpu->state += cpu_flag::unmem + cpu_flag::temp;
wait_set = true;
return;
}
@@ -661,12 +662,12 @@ bool cpu_thread::check_state() noexcept

if (flags & cpu_flag::pause && s_tls_thread_slot != umax)
{
// Save value before state is saved and cpu_flag::wait is observed
// Save value before state is saved and cpu_flag::unmem is observed
if (s_tls_sctr == umax)
{
u64 ctr = g_suspend_counter;

if (flags & cpu_flag::wait)
if (flags & (cpu_flag::unmem + cpu_flag::wait))
{
if ((ctr & 3) == 2)
{
@@ -695,7 +696,6 @@ bool cpu_thread::check_state() noexcept
{
// Sticky flag, indicates check_state() is not allowed to return true
flags -= cpu_flag::temp;
flags -= cpu_flag::wait;
cpu_can_stop = false;
store = true;
}
@@ -756,19 +756,39 @@ bool cpu_thread::check_state() noexcept
return store;
}

if (flags & cpu_flag::wait)
if (flags & (cpu_flag::wait + cpu_flag::unmem))
{
flags -= cpu_flag::wait;
flags -= (cpu_flag::wait + cpu_flag::unmem);
store = true;
}

retval = false;
}
else
{
if (cpu_can_stop && !(flags & cpu_flag::wait))
if (cpu_can_stop)
{
flags += cpu_flag::wait;
if (flags & (cpu_flag::yield + cpu_flag::preempt))
{
flags -= (cpu_flag::yield + cpu_flag::preempt);
store = true;
}

if (::is_stopped(flags) == !(flags & cpu_flag::wait))
{
flags ^= cpu_flag::wait;
store = true;
}

if (!(flags & cpu_flag::unmem))
{
flags += cpu_flag::unmem;
store = true;
}
}
else if (flags & (cpu_flag::wait + cpu_flag::unmem))
{
flags -= (cpu_flag::wait + cpu_flag::unmem);
store = true;
}

@@ -780,7 +800,7 @@ bool cpu_thread::check_state() noexcept
return store;
}).first;

if (state0 & cpu_flag::preempt && cpu_can_stop)
if ((state0 - state1) & cpu_flag::preempt)
{
if (cpu_flag::wait - state0)
{
@@ -1116,7 +1136,7 @@ std::string cpu_thread::dump_misc() const
bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
{
// Can't allow pre-set wait or unmem bits on the calling thread (it'd be a problem)
ensure(!_this || !(_this->state & cpu_flag::wait));
ensure(!_this || !(_this->state & (cpu_flag::unmem + cpu_flag::wait)));

do
{
@@ -1168,7 +1188,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept

copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu, u32 /*index*/)
{
if (cpu->state.fetch_add(cpu_flag::pause) & cpu_flag::wait)
if (cpu->state.fetch_add(cpu_flag::pause) & (cpu_flag::unmem + cpu_flag::wait))
{
// Clear bits as long as the wait or unmem flag is set
return false;
@@ -1182,7 +1202,7 @@ bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept
// Check only CPUs which haven't acknowledged their waiting state yet
copy = cpu_counter::for_all_cpu(copy, [&](cpu_thread* cpu, u32 /*index*/)
{
if (cpu->state & cpu_flag::wait)
if (cpu->state & (cpu_flag::unmem + cpu_flag::wait))
{
return false;
}
1 change: 1 addition & 0 deletions rpcs3/Emu/CPU/CPUThread.h
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@ enum class cpu_flag : u32
stop, // Thread not running (HLE, initial state)
exit, // Irreversible exit
wait, // Indicates waiting state, set by the thread itself
unmem, // Thread has unlocked memory mutex
temp, // Indicates that the thread cannot properly return after next check_state()
pause, // Thread suspended by suspend_all technique
suspend, // Thread suspended
14 changes: 7 additions & 7 deletions rpcs3/Emu/Cell/Modules/cellVdec.cpp
Original file line number Diff line number Diff line change
@@ -998,7 +998,7 @@ error_code cellVdecClose(ppu_thread& ppu, u32 handle)

error_code cellVdecStartSeq(ppu_thread& ppu, u32 handle)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.warning("cellVdecStartSeq(handle=0x%x)", handle);

@@ -1050,7 +1050,7 @@ error_code cellVdecStartSeq(ppu_thread& ppu, u32 handle)

error_code cellVdecEndSeq(ppu_thread& ppu, u32 handle)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.warning("cellVdecEndSeq(handle=0x%x)", handle);

@@ -1083,7 +1083,7 @@ error_code cellVdecEndSeq(ppu_thread& ppu, u32 handle)

error_code cellVdecDecodeAu(ppu_thread& ppu, u32 handle, CellVdecDecodeMode mode, vm::cptr<CellVdecAuInfo> auInfo)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.trace("cellVdecDecodeAu(handle=0x%x, mode=%d, auInfo=*0x%x)", handle, +mode, auInfo);

@@ -1131,7 +1131,7 @@ error_code cellVdecDecodeAu(ppu_thread& ppu, u32 handle, CellVdecDecodeMode mode

error_code cellVdecDecodeAuEx2(ppu_thread& ppu, u32 handle, CellVdecDecodeMode mode, vm::cptr<CellVdecAuInfoEx2> auInfo)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.todo("cellVdecDecodeAuEx2(handle=0x%x, mode=%d, auInfo=*0x%x)", handle, +mode, auInfo);

@@ -1187,7 +1187,7 @@ error_code cellVdecDecodeAuEx2(ppu_thread& ppu, u32 handle, CellVdecDecodeMode m

error_code cellVdecGetPictureExt(ppu_thread& ppu, u32 handle, vm::cptr<CellVdecPicFormat2> format, vm::ptr<u8> outBuff, u32 arg4)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.trace("cellVdecGetPictureExt(handle=0x%x, format=*0x%x, outBuff=*0x%x, arg4=*0x%x)", handle, format, outBuff, arg4);

@@ -1334,7 +1334,7 @@ error_code cellVdecGetPictureExt(ppu_thread& ppu, u32 handle, vm::cptr<CellVdecP

error_code cellVdecGetPicture(ppu_thread& ppu, u32 handle, vm::cptr<CellVdecPicFormat> format, vm::ptr<u8> outBuff)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.trace("cellVdecGetPicture(handle=0x%x, format=*0x%x, outBuff=*0x%x)", handle, format, outBuff);

@@ -1355,7 +1355,7 @@ error_code cellVdecGetPicture(ppu_thread& ppu, u32 handle, vm::cptr<CellVdecPicF

error_code cellVdecGetPicItem(ppu_thread& ppu, u32 handle, vm::pptr<CellVdecPicItem> picItem)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

cellVdec.trace("cellVdecGetPicItem(handle=0x%x, picItem=**0x%x)", handle, picItem);

2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/Modules/sys_ppu_thread_.cpp
Original file line number Diff line number Diff line change
@@ -125,7 +125,7 @@ void sys_initialize_tls(ppu_thread& ppu, u64 main_thread_id, u32 tls_seg_addr, u

error_code sys_ppu_thread_create(ppu_thread& ppu, vm::ptr<u64> thread_id, u32 entry, u64 arg, s32 prio, u32 stacksize, u64 flags, vm::cptr<char> threadname)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sysPrxForUser.warning("sys_ppu_thread_create(thread_id=*0x%x, entry=0x%x, arg=0x%llx, prio=%d, stacksize=0x%x, flags=0x%llx, threadname=%s)",
thread_id, entry, arg, prio, stacksize, flags, threadname);
4 changes: 2 additions & 2 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
@@ -1501,8 +1501,8 @@ void ppu_thread::cpu_sleep()
// Clear reservation
raddr = 0;

// Setup wait flag and memory flags to relock itself
state += g_use_rtm ? cpu_flag::wait : cpu_flag::wait + cpu_flag::memory;
// Setup unmem flag and memory flags to relock itself
state += g_use_rtm ? cpu_flag::unmem : cpu_flag::unmem + cpu_flag::memory;

if (auto ptr = vm::g_tls_locked)
{
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
Original file line number Diff line number Diff line change
@@ -1446,7 +1446,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)

if (out > 1500)
{
_spu->state += cpu_flag::wait;
_spu->state += cpu_flag::unmem;
std::this_thread::yield();

if (_spu->test_stopped())
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
@@ -6195,7 +6195,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

if (res > 1500 && g_cfg.core.spu_loop_detection)
{
_spu->state += cpu_flag::wait;
_spu->state += cpu_flag::unmem;
std::this_thread::yield();
static_cast<void>(_spu->test_stopped());
}
4 changes: 3 additions & 1 deletion rpcs3/Emu/Cell/SPUThread.cpp
Original file line number Diff line number Diff line change
@@ -4248,6 +4248,7 @@ s64 spu_thread::get_ch_value(u32 ch)
spu_function_logger logger(*this, "MFC Events read");

lv2_obj::prepare_for_sleep(*this);
state += cpu_flag::wait;

using resrv_ptr = std::add_pointer_t<const decltype(rdata)>;

@@ -4958,6 +4959,7 @@ bool spu_thread::stop_and_signal(u32 code)
}

lv2_obj::prepare_for_sleep(*this);
state += cpu_flag::wait;

spu_function_logger logger(*this, "sys_spu_thread_receive_event");

@@ -5057,7 +5059,7 @@ bool spu_thread::stop_and_signal(u32 code)

while (auto old = +state)
{
if (old & cpu_flag::signal && state.test_and_reset(cpu_flag::signal))
if (old & cpu_flag::signal)
{
break;
}
17 changes: 13 additions & 4 deletions rpcs3/Emu/Cell/lv2/lv2.cpp
Original file line number Diff line number Diff line change
@@ -1341,7 +1341,7 @@ bool lv2_obj::sleep_unlocked(cpu_thread& thread, u64 timeout, u64 current_time)
{
if (!(val & cpu_flag::signal))
{
val += cpu_flag::suspend;
val += cpu_flag::suspend + cpu_flag::wait;

// Flag used for forced timeout notification
ensure(!timeout || !(val & cpu_flag::notify));
@@ -1471,6 +1471,7 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
}

ppu->start_time = get_guest_system_time();
ppu->state += cpu_flag::suspend + cpu_flag::wait;
break;
}

@@ -1554,12 +1555,16 @@ bool lv2_obj::awake_unlocked(cpu_thread* cpu, s32 prio)
// Suspend threads if necessary
for (usz i = 0, thread_count = g_cfg.core.ppu_threads; target; target = target->next_ppu, i++)
{
if (i >= thread_count && cpu_flag::suspend - target->state)
if (i >= thread_count && cpu_flag::suspend - target->state && target->state.atomic_op([](bs_t<cpu_flag>& state)
{
ensure(cpu_flag::suspend - state);
state += cpu_flag::suspend;
return state.none_of(cpu_flag::wait + cpu_flag::signal);
}))
{
ppu_log.trace("suspend(): %s", target->id);
target->ack_suspend = true;
g_pending++;
ensure(!target->state.test_and_set(cpu_flag::suspend));

if (is_paused(target->state - cpu_flag::suspend))
{
@@ -1605,9 +1610,13 @@ void lv2_obj::schedule_all(u64 current_time)
if (target->state & cpu_flag::suspend)
{
ppu_log.trace("schedule(): %s", target->id);
target->state.atomic_op(FN(x += cpu_flag::signal, x -= cpu_flag::suspend));
target->start_time = 0;

if ((cpu_flag::signal + cpu_flag::wait) - target->state.fetch_op(FN(x += cpu_flag::signal, x -= cpu_flag::suspend, void())) != cpu_flag::signal)
{
continue;
}

if (notify_later_idx == std::size(g_to_notify))
{
// Out of notification slots, notify locally (resizable container is not worth it)
14 changes: 7 additions & 7 deletions rpcs3/Emu/Cell/lv2/sys_cond.cpp
Original file line number Diff line number Diff line change
@@ -72,7 +72,7 @@ void lv2_cond::save(utils::serial& ar)

error_code sys_cond_create(ppu_thread& ppu, vm::ptr<u32> cond_id, u32 mutex_id, vm::ptr<sys_cond_attribute_t> attr)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.warning("sys_cond_create(cond_id=*0x%x, mutex_id=0x%x, attr=*0x%x)", cond_id, mutex_id, attr);

@@ -105,7 +105,7 @@ error_code sys_cond_create(ppu_thread& ppu, vm::ptr<u32> cond_id, u32 mutex_id,

error_code sys_cond_destroy(ppu_thread& ppu, u32 cond_id)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.warning("sys_cond_destroy(cond_id=0x%x)", cond_id);

@@ -138,7 +138,7 @@ error_code sys_cond_destroy(ppu_thread& ppu, u32 cond_id)

error_code sys_cond_signal(ppu_thread& ppu, u32 cond_id)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.trace("sys_cond_signal(cond_id=0x%x)", cond_id);

@@ -176,7 +176,7 @@ error_code sys_cond_signal(ppu_thread& ppu, u32 cond_id)

error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.trace("sys_cond_signal_all(cond_id=0x%x)", cond_id);

@@ -224,7 +224,7 @@ error_code sys_cond_signal_all(ppu_thread& ppu, u32 cond_id)

error_code sys_cond_signal_to(ppu_thread& ppu, u32 cond_id, u32 thread_id)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.trace("sys_cond_signal_to(cond_id=0x%x, thread_id=0x%x)", cond_id, thread_id);

@@ -279,7 +279,7 @@ error_code sys_cond_signal_to(ppu_thread& ppu, u32 cond_id, u32 thread_id)

error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)
{
ppu.state += cpu_flag::wait;
ppu.state += cpu_flag::unmem;

sys_cond.trace("sys_cond_wait(cond_id=0x%x, timeout=%lld)", cond_id, timeout);

@@ -360,7 +360,7 @@ error_code sys_cond_wait(ppu_thread& ppu, u32 cond_id, u64 timeout)

while (auto state = +ppu.state)
{
if (state & cpu_flag::signal && ppu.state.test_and_reset(cpu_flag::signal))
if (state & cpu_flag::signal)
{
break;
}
Loading