Skip to content

Commit

Permalink
Allow Short Circuiting GL Fences
Browse files Browse the repository at this point in the history
  • Loading branch information
cwfitzgerald committed Jan 18, 2025
1 parent b452cab commit 3bba040
Show file tree
Hide file tree
Showing 11 changed files with 293 additions and 142 deletions.
4 changes: 1 addition & 3 deletions deno_webgpu/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,7 @@ pub fn op_webgpu_request_adapter(
dx12: wgpu_types::Dx12BackendOptions {
shader_compiler: wgpu_types::Dx12Compiler::Fxc,
},
gl: wgpu_types::GlBackendOptions {
gles_minor_version: wgpu_types::Gles3MinorVersion::default(),
},
gl: wgpu_types::GlBackendOptions::default(),
},
},
)));
Expand Down
3 changes: 1 addition & 2 deletions tests/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,14 @@ pub fn initialize_instance(backends: wgpu::Backends, force_fxc: bool) -> Instanc
} else {
wgpu::Dx12Compiler::from_env().unwrap_or(wgpu::Dx12Compiler::StaticDxc)
};
let gles_minor_version = wgpu::Gles3MinorVersion::from_env().unwrap_or_default();
Instance::new(&wgpu::InstanceDescriptor {
backends,
flags: wgpu::InstanceFlags::debugging().with_env(),
backend_options: wgpu::BackendOptions {
dx12: wgpu::Dx12BackendOptions {
shader_compiler: dx12_shader_compiler,
},
gl: wgpu::GlBackendOptions { gles_minor_version },
gl: wgpu::GlBackendOptions::from_env_or_default(),
},
})
}
Expand Down
15 changes: 9 additions & 6 deletions wgpu-hal/examples/raw-gles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,15 @@ fn main() {
println!("Hooking up to wgpu-hal");
exposed.get_or_insert_with(|| {
unsafe {
<hal::api::Gles as hal::Api>::Adapter::new_external(|name| {
// XXX: On WGL this should only be called after the context was made current
gl_config
.display()
.get_proc_address(&CString::new(name).expect(name))
})
<hal::api::Gles as hal::Api>::Adapter::new_external(
|name| {
// XXX: On WGL this should only be called after the context was made current
gl_config
.display()
.get_proc_address(&CString::new(name).expect(name))
},
wgt::GlBackendOptions::default(),
)
}
.expect("GL adapter can't be initialized")
});
Expand Down
2 changes: 2 additions & 0 deletions wgpu-hal/src/gles/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ impl super::Adapter {

pub(super) unsafe fn expose(
context: super::AdapterContext,
backend_options: wgt::GlBackendOptions,
) -> Option<crate::ExposedAdapter<super::Api>> {
let gl = context.lock();
let extensions = gl.supported_extensions();
Expand Down Expand Up @@ -824,6 +825,7 @@ impl super::Adapter {
private_caps,
workarounds,
features,
options: backend_options,
shading_language_version,
next_shader_id: Default::default(),
program_cache: Default::default(),
Expand Down
62 changes: 17 additions & 45 deletions wgpu-hal/src/gles/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::{
sync::{Arc, Mutex},
};

use crate::{AtomicFenceValue, TlasInstance};
use crate::TlasInstance;
use arrayvec::ArrayVec;
use std::sync::atomic::Ordering;

Expand Down Expand Up @@ -1523,17 +1523,12 @@ impl crate::Device for super::Device {

unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> {
self.counters.fences.add(1);
Ok(super::Fence {
last_completed: AtomicFenceValue::new(0),
pending: Vec::new(),
})
Ok(super::Fence::new(&self.shared.options))
}

unsafe fn destroy_fence(&self, fence: super::Fence) {
let gl = &self.shared.context.lock();
for (_, sync) in fence.pending {
unsafe { gl.delete_sync(sync) };
}
fence.destroy(gl);
self.counters.fences.sub(1);
}

Expand All @@ -1550,44 +1545,21 @@ impl crate::Device for super::Device {
wait_value: crate::FenceValue,
timeout_ms: u32,
) -> Result<bool, crate::DeviceError> {
if fence.last_completed.load(Ordering::Relaxed) < wait_value {
let gl = &self.shared.context.lock();
// MAX_CLIENT_WAIT_TIMEOUT_WEBGL is:
// - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386
// - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110
// - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551
let timeout_ns = if cfg!(any(webgl, Emscripten)) {
0
} else {
(timeout_ms as u64 * 1_000_000).min(!0u32 as u64)
};
if let Some(&(_, sync)) = fence
.pending
.iter()
.find(|&&(value, _)| value >= wait_value)
{
let signalled = match unsafe {
gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32)
} {
// for some reason firefox returns WAIT_FAILED, to investigate
#[cfg(any(webgl, Emscripten))]
glow::WAIT_FAILED => {
log::warn!("wait failed!");
false
}
glow::TIMEOUT_EXPIRED => false,
glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => true,
_ => return Err(crate::DeviceError::Lost),
};
if signalled {
fence
.last_completed
.fetch_max(wait_value, Ordering::Relaxed);
}
return Ok(signalled);
}
if fence.satisfied(wait_value) {
return Ok(true);
}
Ok(true)

let gl = &self.shared.context.lock();
// MAX_CLIENT_WAIT_TIMEOUT_WEBGL is:
// - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386
// - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110
// - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551
let timeout_ns = if cfg!(any(webgl, Emscripten)) {
0
} else {
(timeout_ms as u64 * 1_000_000).min(!0u32 as u64)
};
fence.wait(gl, wait_value, timeout_ns)
}

unsafe fn start_capture(&self) -> bool {
Expand Down
25 changes: 17 additions & 8 deletions wgpu-hal/src/gles/egl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ struct WindowSystemInterface {
pub struct Instance {
wsi: WindowSystemInterface,
/// Instance flags copied from the instance descriptor.
flags: wgt::InstanceFlags,
/// GL backend options cloned from the instance descriptor; a clone is
/// handed to `Adapter::expose` for each adapter this instance exposes.
options: wgt::GlBackendOptions,
inner: Mutex<Inner>,
}

Expand Down Expand Up @@ -929,6 +930,7 @@ impl crate::Instance for Instance {
kind: wsi_kind,
},
flags: desc.flags,
options: desc.backend_options.gl.clone(),
inner: Mutex::new(inner),
})
}
Expand Down Expand Up @@ -1088,10 +1090,13 @@ impl crate::Instance for Instance {
inner.egl.unmake_current();

unsafe {
super::Adapter::expose(AdapterContext {
glow: Mutex::new(gl),
egl: Some(inner.egl.clone()),
})
super::Adapter::expose(
AdapterContext {
glow: Mutex::new(gl),
egl: Some(inner.egl.clone()),
},
self.options.clone(),
)
}
.into_iter()
.collect()
Expand All @@ -1110,13 +1115,17 @@ impl super::Adapter {
/// dropping any objects returned from this adapter.
pub unsafe fn new_external(
fun: impl FnMut(&str) -> *const ffi::c_void,
options: wgt::GlBackendOptions,
) -> Option<crate::ExposedAdapter<super::Api>> {
let context = unsafe { glow::Context::from_loader_function(fun) };
unsafe {
Self::expose(AdapterContext {
glow: Mutex::new(ManuallyDrop::new(context)),
egl: None,
})
Self::expose(
AdapterContext {
glow: Mutex::new(ManuallyDrop::new(context)),
egl: None,
},
options,
)
}
}

Expand Down
167 changes: 167 additions & 0 deletions wgpu-hal/src/gles/fence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
use std::sync::atomic::Ordering;

use glow::HasContext;

use crate::AtomicFenceValue;

/// A GL sync object paired with the fence value it stands for.
///
/// Entries are created by `Fence::signal` and live in `Fence::pending` until
/// their sync object is deleted by `Fence::maintain` or `Fence::destroy`.
#[derive(Debug, Copy, Clone)]
struct GLFence {
/// Sync object returned by `fence_sync` when the value was signalled.
sync: glow::Fence,
/// Fence value that was being signalled when `sync` was created.
value: crate::FenceValue,
}

/// GLES fence that tracks which fence values have completed.
///
/// In the regular mode every signal creates a GL sync object that is polled
/// or waited on later. When `fence_mode` reports short-circuiting, no sync
/// objects are created and a signalled value is recorded as completed
/// immediately.
#[derive(Debug)]
pub struct Fence {
/// Highest fence value known to have completed.
last_completed: AtomicFenceValue,
/// Sync objects created by `signal` that have not been deleted yet.
/// `get_latest` stops polling at the first unsignalled entry, so it relies
/// on the entries being in signal order.
pending: Vec<GLFence>,
/// Copied from `GlBackendOptions::short_circuit_fences` at construction.
fence_mode: wgt::GlShortCircuitFences,
}

// Empty impl: no methods are provided here.
impl crate::DynFence for Fence {}

// NOTE(review): these impls assert that `Fence`, including the `glow::Fence`
// handles stored in `pending`, may be sent to and shared between threads
// whenever the `send_sync` cfg is set. The justification is not visible in
// this file — confirm against the rest of the GLES backend.
#[cfg(send_sync)]
unsafe impl Send for Fence {}
#[cfg(send_sync)]
unsafe impl Sync for Fence {}

impl Fence {
/// Creates a fence whose completed value is 0 and which has no pending
/// sync objects.
///
/// The short-circuit mode is read from `options` once and kept for the
/// lifetime of the fence.
pub fn new(options: &wgt::GlBackendOptions) -> Self {
    let fence_mode = options.short_circuit_fences;
    let last_completed = AtomicFenceValue::new(0);
    Self {
        last_completed,
        pending: Vec::new(),
        fence_mode,
    }
}

/// Records that `value` is being signalled on this fence.
///
/// In short-circuit mode the value is stored as completed right away and
/// no GL call is made. Otherwise a new sync object is created and queued
/// in `pending` together with `value`.
///
/// # Errors
///
/// Returns `DeviceError::OutOfMemory` if `fence_sync` fails.
pub fn signal(
    &mut self,
    gl: &glow::Context,
    value: crate::FenceValue,
) -> Result<(), crate::DeviceError> {
    if self.fence_mode.is_short_circuit() {
        // Exclusive access through `&mut self`, so a plain store is enough.
        *self.last_completed.get_mut() = value;
    } else {
        let created = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) };
        let sync = created.map_err(|_| crate::DeviceError::OutOfMemory)?;
        self.pending.push(GLFence { sync, value });
    }

    Ok(())
}

/// Returns `true` when `value` is not greater than the highest value
/// already recorded as completed. Does not query GL.
pub fn satisfied(&self, value: crate::FenceValue) -> bool {
    let completed = self.last_completed.load(Ordering::Relaxed);
    value <= completed
}

/// Returns the highest fence value known to have completed.
///
/// In short-circuit mode this is just the stored value. Otherwise the
/// pending sync objects are polled in order, and the stored value is
/// raised to the newest one found signalled.
pub fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue {
    let mut latest = self.last_completed.load(Ordering::Relaxed);

    if self.fence_mode.is_short_circuit() {
        return latest;
    }

    for pending in &self.pending {
        // Entries at or below `latest` are already known to be complete,
        // so only newer ones need a status query.
        if pending.value > latest {
            let status = unsafe { gl.get_sync_status(pending.sync) };
            if status != glow::SIGNALED {
                // Everything after the first unsignalled entry is
                // guaranteed to be unsignalled as well.
                break;
            }
            latest = pending.value;
        }
    }

    // Remember the result so later calls can skip the queries.
    self.last_completed.fetch_max(latest, Ordering::Relaxed);

    latest
}

/// Deletes the sync objects of every pending entry whose value has
/// completed and drops those entries from `pending`.
///
/// Does nothing in short-circuit mode.
pub fn maintain(&mut self, gl: &glow::Context) {
    if self.fence_mode.is_short_circuit() {
        return;
    }

    let latest = self.get_latest(gl);
    // `retain` visits each entry exactly once, in order, so deleting the
    // sync object while deciding whether to keep the entry is safe here.
    self.pending.retain(|entry| {
        let still_pending = entry.value > latest;
        if !still_pending {
            unsafe {
                gl.delete_sync(entry.sync);
            }
        }
        still_pending
    });
}

/// Waits until the fence reaches `wait_value` or the timeout elapses.
///
/// Returns `Ok(true)` when the value is known to have completed and
/// `Ok(false)` otherwise: timeout, failed wait, unexpected status, or no
/// pending sync object covering `wait_value`. In short-circuit mode no GL
/// call is made and the answer comes from the stored value alone.
/// This function never returns `Err` as written.
pub fn wait(
    &self,
    gl: &glow::Context,
    wait_value: crate::FenceValue,
    timeout_ns: u64,
) -> Result<bool, crate::DeviceError> {
    // Already known to be complete: true in either mode.
    if self.last_completed.load(Ordering::Relaxed) >= wait_value {
        return Ok(true);
    }

    // Short-circuit mode has no sync objects to wait on.
    if self.fence_mode.is_short_circuit() {
        return Ok(false);
    }

    // `>=` out of caution; there should be one entry per signalled value.
    let candidate = self
        .pending
        .iter()
        .find(|entry| entry.value >= wait_value);

    let target = match candidate {
        Some(entry) => entry,
        None => {
            log::warn!("Tried to wait for {wait_value} but that value has not been signalled yet");
            return Ok(false);
        }
    };

    // We should have found an entry with the exact value.
    debug_assert_eq!(target.value, wait_value);

    // NOTE(review): `timeout_ns as i32` wraps for values above `i32::MAX`
    // (about 2.1 s), producing a negative timeout. How glow turns that
    // into the GL timeout is not visible here — confirm this is intended.
    let status = unsafe {
        gl.client_wait_sync(
            target.sync,
            glow::SYNC_FLUSH_COMMANDS_BIT,
            timeout_ns as i32,
        )
    };

    match status {
        glow::ALREADY_SIGNALED | glow::CONDITION_SATISFIED => {
            self.last_completed.fetch_max(wait_value, Ordering::Relaxed);
            Ok(true)
        }
        glow::TIMEOUT_EXPIRED | glow::WAIT_FAILED => Ok(false),
        _ => {
            log::warn!("Unexpected result from client_wait_sync: {status}");
            Ok(false)
        }
    }
}

/// Consumes the fence and deletes every sync object still pending.
///
/// Skipped in short-circuit mode, where `signal` never creates sync
/// objects.
pub fn destroy(self, gl: &glow::Context) {
    if !self.fence_mode.is_short_circuit() {
        self.pending.into_iter().for_each(|entry| unsafe {
            gl.delete_sync(entry.sync);
        });
    }
}
}
Loading

0 comments on commit 3bba040

Please sign in to comment.