Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More complete implementation of "lose the device". #4299

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ Bottom level categories:
- Hal
-->

## Unreleased ##

### Changes ###

#### General ####
* `LoseDeviceClosure` callback mechanism provided so user agents can resolve `GPUDevice.lost` Promises at the appropriate time by @bradwerth in [#4299](https://github.com/gfx-rs/wgpu/pull/4299)

## v0.18.0 (2023-10-25)

### Desktop OpenGL 3.3+ Support on Windows
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ png.workspace = true
pollster.workspace = true
serde_json.workspace = true
serde.workspace = true
wgc.workspace = true
wgpu.workspace = true
wgpu-macros.workspace = true
wgt = { workspace = true, features = ["replay"] }
Expand Down
32 changes: 32 additions & 0 deletions tests/tests/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,35 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne
buffer_for_unmap.unmap();
});
});

#[gpu_test]
static DEVICE_DESTROY_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default())
.run_sync(|ctx| {
// This test checks that when device.destroy is called, the provided
// LoseDeviceClosure is called with reason DeviceLostReason::Destroyed.
let was_called = std::sync::Arc::<std::sync::atomic::AtomicBool>::new(false.into());

// Set a LoseDeviceCallback on the device.
let was_called_clone = was_called.clone();
let callback = Box::new(move |reason, _m| {
was_called_clone.store(true, std::sync::atomic::Ordering::SeqCst);
assert!(
matches!(reason, wgt::DeviceLostReason::Destroyed),
"Device lost info reason should match DeviceLostReason::Destroyed."
);
});
ctx.device.set_lose_device_callback(callback);

// Destroy the device.
ctx.device.destroy();

// Make sure the device queues are empty, which ensures that the closure
// has been called.
assert!(ctx.device.poll(wgpu::Maintain::Wait));

assert!(
was_called.load(std::sync::atomic::Ordering::SeqCst),
"Lose device callback should have been called."
);
});
47 changes: 27 additions & 20 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use crate::device::trace;
use crate::{
binding_model::{self, BindGroupLayout},
command, conv,
device::{life::WaitIdleError, map_buffer, queue, Device, DeviceError, HostMap},
device::{
life::WaitIdleError, map_buffer, queue, Device, DeviceError, HostMap, LoseDeviceClosure,
},
global::Global,
hal_api::HalApi,
hub::Token,
Expand Down Expand Up @@ -2633,6 +2635,21 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}

pub fn device_set_lose_device_closure<A: HalApi>(
&self,
device_id: DeviceId,
lose_device_closure: LoseDeviceClosure,
) {
let hub = A::hub(self);
let mut token = Token::root();

let (mut device_guard, mut token) = hub.devices.write(&mut token);
if let Ok(device) = device_guard.get_mut(device_id) {
let mut life_tracker = device.lock_life(&mut token);
life_tracker.lose_device_closure = Some(lose_device_closure);
}
}

pub fn device_destroy<A: HalApi>(&self, device_id: DeviceId) {
log::trace!("Device::destroy {device_id:?}");

Expand All @@ -2644,36 +2661,26 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
// Follow the steps at
// https://gpuweb.github.io/gpuweb/#dom-gpudevice-destroy.

// It's legal to call destroy multiple times, but if the device
// is already invalid, there's nothing more to do. There's also
// no need to return an error.
if !device.valid {
return;
}

// The last part of destroy is to lose the device. The spec says
// delay that until all "currently-enqueued operations on any
// queue on this device are completed."

// TODO: implement this delay.

// Finish by losing the device.

// TODO: associate this "destroyed" reason more tightly with
// the GPUDeviceLostReason defined in webgpu.idl.
device.lose(Some("destroyed"));
// queue on this device are completed." This is accomplished by
// setting valid to false, and then relying upon maintain to
// check for empty queues and a LoseDeviceClosure. At that time,
// the LoseDeviceClosure will be called with "destroyed" as the
// reason.
device.valid = false;
}
}

pub fn device_lose<A: HalApi>(&self, device_id: DeviceId, reason: Option<&str>) {
pub fn device_lose<A: HalApi>(&self, device_id: DeviceId, message: &str) {
log::trace!("Device::lose {device_id:?}");

let hub = A::hub(self);
let mut token = Token::root();

let (mut device_guard, _) = hub.devices.write(&mut token);
let (mut device_guard, mut token) = hub.devices.write(&mut token);
if let Ok(device) = device_guard.get_mut(device_id) {
device.lose(reason);
device.lose(&mut token, message);
}
}

Expand Down
8 changes: 7 additions & 1 deletion wgpu-core/src/device/life.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::device::trace;
use crate::{
device::{
queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
DeviceError,
DeviceError, LoseDeviceClosure,
},
hal_api::HalApi,
hub::{Hub, Token},
Expand Down Expand Up @@ -313,6 +313,11 @@ pub(super) struct LifetimeTracker<A: hal::Api> {
/// must happen _after_ all mapped buffer callbacks are mapped, so we defer them
/// here until the next time the device is maintained.
work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,

/// Closure to be called on "lose the device". This is invoked directly by
/// device.lose or by the UserCallbacks returned from maintain when the device
/// has been destroyed and its queues are empty.
pub lose_device_closure: Option<LoseDeviceClosure>,
}

impl<A: hal::Api> LifetimeTracker<A> {
Expand All @@ -326,6 +331,7 @@ impl<A: hal::Api> LifetimeTracker<A> {
free_resources: NonReferencedResources::new(),
ready_to_map: Vec::new(),
work_done_closures: SmallVec::new(),
lose_device_closure: None,
}
}

Expand Down
99 changes: 98 additions & 1 deletion wgpu-core/src/device/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ use crate::{
use arrayvec::ArrayVec;
use hal::Device as _;
use smallvec::SmallVec;
use std::os::raw::c_char;
use thiserror::Error;
use wgt::{BufferAddress, TextureFormat};
use wgt::{BufferAddress, DeviceLostReason, TextureFormat};

use std::{iter, num::NonZeroU32, ptr};

Expand Down Expand Up @@ -169,12 +170,15 @@ pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult);
pub struct UserClosures {
pub mappings: Vec<BufferMapPendingClosure>,
pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>,
pub lose_device_invocations: SmallVec<[LoseDeviceInvocation; 1]>,
}

impl UserClosures {
fn extend(&mut self, other: Self) {
self.mappings.extend(other.mappings);
self.submissions.extend(other.submissions);
self.lose_device_invocations
.extend(other.lose_device_invocations);
}

fn fire(self) {
Expand All @@ -189,6 +193,99 @@ impl UserClosures {
for closure in self.submissions {
closure.call();
}
for invocation in self.lose_device_invocations {
invocation
.closure
.call(invocation.reason, invocation.message);
}
}
}

#[repr(C)]
pub struct LoseDeviceClosureC {
pub callback: unsafe extern "C" fn(user_data: *mut u8, reason: u8, message: *const c_char),
pub user_data: *mut u8,
}

#[cfg(any(
not(target_arch = "wasm32"),
all(
feature = "fragile-send-sync-non-atomic-wasm",
not(target_feature = "atomics")
)
))]
unsafe impl Send for LoseDeviceClosureC {}

pub struct LoseDeviceClosure {
// We wrap this so creating the enum in the C variant can be unsafe,
// allowing our call function to be safe.
inner: LoseDeviceClosureInner,
}

pub struct LoseDeviceInvocation {
closure: LoseDeviceClosure,
reason: DeviceLostReason,
message: String,
}

#[cfg(any(
not(target_arch = "wasm32"),
all(
feature = "fragile-send-sync-non-atomic-wasm",
not(target_feature = "atomics")
)
))]
pub type LoseDeviceCallback = Box<dyn FnOnce(DeviceLostReason, String) + Send + 'static>;
#[cfg(not(any(
not(target_arch = "wasm32"),
all(
feature = "fragile-send-sync-non-atomic-wasm",
not(target_feature = "atomics")
)
)))]
pub type LoseDeviceCallback = Box<dyn FnOnce(DeviceLostReason, String) + 'static>;

enum LoseDeviceClosureInner {
Rust { callback: LoseDeviceCallback },
C { inner: LoseDeviceClosureC },
}

impl LoseDeviceClosure {
pub fn from_rust(callback: LoseDeviceCallback) -> Self {
Self {
inner: LoseDeviceClosureInner::Rust { callback },
}
}

/// # Safety
///
/// - The callback pointer must be valid to call with the provided `user_data`
/// pointer.
///
/// - Both pointers must point to `'static` data, as the callback may happen at
/// an unspecified time.
pub unsafe fn from_c(inner: LoseDeviceClosureC) -> Self {
Self {
inner: LoseDeviceClosureInner::C { inner },
}
}

pub(crate) fn call(self, reason: DeviceLostReason, message: String) {
match self.inner {
LoseDeviceClosureInner::Rust { callback } => callback(reason, message),
// SAFETY: the contract of the call to from_c says that this unsafe is sound.
LoseDeviceClosureInner::C { inner } => unsafe {
// We need to pass message as a c_char typed pointer. To avoid
// trivial conversion warnings on some platforms, we first coerce
// the pointer to u8.
let message_u8_ptr: *const u8 = message.as_ptr();
(inner.callback)(
inner.user_data,
reason as u8,
message_u8_ptr as *const c_char,
)
},
}
}
}

Expand Down
35 changes: 28 additions & 7 deletions wgpu-core/src/device/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use crate::{
command, conv,
device::life::WaitIdleError,
device::{
AttachmentData, CommandAllocator, MissingDownlevelFlags, MissingFeatures,
RenderPassContext, CLEANUP_WAIT_MS,
AttachmentData, CommandAllocator, LoseDeviceInvocation, MissingDownlevelFlags,
MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS,
},
hal_api::HalApi,
hal_label,
Expand All @@ -34,7 +34,7 @@ use hal::{CommandEncoder as _, Device as _};
use parking_lot::{Mutex, MutexGuard};
use smallvec::SmallVec;
use thiserror::Error;
use wgt::{TextureFormat, TextureSampleType, TextureViewDimension};
use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension};

use std::{borrow::Cow, iter, num::NonZeroU32};

Expand Down Expand Up @@ -315,9 +315,24 @@ impl<A: HalApi> Device<A> {
let mapping_closures = life_tracker.handle_mapping(hub, &self.raw, &self.trackers, token);
life_tracker.cleanup(&self.raw);

// Detect if we have been destroyed and now need to lose the device.
// If we are invalid (set at start of destroy) and our queue is empty,
// and we have a LoseDeviceClosure, return the closure to be called by
// our caller. This will complete the steps for both destroy and for
// "lose the device".
let mut lose_device_invocations = SmallVec::new();
if !self.valid && life_tracker.queue_empty() && life_tracker.lose_device_closure.is_some() {
lose_device_invocations.push(LoseDeviceInvocation {
closure: life_tracker.lose_device_closure.take().unwrap(),
reason: DeviceLostReason::Destroyed,
message: String::new(),
});
}

let closures = UserClosures {
mappings: mapping_closures,
submissions: submission_closures,
lose_device_invocations,
};
Ok((closures, life_tracker.queue_empty()))
}
Expand Down Expand Up @@ -3275,17 +3290,23 @@ impl<A: HalApi> Device<A> {
})
}

pub(crate) fn lose(&mut self, _reason: Option<&str>) {
pub(crate) fn lose<'this, 'token: 'this>(
&'this mut self,
token: &mut Token<'token, Self>,
message: &str,
) {
// Follow the steps at https://gpuweb.github.io/gpuweb/#lose-the-device.

// Mark the device explicitly as invalid. This is checked in various
// places to prevent new work from being submitted.
self.valid = false;

// The following steps remain in "lose the device":
// 1) Resolve the GPUDevice device.lost promise.

// TODO: triggger this passively or actively, and supply the reason.
let mut life_tracker = self.lock_life(token);
if life_tracker.lose_device_closure.is_some() {
let lose_device_closure = life_tracker.lose_device_closure.take().unwrap();
lose_device_closure.call(DeviceLostReason::Unknown, message.to_string());
}

// 2) Complete any outstanding mapAsync() steps.
// 3) Complete any outstanding onSubmittedWorkDone() steps.
Expand Down
12 changes: 12 additions & 0 deletions wgpu-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6692,3 +6692,15 @@ mod send_sync {
)))]
impl<T> WasmNotSync for T {}
}

/// Reason for "lose the device".
///
/// Corresponds to [WebGPU `GPUDeviceLostReason`](https://gpuweb.github.io/gpuweb/#enumdef-gpudevicelostreason).
#[repr(u8)]
#[derive(Debug, Copy, Clone)]
pub enum DeviceLostReason {
/// Triggered by driver
Unknown = 0,
/// After Device::destroy
Destroyed = 1,
}
Loading