Allow timeouting on poll (#8282)

2025-12-08 21:26:17 +00:00 · 2025-10-01 09:19:24 +02:00 · 2025-10-01 09:19:24 +02:00 · 00ea850d79
commit 00ea850d79
parent 06fc6f7345
17 changed files with 117 additions and 38 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -165,6 +165,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
 - Added mesh shader support to `wgpu`, with examples. Requires passthrough. By @SupaMaggie70Incorporated in [#7345](https://github.com/gfx-rs/wgpu/pull/7345).

 - Added support for external textures based on WebGPU's [`GPUExternalTexture`](https://www.w3.org/TR/webgpu/#gpuexternaltexture). These allow shaders to transparently operate on potentially multiplanar source texture data in either RGB or YCbCr formats via WGSL's `texture_external` type. This is gated behind the `Features::EXTERNAL_TEXTURE` feature, which is currently only supported on DX12. By @jamienicol in [#4386](https://github.com/gfx-rs/wgpu/issues/4386).
+- `wgpu::Device::poll` can now specify a timeout via `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout`. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282)

 #### naga

@ -194,6 +195,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
 - Require new `F16_IN_F32` downlevel flag for `quantizeToF16`, `pack2x16float`, and `unpack2x16float` in WGSL input. By @aleiserson in [#8130](https://github.com/gfx-rs/wgpu/pull/8130).
 - The error message for non-copyable depth/stencil formats no longer mentions the aspect when it is not relevant. By @reima in [#8156](https://github.com/gfx-rs/wgpu/pull/8156).
 - Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099).
+- `wgpu::PollType::Wait`/`wgpu::PollType::WaitForSubmissionIndex` will no longer time out after 60 seconds, but instead wait indefinitely or (depending on backend implementation) until an error is encountered. Use `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout` if you need a timeout. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282)

 #### naga

--- a/tests/tests/wgpu-gpu/poll.rs
+++ b/tests/tests/wgpu-gpu/poll.rs
@ -1,4 +1,4 @@
-use std::num::NonZeroU64;
+use std::{num::NonZeroU64, time::Duration};

 use wgpu::{
    BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry,
@ -13,8 +13,10 @@ use wgpu_test::{
 pub fn all_tests(vec: &mut Vec<GpuTestInitializer>) {
    vec.extend([
        WAIT,
+        WAIT_WITH_TIMEOUT,
        DOUBLE_WAIT,
        WAIT_ON_SUBMISSION,
+        WAIT_ON_SUBMISSION_WITH_TIMEOUT,
        DOUBLE_WAIT_ON_SUBMISSION,
        WAIT_OUT_OF_ORDER,
        WAIT_AFTER_BAD_SUBMISSION,
@ -75,6 +77,18 @@ static WAIT: GpuTestConfiguration = GpuTestConfiguration::new()
        ctx.async_poll(PollType::wait()).await.unwrap();
    });

+#[gpu_test]
+static WAIT_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().enable_noop())
+    .run_async(|ctx| async move {
+        let cmd_buf = generate_dummy_work(&ctx);
+
+        ctx.queue.submit(Some(cmd_buf));
+        ctx.async_poll(PollType::WaitWithTimeout(Duration::from_secs(1)))
+            .await
+            .unwrap();
+    });
+
 #[gpu_test]
 static DOUBLE_WAIT: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(TestParameters::default().enable_noop())
@ -96,6 +110,21 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
        ctx.async_poll(PollType::wait_for(index)).await.unwrap();
    });

+#[gpu_test]
+static WAIT_ON_SUBMISSION_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().enable_noop())
+    .run_async(|ctx| async move {
+        let cmd_buf = generate_dummy_work(&ctx);
+
+        let index = ctx.queue.submit(Some(cmd_buf));
+        ctx.async_poll(PollType::WaitForSubmissionIndexWithTimeout {
+            submission_index: index,
+            timeout: Duration::from_secs(1),
+        })
+        .await
+        .unwrap();
+    });
+
 #[gpu_test]
 static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(TestParameters::default().enable_noop())
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@ -35,10 +35,6 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
 // value is enough for a 16k texture with float4 format.
 pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;

-// If a submission is not completed within this time, we go off into UB land.
-// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this.
-const CLEANUP_WAIT_MS: u32 = 60000;
-
 pub(crate) const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";

 pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@ -183,9 +183,9 @@ impl Drop for Queue {
                    fence.as_ref(),
                    last_successful_submission_index,
                    #[cfg(not(target_arch = "wasm32"))]
-                    timeout_ms,
+                    Some(core::time::Duration::from_millis(timeout_ms)),
                    #[cfg(target_arch = "wasm32")]
-                    0, // WebKit and Chromium don't support a non-0 timeout
+                    Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout
                )
            };
            // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@ -29,7 +29,6 @@ use crate::{
    device::{
        bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData,
        DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext,
-        CLEANUP_WAIT_MS,
    },
    hal_label,
    init_tracker::{
@ -712,7 +711,10 @@ impl Device {

        // If a wait was requested, determine which submission index to wait for.
        let wait_submission_index = match poll_type {
-            wgt::PollType::WaitForSubmissionIndex(submission_index) => {
+            wgt::PollType::WaitForSubmissionIndex(submission_index)
+            | wgt::PollType::WaitForSubmissionIndexWithTimeout {
+                submission_index, ..
+            } => {
                let last_successful_submission_index = self
                    .last_successful_submission_index
                    .load(Ordering::Acquire);
@ -728,7 +730,7 @@ impl Device {

                Some(submission_index)
            }
-            wgt::PollType::Wait => Some(
+            wgt::PollType::Wait | wgt::PollType::WaitWithTimeout { .. } => Some(
                self.last_successful_submission_index
                    .load(Ordering::Acquire),
            ),
@ -741,7 +743,7 @@ impl Device {

            let wait_result = unsafe {
                self.raw()
-                    .wait(fence.as_ref(), target_submission_index, CLEANUP_WAIT_MS)
+                    .wait(fence.as_ref(), target_submission_index, poll_type.timeout())
            };

            // This error match is only about `DeviceErrors`. At this stage we do not care if
@ -4499,7 +4501,7 @@ impl Device {
        let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref()) }
            .map_err(|e| self.handle_hal_error(e))?;
        if last_done_index < submission_index {
-            unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) }
+            unsafe { self.raw().wait(fence.as_ref(), submission_index, None) }
                .map_err(|e| self.handle_hal_error(e))?;
            drop(fence);
            if let Some(queue) = self.get_queue() {
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@ -54,7 +54,7 @@ struct ExecutionContext<A: hal::Api> {

 impl<A: hal::Api> ExecutionContext<A> {
    unsafe fn wait_and_clear(&mut self, device: &A::Device) {
-        device.wait(&self.fence, self.fence_value, !0).unwrap();
+        device.wait(&self.fence, self.fence_value, None).unwrap();
        self.encoder.reset_all(self.used_cmd_bufs.drain(..));
        for view in self.used_views.drain(..) {
            device.destroy_texture_view(view);
@ -519,7 +519,7 @@ impl<A: hal::Api> Example<A> {
            queue
                .submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
                .unwrap();
-            device.wait(&fence, init_fence_value, !0).unwrap();
+            device.wait(&fence, init_fence_value, None).unwrap();
            device.destroy_buffer(staging_buffer);
            cmd_encoder.reset_all(iter::once(init_cmd));
            fence
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@ -181,7 +181,7 @@ struct ExecutionContext<A: hal::Api> {

 impl<A: hal::Api> ExecutionContext<A> {
    unsafe fn wait_and_clear(&mut self, device: &A::Device) {
-        device.wait(&self.fence, self.fence_value, !0).unwrap();
+        device.wait(&self.fence, self.fence_value, None).unwrap();
        self.encoder.reset_all(self.used_cmd_bufs.drain(..));
        for view in self.used_views.drain(..) {
            device.destroy_texture_view(view);
@ -816,7 +816,7 @@ impl<A: hal::Api> Example<A> {
            queue
                .submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
                .unwrap();
-            device.wait(&fence, init_fence_value, !0).unwrap();
+            device.wait(&fence, init_fence_value, None).unwrap();
            cmd_encoder.reset_all(iter::once(init_cmd));
            fence
        };
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@ -2237,9 +2237,9 @@ impl crate::Device for super::Device {
        &self,
        fence: &super::Fence,
        value: crate::FenceValue,
-        timeout_ms: u32,
+        timeout: Option<Duration>,
    ) -> Result<bool, crate::DeviceError> {
-        let timeout_duration = Duration::from_millis(timeout_ms as u64);
+        let timeout = timeout.unwrap_or(Duration::MAX);

        // We first check if the fence has already reached the value we're waiting for.
        let mut fence_value = unsafe { fence.raw.GetCompletedValue() };
@ -2273,7 +2273,7 @@ impl crate::Device for super::Device {
            //
            // This happens when a previous iteration WaitForSingleObject succeeded with a previous fence value,
            // right before the timeout would have been hit.
-            let remaining_wait_duration = match timeout_duration.checked_sub(elapsed) {
+            let remaining_wait_duration = match timeout.checked_sub(elapsed) {
                Some(remaining) => remaining,
                None => {
                    log::trace!("Timeout elapsed in between waits!");
@ -2286,7 +2286,7 @@ impl crate::Device for super::Device {
            match unsafe {
                Threading::WaitForSingleObject(
                    event.0,
-                    remaining_wait_duration.as_millis().try_into().unwrap(),
+                    remaining_wait_duration.as_millis().min(u32::MAX as u128) as u32,
                )
            } {
                Foundation::WAIT_OBJECT_0 => {}
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@ -135,7 +135,7 @@ pub trait DynDevice: DynResource {
        &self,
        fence: &dyn DynFence,
        value: FenceValue,
-        timeout_ms: u32,
+        timeout: Option<core::time::Duration>,
    ) -> Result<bool, DeviceError>;

    unsafe fn start_graphics_debugger_capture(&self) -> bool;
@ -486,10 +486,10 @@ impl<D: Device + DynResource> DynDevice for D {
        &self,
        fence: &dyn DynFence,
        value: FenceValue,
-        timeout_ms: u32,
+        timeout: Option<core::time::Duration>,
    ) -> Result<bool, DeviceError> {
        let fence = fence.expect_downcast_ref();
-        unsafe { D::wait(self, fence, value, timeout_ms) }
+        unsafe { D::wait(self, fence, value, timeout) }
    }

    unsafe fn start_graphics_debugger_capture(&self) -> bool {
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@ -1564,7 +1564,7 @@ impl crate::Device for super::Device {
        &self,
        fence: &super::Fence,
        wait_value: crate::FenceValue,
-        timeout_ms: u32,
+        timeout: Option<core::time::Duration>,
    ) -> Result<bool, crate::DeviceError> {
        if fence.satisfied(wait_value) {
            return Ok(true);
@ -1578,7 +1578,9 @@ impl crate::Device for super::Device {
        let timeout_ns = if cfg!(any(webgl, Emscripten)) {
            0
        } else {
-            (timeout_ms as u64 * 1_000_000).min(!0u32 as u64)
+            timeout
+                .map(|t| t.as_nanos().min(u32::MAX as u128) as u32)
+                .unwrap_or(u32::MAX)
        };
        fence.wait(gl, wait_value, timeout_ns)
    }
--- a/wgpu-hal/src/gles/fence.rs
+++ b/wgpu-hal/src/gles/fence.rs
@ -102,7 +102,7 @@ impl Fence {
        &self,
        gl: &glow::Context,
        wait_value: crate::FenceValue,
-        timeout_ns: u64,
+        timeout_ns: u32,
    ) -> Result<bool, crate::DeviceError> {
        let last_completed = self.last_completed.load(Ordering::Acquire);

@ -134,7 +134,7 @@ impl Fence {
            gl.client_wait_sync(
                gl_fence.sync,
                glow::SYNC_FLUSH_COMMANDS_BIT,
-                timeout_ns as i32,
+                timeout_ns.min(i32::MAX as u32) as i32,
            )
        };

--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@ -981,6 +981,9 @@ pub trait Device: WasmNotSendSync {
    /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value
    /// returns immediately.
    ///
+    /// If `timeout` is `None`, the function will block indefinitely or until
+    /// an error is encountered.
+    ///
    /// Returns `Ok(true)` on success and `Ok(false)` on timeout.
    ///
    /// [`Fence`]: Api::Fence
@ -989,7 +992,7 @@ pub trait Device: WasmNotSendSync {
        &self,
        fence: &<Self::A as Api>::Fence,
        value: FenceValue,
-        timeout_ms: u32,
+        timeout: Option<core::time::Duration>,
    ) -> Result<bool, DeviceError>;

    /// Start a graphics debugger capture.
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@ -1580,7 +1580,7 @@ impl crate::Device for super::Device {
        &self,
        fence: &super::Fence,
        wait_value: crate::FenceValue,
-        timeout_ms: u32,
+        timeout: Option<core::time::Duration>,
    ) -> DeviceResult<bool> {
        if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) {
            return Ok(true);
@ -1603,8 +1603,10 @@ impl crate::Device for super::Device {
            if let MTLCommandBufferStatus::Completed = cmd_buf.status() {
                return Ok(true);
            }
-            if start.elapsed().as_millis() >= timeout_ms as u128 {
-                return Ok(false);
+            if let Some(timeout) = timeout {
+                if start.elapsed() >= timeout {
+                    return Ok(false);
+                }
            }
            thread::sleep(core::time::Duration::from_millis(1));
        }
--- a/wgpu-hal/src/noop/mod.rs
+++ b/wgpu-hal/src/noop/mod.rs
@ -429,7 +429,7 @@ impl crate::Device for Context {
        &self,
        fence: &Fence,
        value: crate::FenceValue,
-        timeout_ms: u32,
+        timeout: Option<Duration>,
    ) -> DeviceResult<bool> {
        // The relevant commands must have already been submitted, and noop-backend commands are
        // executed synchronously, so there is no waiting — either it is already done,
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@ -4,6 +4,7 @@ use core::{
    mem::{self, MaybeUninit},
    num::NonZeroU32,
    ptr,
+    time::Duration,
 };

 use arrayvec::ArrayVec;
@ -2443,9 +2444,12 @@ impl crate::Device for super::Device {
        &self,
        fence: &super::Fence,
        wait_value: crate::FenceValue,
-        timeout_ms: u32,
+        timeout: Option<Duration>,
    ) -> Result<bool, crate::DeviceError> {
-        let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS;
+        let timeout_ns = timeout
+            .unwrap_or(Duration::MAX)
+            .as_nanos()
+            .min(u64::MAX as _) as u64;
        self.shared.wait_for_fence(fence, wait_value, timeout_ns)
    }

--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@ -49,7 +49,6 @@ use wgt::InternalCounter;

 use semaphore_list::SemaphoreList;

-const MILLIS_TO_NANOS: u64 = 1_000_000;
 const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

 #[derive(Clone, Debug)]
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@ -16,13 +16,14 @@ extern crate alloc;

 use alloc::borrow::Cow;
 use alloc::{string::String, vec, vec::Vec};
-use core::cmp::Ordering;
 use core::{
+    cmp::Ordering,
    fmt,
    hash::{Hash, Hasher},
    mem,
    num::NonZeroU32,
    ops::Range,
+    time::Duration,
 };

 use bytemuck::{Pod, Zeroable};
@ -4503,8 +4504,26 @@ pub enum PollType<T> {
    /// On WebGPU, this has no effect. Callbacks are invoked from the
    /// window event loop.
    WaitForSubmissionIndex(T),
-    /// Same as `WaitForSubmissionIndex` but waits for the most recent submission.
+
+    /// Same as [`Self::WaitForSubmissionIndex`] but with a timeout.
+    WaitForSubmissionIndexWithTimeout {
+        /// Submission index to wait for.
+        submission_index: T,
+
+        /// Max time to wait for the submission to complete.
+        ///
+        /// If waiting for the GPU device takes this long or longer, the poll will return [`PollError::Timeout`].
+        timeout: Duration,
+    },
+
+    /// Same as [`Self::WaitForSubmissionIndex`] but waits for the most recent submission.
    Wait,
+
+    /// Same as [`Self::Wait`], but with a timeout.
+    ///
+    /// If waiting for the GPU device takes this long or longer, the poll will return [`PollError::Timeout`].
+    WaitWithTimeout(Duration),
+
    /// Check the device for a single time without blocking.
    Poll,
 }
@ -4532,7 +4551,10 @@ impl<T> PollType<T> {
    #[must_use]
    pub fn is_wait(&self) -> bool {
        match *self {
-            Self::WaitForSubmissionIndex(..) | Self::Wait => true,
+            Self::WaitForSubmissionIndex(..)
+            | Self::Wait
+            | Self::WaitForSubmissionIndexWithTimeout { .. }
+            | Self::WaitWithTimeout { .. } => true,
            Self::Poll => false,
        }
    }
@ -4546,9 +4568,27 @@ impl<T> PollType<T> {
        match self {
            Self::WaitForSubmissionIndex(i) => PollType::WaitForSubmissionIndex(func(i)),
            Self::Wait => PollType::Wait,
+            Self::WaitForSubmissionIndexWithTimeout {
+                submission_index,
+                timeout,
+            } => PollType::WaitForSubmissionIndexWithTimeout {
+                submission_index: func(submission_index),
+                timeout,
+            },
+            Self::WaitWithTimeout(timeout) => PollType::WaitWithTimeout(timeout),
            Self::Poll => PollType::Poll,
        }
    }
+
+    /// Returns the timeout as a [`Duration`] if the poll type has one, or `None` otherwise.
+    #[must_use]
+    pub fn timeout(&self) -> Option<Duration> {
+        match self {
+            Self::WaitForSubmissionIndexWithTimeout { timeout, .. }
+            | Self::WaitWithTimeout(timeout) => Some(*timeout),
+            _ => None,
+        }
+    }
 }

 /// Error states after a device poll