[D3D12/VK] add OOM check on submit and poll that will lose the device if we are over 95% of our budget

This commit is contained in:
teoxoy 2025-04-02 15:11:19 +02:00 committed by Teodor Tanasoaia
parent c144f2a697
commit ece29b6e68
11 changed files with 119 additions and 1 deletions

View File

@ -1977,6 +1977,8 @@ impl Global {
let fence = device.fence.read(); let fence = device.fence.read();
let maintain_result = device.maintain(fence, poll_type, snatch_guard); let maintain_result = device.maintain(fence, poll_type, snatch_guard);
device.lose_if_oom();
// Some deferred destroys are scheduled in maintain so run this right after // Some deferred destroys are scheduled in maintain so run this right after
// to avoid holding on to them until the next device poll. // to avoid holding on to them until the next device poll.
device.deferred_resource_destruction(); device.deferred_resource_destruction();

View File

@ -1361,6 +1361,8 @@ impl Queue {
// the closures should execute with nothing locked! // the closures should execute with nothing locked!
callbacks.fire(); callbacks.fire();
self.device.lose_if_oom();
api_log!("Queue::submit returned submit index {submit_index}"); api_log!("Queue::submit returned submit index {submit_index}");
Ok(submit_index) Ok(submit_index)

View File

@ -361,6 +361,19 @@ impl Device {
} }
} }
/// Verifies that memory usage is still inside the budget reported by the native API.
///
/// When the backend reports an out-of-budget condition (or any other HAL error), the
/// error is forwarded to [`Self::handle_hal_error`], which is what invalidates the device.
///
/// Budgets can change while the application runs, so this is meant to be called
/// reasonably often rather than once.
pub fn lose_if_oom(&self) {
    if let Err(hal_error) = self.raw().check_if_oom() {
        // Only the side effect of handling the error matters here; the converted
        // error value is intentionally discarded.
        let _ = self.handle_hal_error(hal_error);
    }
}
pub fn handle_hal_error(&self, error: hal::DeviceError) -> DeviceError { pub fn handle_hal_error(&self, error: hal::DeviceError) -> DeviceError {
match error { match error {
hal::DeviceError::OutOfMemory hal::DeviceError::OutOfMemory

View File

@ -2320,4 +2320,33 @@ impl crate::Device for super::Device {
bytemuck::bytes_of(&Desc::wrap(temp)).to_vec() bytemuck::bytes_of(&Desc::wrap(temp)).to_vec()
} }
/// Compares current video memory usage against the budget reported by DXGI.
///
/// The local segment group is always checked; the non-local segment group is checked
/// as well when the memory architecture is `NonUnified`.
///
/// # Errors
///
/// Returns [`crate::DeviceError::OutOfMemory`] once usage reaches 95% of the budget of
/// any checked segment group, and propagates any error from querying the adapter.
fn check_if_oom(&self) -> Result<(), crate::DeviceError> {
    let checks_non_local = matches!(
        self.shared.private_caps.memory_architecture,
        super::MemoryArchitecture::NonUnified
    );

    // Ordered so that the local segment group is always queried first.
    let segment_groups = [
        Dxgi::DXGI_MEMORY_SEGMENT_GROUP_LOCAL,
        Dxgi::DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL,
    ];
    let group_count = if checks_non_local { 2 } else { 1 };

    for segment_group in segment_groups.into_iter().take(group_count) {
        let memory_info = self
            .shared
            .adapter
            .query_video_memory_info(segment_group)?;

        // Make sure we don't exceed 95% of the budget
        let limit = memory_info.Budget / 100 * 95;
        if memory_info.CurrentUsage >= limit {
            return Err(crate::DeviceError::OutOfMemory);
        }
    }

    Ok(())
}
} }

View File

@ -171,6 +171,8 @@ pub trait DynDevice: DynResource {
fn get_internal_counters(&self) -> wgt::HalCounters; fn get_internal_counters(&self) -> wgt::HalCounters;
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport>; fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport>;
/// Dynamic-dispatch counterpart of `Device::check_if_oom`.
///
/// Returns `Ok(())` when the backend does not consider itself out of memory,
/// and a `DeviceError` otherwise.
fn check_if_oom(&self) -> Result<(), DeviceError>;
} }
impl<D: Device + DynResource> DynDevice for D { impl<D: Device + DynResource> DynDevice for D {
@ -563,4 +565,8 @@ impl<D: Device + DynResource> DynDevice for D {
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> { fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
D::generate_allocator_report(self) D::generate_allocator_report(self)
} }
/// Forwards to the statically-typed `D::check_if_oom` implementation.
fn check_if_oom(&self) -> Result<(), DeviceError> {
D::check_if_oom(self)
}
} }

View File

@ -1622,6 +1622,10 @@ impl crate::Device for super::Device {
fn get_internal_counters(&self) -> wgt::HalCounters { fn get_internal_counters(&self) -> wgt::HalCounters {
self.counters.as_ref().clone() self.counters.as_ref().clone()
} }
/// No memory budget check is performed by this backend; always returns `Ok(())`.
fn check_if_oom(&self) -> Result<(), crate::DeviceError> {
Ok(())
}
} }
#[cfg(send_sync)] #[cfg(send_sync)]

View File

@ -1020,6 +1020,8 @@ pub trait Device: WasmNotSendSync {
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> { fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
None None
} }
/// Checks whether the device should be treated as out of memory.
///
/// Returns `Ok(())` when no problem is detected (or when the backend performs no
/// check), and a `DeviceError` such as `DeviceError::OutOfMemory` otherwise.
fn check_if_oom(&self) -> Result<(), DeviceError>;
} }
pub trait Queue: WasmNotSendSync { pub trait Queue: WasmNotSendSync {

View File

@ -1601,4 +1601,10 @@ impl crate::Device for super::Device {
fn get_internal_counters(&self) -> wgt::HalCounters { fn get_internal_counters(&self) -> wgt::HalCounters {
self.counters.as_ref().clone() self.counters.as_ref().clone()
} }
/// Memory budget checking is not implemented for this backend yet, so this
/// always returns `Ok(())`.
fn check_if_oom(&self) -> Result<(), crate::DeviceError> {
// TODO: see https://github.com/gfx-rs/wgpu/issues/7460
Ok(())
}
} }

View File

@ -457,4 +457,8 @@ impl crate::Device for Context {
fn get_internal_counters(&self) -> wgt::HalCounters { fn get_internal_counters(&self) -> wgt::HalCounters {
Default::default() Default::default()
} }
/// No memory budget check is performed by this backend; always returns `Ok(())`.
fn check_if_oom(&self) -> DeviceResult<()> {
Ok(())
}
} }

View File

@ -1062,6 +1062,13 @@ impl PhysicalDeviceProperties {
extensions.push(ext::external_memory_dma_buf::NAME); extensions.push(ext::external_memory_dma_buf::NAME);
} }
// Optional `VK_EXT_memory_budget`
if self.supports_extension(ext::memory_budget::NAME) {
extensions.push(ext::memory_budget::NAME);
} else {
log::warn!("VK_EXT_memory_budget is not available.")
}
// Require `VK_KHR_draw_indirect_count` if the associated feature was requested // Require `VK_KHR_draw_indirect_count` if the associated feature was requested
// Even though Vulkan 1.2 has promoted the extension to core, we must require the extension to avoid // Even though Vulkan 1.2 has promoted the extension to core, we must require the extension to avoid
// large amounts of spaghetti involved with using PhysicalDeviceVulkan12Features. // large amounts of spaghetti involved with using PhysicalDeviceVulkan12Features.

View File

@ -10,7 +10,7 @@ use std::{
}; };
use arrayvec::ArrayVec; use arrayvec::ArrayVec;
use ash::{khr, vk}; use ash::{ext, khr, vk};
use hashbrown::hash_map::Entry; use hashbrown::hash_map::Entry;
use parking_lot::Mutex; use parking_lot::Mutex;
@ -2872,6 +2872,49 @@ impl crate::Device for super::Device {
}; };
bytemuck::bytes_of(&temp).to_vec() bytemuck::bytes_of(&temp).to_vec()
} }
/// Compares per-heap memory usage against the budgets reported through
/// `VK_EXT_memory_budget`.
///
/// When that extension is not among the enabled device extensions, no check is
/// performed and `Ok(())` is returned.
///
/// # Errors
///
/// Returns [`crate::DeviceError::OutOfMemory`] once the usage of any memory heap
/// reaches 95% of that heap's budget.
///
/// # Panics
///
/// Panics if the instance has no `get_physical_device_properties` function table.
fn check_if_oom(&self) -> Result<(), crate::DeviceError> {
    let budget_ext_enabled = self
        .shared
        .enabled_extensions
        .contains(&ext::memory_budget::NAME);
    if !budget_ext_enabled {
        return Ok(());
    }

    let properties2_fns = self
        .shared
        .instance
        .get_physical_device_properties
        .as_ref()
        .unwrap();

    // Chain the budget struct onto the memory properties query so both get filled in.
    let mut budget_props = vk::PhysicalDeviceMemoryBudgetPropertiesEXT::default();
    let mut props2 =
        vk::PhysicalDeviceMemoryProperties2::default().push_next(&mut budget_props);

    // NOTE(review): both output structs are default-initialized and chained via
    // `push_next`; this assumes `physical_device` is a valid handle obtained from
    // this instance — confirm.
    unsafe {
        properties2_fns
            .get_physical_device_memory_properties2(self.shared.physical_device, &mut props2);
    }

    let heap_count = props2.memory_properties.memory_heap_count as usize;

    let any_heap_over_budget = (0..heap_count).any(|heap_index| {
        let usage = budget_props.heap_usage[heap_index];
        let budget = budget_props.heap_budget[heap_index];
        // Make sure we don't exceed 95% of the budget
        usage >= budget / 100 * 95
    });

    if any_heap_over_budget {
        Err(crate::DeviceError::OutOfMemory)
    } else {
        Ok(())
    }
}
} }
impl super::DeviceShared { impl super::DeviceShared {