[d3d12] refactor: move allocation-related fields into a new Allocator struct

This commit is contained in:
teoxoy 2025-04-18 17:29:46 +02:00 committed by Teodor Tanasoaia
parent 3b72d59a3c
commit 1fdd05a2b8
3 changed files with 115 additions and 98 deletions

View File

@ -1,6 +1,6 @@
use std::{
borrow::Cow,
ffi, mem,
ffi,
num::NonZeroU32,
ptr,
string::{String, ToString as _},
@ -57,9 +57,8 @@ impl super::Device {
auxil::dxgi::exception::register_exception_handler();
}
let (mem_allocator, device_memblock_size, host_memblock_size) =
suballocation::create_allocator(&raw, memory_hints)?;
let mem_allocator = Arc::new(mem_allocator);
let mem_allocator =
suballocation::Allocator::new(&raw, memory_hints, memory_budget_thresholds)?;
let idle_fence: Direct3D12::ID3D12Fence = unsafe {
profiling::scope!("ID3D12Device::CreateFence");
@ -158,9 +157,6 @@ impl super::Device {
)?,
sampler_heap: super::sampler::SamplerHeap::new(&raw, &private_caps)?,
private_caps,
device_memblock_size,
host_memblock_size,
memory_budget_thresholds,
};
let mut rtv_pool =
@ -1939,7 +1935,11 @@ impl crate::Device for super::Device {
),
};
if let Some(threshold) = self.shared.memory_budget_thresholds.for_resource_creation {
if let Some(threshold) = self
.mem_allocator
.memory_budget_thresholds
.for_resource_creation
{
let info = self
.shared
.adapter
@ -2285,33 +2285,7 @@ impl crate::Device for super::Device {
}
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
let mut upstream = self.mem_allocator.lock().generate_report();
let allocations = upstream
.allocations
.iter_mut()
.map(|alloc| wgt::AllocationReport {
name: mem::take(&mut alloc.name),
offset: alloc.offset,
size: alloc.size,
})
.collect();
let blocks = upstream
.blocks
.iter()
.map(|block| wgt::MemoryBlockReport {
size: block.size,
allocations: block.allocations.clone(),
})
.collect();
Some(wgt::AllocatorReport {
allocations,
blocks,
total_allocated_bytes: upstream.total_allocated_bytes,
total_reserved_bytes: upstream.total_reserved_bytes,
})
Some(self.mem_allocator.generate_report())
}
fn tlas_instance_to_bytes(&self, instance: TlasInstance) -> Vec<u8> {
@ -2329,7 +2303,7 @@ impl crate::Device for super::Device {
}
fn check_if_oom(&self) -> Result<(), crate::DeviceError> {
let Some(threshold) = self.shared.memory_budget_thresholds.for_device_loss else {
let Some(threshold) = self.mem_allocator.memory_budget_thresholds.for_device_loss else {
return Ok(());
};

View File

@ -89,8 +89,8 @@ mod view;
use std::{borrow::ToOwned as _, ffi, fmt, mem, num::NonZeroU32, ops::Deref, sync::Arc, vec::Vec};
use arrayvec::ArrayVec;
use gpu_allocator::d3d12::Allocator;
use parking_lot::{Mutex, RwLock};
use suballocation::Allocator;
use windows::{
core::{Free, Interface},
Win32::{
@ -635,9 +635,6 @@ struct DeviceShared {
heap_views: descriptor::GeneralHeap,
sampler_heap: sampler::SamplerHeap,
private_caps: PrivateCapabilities,
device_memblock_size: u64,
host_memblock_size: u64,
memory_budget_thresholds: wgt::MemoryBudgetThresholds,
}
unsafe impl Send for DeviceShared {}
@ -658,7 +655,7 @@ pub struct Device {
#[cfg(feature = "renderdoc")]
render_doc: auxil::renderdoc::RenderDoc,
null_rtv_handle: descriptor::Handle,
mem_allocator: Arc<Mutex<Allocator>>,
mem_allocator: Allocator,
dxc_container: Option<Arc<shader_compilation::DxcContainer>>,
counters: Arc<wgt::HalCounters>,
}
@ -800,7 +797,7 @@ pub struct CommandEncoder {
allocator: Direct3D12::ID3D12CommandAllocator,
device: Direct3D12::ID3D12Device,
shared: Arc<DeviceShared>,
mem_allocator: Arc<Mutex<Allocator>>,
mem_allocator: Allocator,
null_rtv_handle: descriptor::Handle,
list: Option<Direct3D12::ID3D12GraphicsCommandList>,

View File

@ -1,8 +1,6 @@
use gpu_allocator::{
d3d12::{AllocationCreateDesc, Allocator},
MemoryLocation,
};
use gpu_allocator::{d3d12::AllocationCreateDesc, MemoryLocation};
use parking_lot::Mutex;
use std::sync::Arc;
use windows::Win32::Graphics::{Direct3D12, Dxgi};
use crate::{
@ -61,51 +59,93 @@ impl Allocation {
}
}
pub(crate) fn create_allocator(
raw: &Direct3D12::ID3D12Device,
memory_hints: &wgt::MemoryHints,
) -> Result<(Mutex<Allocator>, u64, u64), crate::DeviceError> {
// TODO: the allocator's configuration should take hardware capability into
// account.
const MB: u64 = 1024 * 1024;
let (device_memblock_size, host_memblock_size) = match memory_hints {
wgt::MemoryHints::Performance => (256 * MB, 64 * MB),
wgt::MemoryHints::MemoryUsage => (8 * MB, 4 * MB),
wgt::MemoryHints::Manual {
suballocated_device_memory_block_size,
} => {
// TODO: Would it be useful to expose the host size in memory hints
// instead of always using half of the device size?
let device_size = suballocated_device_memory_block_size.start;
let host_size = device_size / 2;
(device_size, host_size)
/// Bundles the `gpu_allocator` D3D12 allocator with the configuration values that
/// are consulted next to it (memory block sizes and memory budget thresholds).
///
/// The allocator itself lives behind `Arc<Mutex<..>>`, so a clone of this struct
/// refers to the same underlying `gpu_allocator::d3d12::Allocator`.
#[derive(Clone)]
pub(crate) struct Allocator {
/// The wrapped `gpu_allocator` allocator; callers lock it to allocate, free,
/// or generate a report.
inner: Arc<Mutex<gpu_allocator::d3d12::Allocator>>,
/// Block size (in bytes) passed to `gpu_allocator` for device memory, after
/// clamping in `new`. Read for `MemoryLocation::GpuOnly` when checking the
/// memory budget.
device_memblock_size: u64,
/// Block size (in bytes) passed to `gpu_allocator` for host memory, after
/// clamping in `new`. Read for `MemoryLocation::CpuToGpu` and
/// `MemoryLocation::GpuToCpu` when checking the memory budget.
host_memblock_size: u64,
/// Thresholds stored as given to `new`; its `for_resource_creation` and
/// `for_device_loss` fields are read by the budget checks.
pub memory_budget_thresholds: wgt::MemoryBudgetThresholds,
}
impl Allocator {
/// Creates a `gpu_allocator` D3D12 allocator for `raw` and wraps it together with
/// the chosen memory block sizes and the given `memory_budget_thresholds`.
///
/// The (device, host) block sizes are derived from `memory_hints`:
/// - `Performance`: 256 MiB / 64 MiB
/// - `MemoryUsage`: 8 MiB / 4 MiB
/// - `Manual`: the start of `suballocated_device_memory_block_size` for device
///   memory and half of that for host memory
///
/// Both sizes are then clamped to the range 4 MiB..=256 MiB.
///
/// # Errors
///
/// If `gpu_allocator::d3d12::Allocator::new` fails, the error is logged and then
/// propagated through `?` as a `crate::DeviceError`.
pub(crate) fn new(
raw: &Direct3D12::ID3D12Device,
memory_hints: &wgt::MemoryHints,
memory_budget_thresholds: wgt::MemoryBudgetThresholds,
) -> Result<Self, crate::DeviceError> {
// TODO: the allocator's configuration should take hardware capability into
// account.
const MB: u64 = 1024 * 1024;
// Pick the (device, host) block sizes requested by the memory hints.
let (device_memblock_size, host_memblock_size) = match memory_hints {
wgt::MemoryHints::Performance => (256 * MB, 64 * MB),
wgt::MemoryHints::MemoryUsage => (8 * MB, 4 * MB),
wgt::MemoryHints::Manual {
suballocated_device_memory_block_size,
} => {
// TODO: Would it be useful to expose the host size in memory hints
// instead of always using half of the device size?
let device_size = suballocated_device_memory_block_size.start;
let host_size = device_size / 2;
(device_size, host_size)
}
};
// gpu_allocator clamps the sizes between 4MiB and 256MiB, but we clamp them ourselves since we use
// the sizes when detecting high memory pressure and there is no way to query the values otherwise.
let device_memblock_size = device_memblock_size.clamp(4 * MB, 256 * MB);
let host_memblock_size = host_memblock_size.clamp(4 * MB, 256 * MB);
let allocation_sizes =
gpu_allocator::AllocationSizes::new(device_memblock_size, host_memblock_size);
// The descriptor holds a clone of the device handle; debug settings are left at their defaults.
let allocator_desc = gpu_allocator::d3d12::AllocatorCreateDesc {
device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(raw.clone()),
debug_settings: Default::default(),
allocation_sizes,
};
// Log the failure before `?` propagates it to the caller.
let allocator = gpu_allocator::d3d12::Allocator::new(&allocator_desc).inspect_err(|e| {
log::error!("Failed to create d3d12 allocator, error: {}", e);
})?;
// Store the clamped sizes so the values kept here are the ones handed to gpu_allocator.
Ok(Self {
inner: Arc::new(Mutex::new(allocator)),
device_memblock_size,
host_memblock_size,
memory_budget_thresholds,
})
}
pub(crate) fn generate_report(&self) -> wgt::AllocatorReport {
let mut upstream = self.inner.lock().generate_report();
let allocations = upstream
.allocations
.iter_mut()
.map(|alloc| wgt::AllocationReport {
name: core::mem::take(&mut alloc.name),
offset: alloc.offset,
size: alloc.size,
})
.collect();
let blocks = upstream
.blocks
.iter()
.map(|block| wgt::MemoryBlockReport {
size: block.size,
allocations: block.allocations.clone(),
})
.collect();
wgt::AllocatorReport {
allocations,
blocks,
total_allocated_bytes: upstream.total_allocated_bytes,
total_reserved_bytes: upstream.total_reserved_bytes,
}
};
// gpu_allocator clamps the sizes between 4MiB and 256MiB, but we clamp them ourselves since we use
// the sizes when detecting high memory pressure and there is no way to query the values otherwise.
let device_memblock_size = device_memblock_size.clamp(4 * MB, 256 * MB);
let host_memblock_size = host_memblock_size.clamp(4 * MB, 256 * MB);
let allocation_sizes =
gpu_allocator::AllocationSizes::new(device_memblock_size, host_memblock_size);
let allocator_desc = gpu_allocator::d3d12::AllocatorCreateDesc {
device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(raw.clone()),
debug_settings: Default::default(),
allocation_sizes,
};
let allocator = Allocator::new(&allocator_desc).inspect_err(|e| {
log::error!("Failed to create d3d12 allocator, error: {}", e);
})?;
Ok((
Mutex::new(allocator),
device_memblock_size,
host_memblock_size,
))
}
}
/// To allow us to construct buffers from both a `Device` and `CommandEncoder`
@ -114,7 +154,7 @@ pub(crate) fn create_allocator(
pub(crate) struct DeviceAllocationContext<'a> {
pub(crate) raw: &'a Direct3D12::ID3D12Device,
pub(crate) shared: &'a super::DeviceShared,
pub(crate) mem_allocator: &'a Mutex<Allocator>,
pub(crate) mem_allocator: &'a Allocator,
pub(crate) counters: &'a wgt::HalCounters,
}
@ -248,7 +288,7 @@ impl<'a> DeviceAllocationContext<'a> {
counter.sub(allocation.size() as isize);
if let AllocationInner::Placed { inner } = allocation.inner {
match self.mem_allocator.lock().free(inner) {
match self.mem_allocator.inner.lock().free(inner) {
Ok(_) => (),
// TODO: Don't panic here
Err(e) => panic!("Failed to destroy dx12 {:?}, {e}", allocation.ty),
@ -269,7 +309,7 @@ impl<'a> DeviceAllocationContext<'a> {
) -> Result<(Direct3D12::ID3D12Resource, Allocation), crate::DeviceError> {
let name = desc.label.unwrap_or("Unlabeled buffer");
let mut allocator = self.mem_allocator.lock();
let mut allocator = self.mem_allocator.inner.lock();
let allocation_desc = AllocationCreateDesc {
name,
@ -308,7 +348,7 @@ impl<'a> DeviceAllocationContext<'a> {
) -> Result<(Direct3D12::ID3D12Resource, Allocation), crate::DeviceError> {
let name = desc.label.unwrap_or("Unlabeled texture");
let mut allocator = self.mem_allocator.lock();
let mut allocator = self.mem_allocator.inner.lock();
let allocation_desc = AllocationCreateDesc {
name,
@ -347,7 +387,7 @@ impl<'a> DeviceAllocationContext<'a> {
) -> Result<(Direct3D12::ID3D12Resource, Allocation), crate::DeviceError> {
let name = desc.label.unwrap_or("Unlabeled acceleration structure");
let mut allocator = self.mem_allocator.lock();
let mut allocator = self.mem_allocator.inner.lock();
let allocation_desc = AllocationCreateDesc {
name,
@ -526,7 +566,11 @@ impl<'a> DeviceAllocationContext<'a> {
.GetResourceAllocationInfo(0, std::slice::from_ref(desc))
};
let Some(threshold) = self.shared.memory_budget_thresholds.for_resource_creation else {
let Some(threshold) = self
.mem_allocator
.memory_budget_thresholds
.for_resource_creation
else {
return Ok(allocation_info);
};
@ -552,8 +596,10 @@ impl<'a> DeviceAllocationContext<'a> {
let memblock_size = match location {
MemoryLocation::Unknown => unreachable!(),
MemoryLocation::GpuOnly => self.shared.device_memblock_size,
MemoryLocation::CpuToGpu | MemoryLocation::GpuToCpu => self.shared.host_memblock_size,
MemoryLocation::GpuOnly => self.mem_allocator.device_memblock_size,
MemoryLocation::CpuToGpu | MemoryLocation::GpuToCpu => {
self.mem_allocator.host_memblock_size
}
};
if info.CurrentUsage + allocation_info.SizeInBytes.max(memblock_size)