Unconditionally Generate Bindless Samplers in DX12 (#6766)

Connor Fitzgerald 2025-01-21 14:24:03 -05:00 committed by GitHub
parent 2298cd2dd6
commit 436f716715
31 changed files with 1641 additions and 268 deletions

Cargo.lock (generated)

@@ -159,6 +159,15 @@ version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"

[[package]]
name = "approx"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
dependencies = [
 "num-traits",
]

[[package]]
name = "arbitrary"
version = "1.4.1"
@@ -4187,6 +4196,7 @@ name = "wgpu-test"
version = "24.0.0"
dependencies = [
 "anyhow",
 "approx",
 "arrayvec",
 "bitflags 2.8.0",
 "bytemuck",


@@ -79,6 +79,7 @@ version = "24.0.0"
anyhow = "1.0.95"
argh = "0.1.13"
arrayvec = "0.7"
approx = "0.5"
bincode = "1"
bit-vec = "0.8"
bitflags = "2.7"


@@ -1190,6 +1190,83 @@ impl<W: Write> super::Writer<'_, W> {
Ok(())
}
/// Writes out the sampler heap declarations if they haven't been written yet.
pub(super) fn write_sampler_heaps(&mut self) -> BackendResult {
if self.wrapped.sampler_heaps {
return Ok(());
}
writeln!(
self.out,
"SamplerState {}[2048]: register(s{}, space{});",
super::writer::SAMPLER_HEAP_VAR,
self.options.sampler_heap_target.standard_samplers.register,
self.options.sampler_heap_target.standard_samplers.space
)?;
writeln!(
self.out,
"SamplerComparisonState {}[2048]: register(s{}, space{});",
super::writer::COMPARISON_SAMPLER_HEAP_VAR,
self.options
.sampler_heap_target
.comparison_samplers
.register,
self.options.sampler_heap_target.comparison_samplers.space
)?;
self.wrapped.sampler_heaps = true;
Ok(())
}
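With the default `SamplerHeapBindTargets` (standard samplers at `s0, space0`, comparison samplers at `s0, space1`), the two `writeln!`s above produce declarations like the following sketch:

```hlsl
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
```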
/// Writes out the sampler index buffer declaration if it hasn't been written yet.
pub(super) fn write_wrapped_sampler_buffer(
&mut self,
key: super::SamplerIndexBufferKey,
) -> BackendResult {
// The astute will notice that we do a double hash lookup, but we do this to avoid
// holding a mutable reference to `self` while trying to call `write_sampler_heaps`.
//
// We only pay this double lookup cost when we actually need to write out the sampler
// buffer, which should not be common.
if self.wrapped.sampler_index_buffers.contains_key(&key) {
return Ok(());
};
self.write_sampler_heaps()?;
// Because the group number can be arbitrary, we use the namer to generate a unique name
// instead of adding it to the reserved name list.
let sampler_array_name = self
.namer
.call(&format!("nagaGroup{}SamplerIndexArray", key.group));
let bind_target = match self.options.sampler_buffer_binding_map.get(&key) {
Some(&bind_target) => bind_target,
None if self.options.fake_missing_bindings => super::BindTarget {
space: u8::MAX,
register: key.group,
binding_array_size: None,
},
None => {
unreachable!("Sampler buffer of group {key:?} not bound to a register");
}
};
writeln!(
self.out,
"StructuredBuffer<uint> {sampler_array_name} : register(t{}, space{});",
bind_target.register, bind_target.space
)?;
self.wrapped
.sampler_index_buffers
.insert(key, sampler_array_name);
Ok(())
}
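For example, with bind group 0's sampler index buffer mapped to `t0, space255` (illustrative values; the real register and space come from `sampler_buffer_binding_map`), this emits:

```hlsl
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
```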
pub(super) fn write_texture_coordinates(
&mut self,
kind: &str,


@@ -820,6 +820,8 @@ pub const RESERVED: &[&str] = &[
super::writer::FREXP_FUNCTION,
super::writer::EXTRACT_BITS_FUNCTION,
super::writer::INSERT_BITS_FUNCTION,
super::writer::SAMPLER_HEAP_VAR,
super::writer::COMPARISON_SAMPLER_HEAP_VAR,
];

// DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254


@@ -92,6 +92,15 @@ float3x2 GetMatmOnBaz(Baz obj) {
We also emit an analogous `Set` function, as well as functions for
accessing individual columns by dynamic index.
## Sampler Handling
Due to limitations in how sampler heaps work in D3D12, we need to access samplers
through a layer of indirection. Instead of directly binding samplers, we bind the entire
sampler heap as both a standard and a comparison sampler heap. We then use a sampler
index buffer for each bind group. This buffer is accessed in the shader to get the actual
sampler index within the heap. See the wgpu_hal dx12 backend documentation for more
information.
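As a sketch of the whole scheme (names and registers here mirror the HLSL snapshot tests later in this diff; the index-buffer register is whatever the binding map assigns):

```hlsl
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);

// A sampler at @group(0) @binding(1) becomes a single static lookup:
static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
```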
[hlsl]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl
[ilov]: https://gpuweb.github.io/gpuweb/wgsl/#internal-value-layout
[16bb]: https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing#constant-buffer-packing
@@ -110,11 +119,14 @@ use thiserror::Error;
use crate::{back, proc};

-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub struct BindTarget {
pub space: u8,
/// For regular bindings this is the register number.
///
/// For sampler bindings, this is the index within the bind group's sampler index buffer.
pub register: u32,
/// If the binding is an unsized binding array, this overrides the size.
pub binding_array_size: Option<u32>,
@@ -179,6 +191,43 @@ impl crate::ImageDimension {
}
}
#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub struct SamplerIndexBufferKey {
pub group: u32,
}
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
#[cfg_attr(feature = "deserialize", serde(default))]
pub struct SamplerHeapBindTargets {
pub standard_samplers: BindTarget,
pub comparison_samplers: BindTarget,
}
impl Default for SamplerHeapBindTargets {
fn default() -> Self {
Self {
standard_samplers: BindTarget {
space: 0,
register: 0,
binding_array_size: None,
},
comparison_samplers: BindTarget {
space: 1,
register: 0,
binding_array_size: None,
},
}
}
}
// We use a BTreeMap here so that we can hash it.
pub type SamplerIndexBufferBindingMap =
std::collections::BTreeMap<SamplerIndexBufferKey, BindTarget>;
/// Shorthand result used internally by the backend
type BackendResult = Result<(), Error>;
@@ -207,6 +256,10 @@ pub struct Options {
pub special_constants_binding: Option<BindTarget>,
/// Bind target of the push constant buffer
pub push_constants_target: Option<BindTarget>,
/// Bind target of the sampler heap and comparison sampler heap.
pub sampler_heap_target: SamplerHeapBindTargets,
/// Mapping of each bind group's sampler index buffer to a bind target.
pub sampler_buffer_binding_map: SamplerIndexBufferBindingMap,
/// Should workgroup variables be zero initialized (by polyfilling)?
pub zero_initialize_workgroup_memory: bool,
/// Should we restrict indexing of vectors, matrices and arrays?
@@ -220,6 +273,8 @@ impl Default for Options {
binding_map: BindingMap::default(),
fake_missing_bindings: true,
special_constants_binding: None,
sampler_heap_target: SamplerHeapBindTargets::default(),
sampler_buffer_binding_map: std::collections::BTreeMap::default(),
push_constants_target: None,
zero_initialize_workgroup_memory: true,
restrict_indexing: true,
@@ -233,13 +288,13 @@ impl Options {
res_binding: &crate::ResourceBinding,
) -> Result<BindTarget, EntryPointError> {
match self.binding_map.get(res_binding) {
-Some(target) => Ok(target.clone()),
+Some(target) => Ok(*target),
None if self.fake_missing_bindings => Ok(BindTarget {
space: res_binding.group as u8,
register: res_binding.binding,
binding_array_size: None,
}),
-None => Err(EntryPointError::MissingBinding(res_binding.clone())),
+None => Err(EntryPointError::MissingBinding(*res_binding)),
}
}
}
@@ -279,6 +334,10 @@ struct Wrapped {
struct_matrix_access: crate::FastHashSet<help::WrappedStructMatrixAccess>,
mat_cx2s: crate::FastHashSet<help::WrappedMatCx2>,
math: crate::FastHashSet<help::WrappedMath>,
/// If true, the sampler heaps have been written out.
sampler_heaps: bool,
/// Mapping from SamplerIndexBufferKey to the name the namer returned.
sampler_index_buffers: crate::FastHashMap<SamplerIndexBufferKey, String>,
}

impl Wrapped {


@@ -24,6 +24,8 @@ pub(crate) const MODF_FUNCTION: &str = "naga_modf";
pub(crate) const FREXP_FUNCTION: &str = "naga_frexp";
pub(crate) const EXTRACT_BITS_FUNCTION: &str = "naga_extractBits";
pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits";
pub(crate) const SAMPLER_HEAP_VAR: &str = "nagaSamplerHeap";
pub(crate) const COMPARISON_SAMPLER_HEAP_VAR: &str = "nagaComparisonSamplerHeap";

struct EpStructMember {
name: String,
@@ -94,6 +96,16 @@ const fn is_subgroup_builtin_binding(binding: &Option<crate::Binding>) -> bool {
)
}
/// Information for how to generate a `binding_array<sampler>` access.
struct BindingArraySamplerInfo {
/// Variable name of the sampler heap
sampler_heap_name: &'static str,
/// Variable name of the sampler index buffer
sampler_index_buffer_name: String,
/// Variable name of the base index _into_ the sampler index buffer
binding_array_base_index_name: String,
}
impl<'a, W: fmt::Write> super::Writer<'a, W> {
pub fn new(out: W, options: &'a Options) -> Self {
Self {
@@ -143,11 +155,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
) {
use crate::Expression;
self.need_bake_expressions.clear();
-for (fun_handle, expr) in func.expressions.iter() {
-let expr_info = &info[fun_handle];
-let min_ref_count = func.expressions[fun_handle].bake_ref_count();
+for (exp_handle, expr) in func.expressions.iter() {
+let expr_info = &info[exp_handle];
+let min_ref_count = func.expressions[exp_handle].bake_ref_count();
if min_ref_count <= expr_info.ref_count {
-self.need_bake_expressions.insert(fun_handle);
+self.need_bake_expressions.insert(exp_handle);
}

if let Expression::Math { fun, arg, .. } = *expr {
@@ -172,7 +184,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
self.need_bake_expressions.insert(arg);
}
crate::MathFunction::CountLeadingZeros => {
-let inner = info[fun_handle].ty.inner_with(&module.types);
+let inner = info[exp_handle].ty.inner_with(&module.types);
if let Some(ScalarKind::Sint) = inner.scalar_kind() {
self.need_bake_expressions.insert(arg);
}
@@ -187,6 +199,14 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
self.need_bake_expressions.insert(expr);
}
}
if let Expression::GlobalVariable(_) = *expr {
let inner = info[exp_handle].ty.inner_with(&module.types);
if let TypeInner::Sampler { .. } = *inner {
self.need_bake_expressions.insert(exp_handle);
}
}
}

for statement in func.body.iter() {
match *statement {
@@ -814,6 +834,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
}
}
let handle_ty = match *inner {
TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
_ => inner,
};
// Samplers are handled entirely differently, so defer to the dedicated method.
let is_sampler = matches!(*handle_ty, TypeInner::Sampler { .. });
if is_sampler {
return self.write_global_sampler(module, handle, global);
}
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-variable-register
let register_ty = match global.space {
crate::AddressSpace::Function => unreachable!("Function address space"),
@@ -843,13 +875,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
register
}
crate::AddressSpace::Handle => {
-let handle_ty = match *inner {
-TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
-_ => inner,
-};
let register = match *handle_ty {
-TypeInner::Sampler { .. } => "s",
// all storage textures are UAV, unconditionally
TypeInner::Image {
class: crate::ImageClass::Storage { .. },
@@ -956,6 +982,66 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
Ok(())
}
fn write_global_sampler(
&mut self,
module: &Module,
handle: Handle<crate::GlobalVariable>,
global: &crate::GlobalVariable,
) -> BackendResult {
let binding = *global.binding.as_ref().unwrap();
let key = super::SamplerIndexBufferKey {
group: binding.group,
};
self.write_wrapped_sampler_buffer(key)?;
// This was already validated, so we can confidently unwrap it.
let bt = self.options.resolve_resource_binding(&binding).unwrap();
match module.types[global.ty].inner {
TypeInner::Sampler { comparison } => {
// If we are generating a static access, we create a variable for the sampler.
//
// This prevents the DXIL from containing multiple lookups for the sampler, which
// the backend compiler will then have to eliminate. AMD does seem to be able to
// eliminate these, but better safe than sorry.
write!(self.out, "static const ")?;
self.write_type(module, global.ty)?;
let heap_var = if comparison {
COMPARISON_SAMPLER_HEAP_VAR
} else {
SAMPLER_HEAP_VAR
};
let index_buffer_name = &self.wrapped.sampler_index_buffers[&key];
let name = &self.names[&NameKey::GlobalVariable(handle)];
writeln!(
self.out,
" {name} = {heap_var}[{index_buffer_name}[{register}]];",
register = bt.register
)?;
}
TypeInner::BindingArray { .. } => {
// If we are generating a binding array, we cannot directly access the sampler as the index
// into the sampler index buffer is unknown at compile time. Instead we generate a constant
// that represents the "base" index into the sampler index buffer. This constant is added
// to the user provided index to get the final index into the sampler index buffer.
let name = &self.names[&NameKey::GlobalVariable(handle)];
writeln!(
self.out,
"static const uint {name} = {register};",
register = bt.register
)?;
}
_ => unreachable!(),
};
Ok(())
}
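As a sketch of the two output shapes this method emits (values mirror the snapshot tests later in this diff):

```hlsl
// TypeInner::Sampler: resolve the heap slot once, at global scope.
static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];

// TypeInner::BindingArray: the element index is only known at runtime,
// so only the base offset into the sampler index buffer is emitted.
static const uint samp = 0;
```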
/// Helper method used to write global constants
///
/// # Notes
@@ -2670,7 +2756,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
};
self.write_expr(module, base, func_ctx)?;
-write!(self.out, "[")?;
let array_sampler_info = self.sampler_binding_array_info_from_expression(
module, func_ctx, base, resolved,
);
if let Some(ref info) = array_sampler_info {
write!(self.out, "{}[", info.sampler_heap_name)?;
} else {
write!(self.out, "[")?;
}
let needs_bound_check = self.options.restrict_indexing
&& !indexing_binding_array
@@ -2715,7 +2810,17 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
if non_uniform_qualifier {
write!(self.out, "NonUniformResourceIndex(")?;
}
if let Some(ref info) = array_sampler_info {
write!(
self.out,
"{}[{} + ",
info.sampler_index_buffer_name, info.binding_array_base_index_name,
)?;
}
self.write_expr(module, index, func_ctx)?;
if array_sampler_info.is_some() {
write!(self.out, "]")?;
}
if non_uniform_qualifier {
write!(self.out, ")")?;
}
@@ -2730,43 +2835,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
{
// do nothing, the chain is written on `Load`/`Store`
} else {
-fn write_access<W: fmt::Write>(
-writer: &mut super::Writer<'_, W>,
-resolved: &TypeInner,
-base_ty_handle: Option<Handle<crate::Type>>,
-index: u32,
-) -> BackendResult {
-match *resolved {
-// We specifically lift the ValuePointer to this case. While `[0]` is valid
-// HLSL for any vector behind a value pointer, FXC completely miscompiles
-// it and generates completely nonsensical DXBC.
-//
-// See https://github.com/gfx-rs/naga/issues/2095 for more details.
-TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
-// Write vector access as a swizzle
-write!(writer.out, ".{}", back::COMPONENTS[index as usize])?
-}
-TypeInner::Matrix { .. }
-| TypeInner::Array { .. }
-| TypeInner::BindingArray { .. } => write!(writer.out, "[{index}]")?,
-TypeInner::Struct { .. } => {
-// This will never panic in case the type is a `Struct`, this is not true
-// for other types so we can only check while inside this match arm
-let ty = base_ty_handle.unwrap();
-write!(
-writer.out,
-".{}",
-&writer.names[&NameKey::StructMember(ty, index)]
-)?
-}
-ref other => {
-return Err(Error::Custom(format!("Cannot index {other:?}")))
-}
-}
-Ok(())
-}
// We write the matrix column access in a special way since
// the type of `base` is our special __matCx2 struct.
if let Some(MatrixType {
@@ -2816,8 +2884,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
}
}
let array_sampler_info = self.sampler_binding_array_info_from_expression(
module, func_ctx, base, resolved,
);
if let Some(ref info) = array_sampler_info {
write!(
self.out,
"{}[{}",
info.sampler_heap_name, info.sampler_index_buffer_name
)?;
}
self.write_expr(module, base, func_ctx)?;
-write_access(self, resolved, base_ty_handle, index)?;
match *resolved {
// We specifically lift the ValuePointer to this case. While `[0]` is valid
// HLSL for any vector behind a value pointer, FXC completely miscompiles
// it and generates completely nonsensical DXBC.
//
// See https://github.com/gfx-rs/naga/issues/2095 for more details.
TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
// Write vector access as a swizzle
write!(self.out, ".{}", back::COMPONENTS[index as usize])?
}
TypeInner::Matrix { .. }
| TypeInner::Array { .. }
| TypeInner::BindingArray { .. } => {
if let Some(ref info) = array_sampler_info {
write!(
self.out,
"[{} + {index}]",
info.binding_array_base_index_name
)?;
} else {
write!(self.out, "[{index}]")?;
}
}
TypeInner::Struct { .. } => {
// This will never panic in case the type is a `Struct`, this is not true
// for other types so we can only check while inside this match arm
let ty = base_ty_handle.unwrap();
write!(
self.out,
".{}",
&self.names[&NameKey::StructMember(ty, index)]
)?
}
ref other => return Err(Error::Custom(format!("Cannot index {other:?}"))),
}
if array_sampler_info.is_some() {
write!(self.out, "]")?;
}
}
}
Expression::FunctionArgument(pos) => {
@@ -2958,13 +3078,30 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
write!(self.out, ".x")?;
}
}
-Expression::GlobalVariable(handle) => match module.global_variables[handle].space {
-crate::AddressSpace::Storage { .. } => {}
-_ => {
+Expression::GlobalVariable(handle) => {
+let global_variable = &module.global_variables[handle];
+let ty = &module.types[global_variable.ty].inner;
// In the case of binding arrays of samplers, we must not write anything,
// as we are in the wrong position to fully write the expression.
//
// The entire writing is done by AccessIndex.
let is_binding_array_of_samplers = match *ty {
TypeInner::BindingArray { base, .. } => {
let base_ty = &module.types[base].inner;
matches!(*base_ty, TypeInner::Sampler { .. })
}
_ => false,
};
let is_storage_space =
matches!(global_variable.space, crate::AddressSpace::Storage { .. });
if !is_binding_array_of_samplers && !is_storage_space {
let name = &self.names[&NameKey::GlobalVariable(handle)];
write!(self.out, "{name}")?;
}
-},
+}
Expression::LocalVariable(handle) => {
write!(self.out, "{}", self.names[&func_ctx.name_key(handle)])?
}
@@ -3680,6 +3817,52 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
Ok(())
}
/// Find the [`BindingArraySamplerInfo`] from an expression so that such an access
/// can be generated later.
fn sampler_binding_array_info_from_expression(
&mut self,
module: &Module,
func_ctx: &back::FunctionCtx<'_>,
base: Handle<crate::Expression>,
resolved: &TypeInner,
) -> Option<BindingArraySamplerInfo> {
if let TypeInner::BindingArray {
base: base_ty_handle,
..
} = *resolved
{
let base_ty = &module.types[base_ty_handle].inner;
if let TypeInner::Sampler { comparison, .. } = *base_ty {
let base = &func_ctx.expressions[base];
if let crate::Expression::GlobalVariable(handle) = *base {
let variable = &module.global_variables[handle];
let sampler_heap_name = match comparison {
true => COMPARISON_SAMPLER_HEAP_VAR,
false => SAMPLER_HEAP_VAR,
};
return Some(BindingArraySamplerInfo {
sampler_heap_name,
sampler_index_buffer_name: self
.wrapped
.sampler_index_buffers
.get(&super::SamplerIndexBufferKey {
group: variable.binding.unwrap().group,
})
.unwrap()
.clone(),
binding_array_base_index_name: self.names[&NameKey::GlobalVariable(handle)]
.clone(),
});
}
}
}
None
}
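Combining the three names this returns, a binding-array access such as `samp[i]` lowers to the pattern seen in the binding-arrays snapshot later in this diff:

```hlsl
nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + i]]
// and, for a non-uniform index:
nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])]
```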
fn write_named_expr(
&mut self,
module: &Module,


@@ -498,7 +498,7 @@ impl Options {
index: 0,
interpolation: None,
}),
-None => Err(EntryPointError::MissingBindTarget(res_binding.clone())),
+None => Err(EntryPointError::MissingBindTarget(*res_binding)),
}
}


@@ -5323,8 +5323,7 @@ template <typename A>
None => false,
};
if !good {
-ep_error =
-Some(super::EntryPointError::MissingBindTarget(br.clone()));
+ep_error = Some(super::EntryPointError::MissingBindTarget(*br));
break;
}
}


@@ -966,7 +966,7 @@ pub enum Binding {
}

/// Pipeline binding information for global resources.
-#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serialize", derive(Serialize))]
#[cfg_attr(feature = "deserialize", derive(Deserialize))]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]


@@ -778,7 +778,7 @@ impl super::Validator {
}
if let Some(ref bind) = var.binding {
-if !self.ep_resource_bindings.insert(bind.clone()) {
+if !self.ep_resource_bindings.insert(*bind) {
if self.flags.contains(super::ValidationFlags::BINDINGS) {
return Err(EntryPointError::BindingCollision(var_handle)
.with_span_handle(var_handle, &module.global_variables));


@@ -57,6 +57,9 @@
(group: 0, binding: 2): (space: 1, register: 0),
},
fake_missing_bindings: false,
sampler_buffer_binding_map: {
(group: 0): (space: 2, register: 0),
},
special_constants_binding: Some((space: 0, register: 1)),
zero_initialize_workgroup_memory: true,
restrict_indexing: true


@@ -12,8 +12,11 @@ Texture2DArray<float4> texture_array_2darray[5] : register(t0, space2);
Texture2DMS<float4> texture_array_multisampled[5] : register(t0, space3);
Texture2D<float> texture_array_depth[5] : register(t0, space4);
RWTexture2D<float4> texture_array_storage[5] : register(u0, space5);
-SamplerState samp[5] : register(s0, space6);
-SamplerComparisonState samp_comp[5] : register(s0, space7);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const uint samp = 0;
+static const uint samp_comp = 0;
cbuffer uni : register(b0, space8) { UniformIndex uni; }

struct FragmentInput_main {
@@ -66,22 +69,22 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
u2_ = (_e27 + NagaDimensions2D(texture_array_unbounded[uniform_index]));
uint2 _e32 = u2_;
u2_ = (_e32 + NagaDimensions2D(texture_array_unbounded[NonUniformResourceIndex(non_uniform_index)]));
-float4 _e38 = texture_array_bounded[0].Gather(samp[0], uv);
+float4 _e38 = texture_array_bounded[0].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
float4 _e39 = v4_;
v4_ = (_e39 + _e38);
-float4 _e45 = texture_array_bounded[uniform_index].Gather(samp[uniform_index], uv);
+float4 _e45 = texture_array_bounded[uniform_index].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
float4 _e46 = v4_;
v4_ = (_e46 + _e45);
-float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(samp[NonUniformResourceIndex(non_uniform_index)], uv);
+float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
float4 _e53 = v4_;
v4_ = (_e53 + _e52);
-float4 _e60 = texture_array_depth[0].GatherCmp(samp_comp[0], uv, 0.0);
+float4 _e60 = texture_array_depth[0].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
float4 _e61 = v4_;
v4_ = (_e61 + _e60);
-float4 _e68 = texture_array_depth[uniform_index].GatherCmp(samp_comp[uniform_index], uv, 0.0);
+float4 _e68 = texture_array_depth[uniform_index].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
float4 _e69 = v4_;
v4_ = (_e69 + _e68);
-float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
float4 _e77 = v4_;
v4_ = (_e77 + _e76);
float4 _e82 = texture_array_unbounded[0].Load(int3(pix, 0));
@@ -111,58 +114,58 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
u1_ = (_e135 + NagaMSNumSamples2D(texture_array_multisampled[uniform_index]));
uint _e140 = u1_;
u1_ = (_e140 + NagaMSNumSamples2D(texture_array_multisampled[NonUniformResourceIndex(non_uniform_index)]));
-float4 _e146 = texture_array_bounded[0].Sample(samp[0], uv);
+float4 _e146 = texture_array_bounded[0].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
float4 _e147 = v4_;
v4_ = (_e147 + _e146);
-float4 _e153 = texture_array_bounded[uniform_index].Sample(samp[uniform_index], uv);
+float4 _e153 = texture_array_bounded[uniform_index].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
float4 _e154 = v4_;
v4_ = (_e154 + _e153);
-float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(samp[NonUniformResourceIndex(non_uniform_index)], uv);
+float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
float4 _e161 = v4_;
v4_ = (_e161 + _e160);
-float4 _e168 = texture_array_bounded[0].SampleBias(samp[0], uv, 0.0);
+float4 _e168 = texture_array_bounded[0].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
float4 _e169 = v4_;
v4_ = (_e169 + _e168);
-float4 _e176 = texture_array_bounded[uniform_index].SampleBias(samp[uniform_index], uv, 0.0);
+float4 _e176 = texture_array_bounded[uniform_index].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
float4 _e177 = v4_;
v4_ = (_e177 + _e176);
-float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
float4 _e185 = v4_;
v4_ = (_e185 + _e184);
-float _e192 = texture_array_depth[0].SampleCmp(samp_comp[0], uv, 0.0);
+float _e192 = texture_array_depth[0].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
float _e193 = v1_;
v1_ = (_e193 + _e192);
-float _e200 = texture_array_depth[uniform_index].SampleCmp(samp_comp[uniform_index], uv, 0.0);
+float _e200 = texture_array_depth[uniform_index].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
float _e201 = v1_;
v1_ = (_e201 + _e200);
-float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
float _e209 = v1_;
v1_ = (_e209 + _e208);
-float _e216 = texture_array_depth[0].SampleCmpLevelZero(samp_comp[0], uv, 0.0);
+float _e216 = texture_array_depth[0].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
float _e217 = v1_;
v1_ = (_e217 + _e216);
-float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(samp_comp[uniform_index], uv, 0.0);
+float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
float _e225 = v1_;
v1_ = (_e225 + _e224);
-float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
float _e233 = v1_;
v1_ = (_e233 + _e232);
-float4 _e239 = texture_array_bounded[0].SampleGrad(samp[0], uv, uv, uv);
+float4 _e239 = texture_array_bounded[0].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, uv, uv);
float4 _e240 = v4_;
v4_ = (_e240 + _e239);
-float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(samp[uniform_index], uv, uv, uv);
+float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, uv, uv);
float4 _e247 = v4_;
v4_ = (_e247 + _e246);
-float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(samp[NonUniformResourceIndex(non_uniform_index)], uv, uv, uv);
+float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, uv, uv);
float4 _e254 = v4_;
v4_ = (_e254 + _e253);
-float4 _e261 = texture_array_bounded[0].SampleLevel(samp[0], uv, 0.0);
+float4 _e261 = texture_array_bounded[0].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
float4 _e262 = v4_;
v4_ = (_e262 + _e261);
-float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(samp[uniform_index], uv, 0.0);
+float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
float4 _e270 = v4_;
v4_ = (_e270 + _e269);
-float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
float4 _e278 = v4_;
v4_ = (_e278 + _e277);
float4 _e282 = v4_;


@@ -15,8 +15,11 @@ TextureCube<float4> image_cube : register(t5);
TextureCubeArray<float4> image_cube_array : register(t6);
Texture3D<float4> image_3d : register(t7);
Texture2DMS<float4> image_aa : register(t8);
-SamplerState sampler_reg : register(s0, space1);
-SamplerComparisonState sampler_cmp : register(s1, space1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space255);
+static const SamplerState sampler_reg = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
+static const SamplerComparisonState sampler_cmp = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
Texture2D<float> image_2d_depth : register(t2, space1);
Texture2DArray<float> image_2d_array_depth : register(t3, space1);
TextureCube<float> image_cube_depth : register(t4, space1);


@@ -6,7 +6,10 @@ struct VertexOutput {
static const float c_scale = 1.2;

Texture2D<float4> u_texture : register(t0);
-SamplerState u_sampler : register(s1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];

struct VertexOutput_vert_main {
float2 uv_2 : LOC0;


@@ -28,7 +28,10 @@ cbuffer u_entity : register(b0, space1) { Entity u_entity; }
ByteAddressBuffer s_lights : register(t1);
cbuffer u_lights : register(b1) { Light u_lights[10]; }
Texture2DArray<float> t_shadow : register(t2);
-SamplerComparisonState sampler_shadow : register(s3);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerComparisonState sampler_shadow = nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[3]];

struct VertexOutput_vs_main {
float3 world_normal : LOC0;


@@ -17,7 +17,10 @@ struct Data {
cbuffer r_data : register(b0) { Data r_data; }
TextureCube<float4> r_texture : register(t0);
-SamplerState r_sampler : register(s0, space1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space2);
+static const SamplerState r_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];

struct VertexOutput_vs_main {
float3 uv : LOC0;


@@ -1,5 +1,8 @@
Texture2D<float4> Texture : register(t0);
-SamplerState Sampler : register(s1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerState Sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];

float4 test(Texture2D<float4> Passed_Texture, SamplerState Passed_Sampler)
{


@@ -27,6 +27,7 @@ webgl = ["wgpu/webgl"]
[dependencies]
anyhow.workspace = true
arrayvec.workspace = true
approx.workspace = true
bitflags.workspace = true
bytemuck.workspace = true
cfg-if.workspace = true


@@ -83,6 +83,16 @@ pub fn fail_if<T>(
}
}
/// Returns whether the provided callback failed validation, along with the callback's return value.
pub fn did_fail<T>(device: &wgpu::Device, callback: impl FnOnce() -> T) -> (bool, T) {
device.push_error_scope(wgpu::ErrorFilter::Validation);
let result = callback();
let validation_error = pollster::block_on(device.pop_error_scope());
let failed = validation_error.is_some();
(failed, result)
}
/// Adds the necessary main function for our gpu test harness.
#[macro_export]
macro_rules! gpu_test_main {


@@ -44,6 +44,7 @@ mod ray_tracing;
mod render_pass_ownership;
mod resource_descriptor_accessor;
mod resource_error;
mod samplers;
mod scissor_tests;
mod shader;
mod shader_primitive_index;

tests/tests/samplers.rs (new file)

@@ -0,0 +1,543 @@
//! D3D12 samplers are fun and we're doing a decent amount of polyfilling with them.
//!
//! These tests ensure the polyfills work correctly and nothing misbehaves.
use wgpu_test::{did_fail, gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext};
// A number large enough to likely cause sampler caches to run out of space
// on some devices.
const PROBABLY_PROBLEMATIC_SAMPLER_COUNT: u32 = 8 * 1024;
#[gpu_test]
static SAMPLER_DEDUPLICATION: GpuTestConfiguration =
GpuTestConfiguration::new().run_sync(sampler_deduplication);
// Create a large number of samplers from the same two descriptors.
//
// Sampler deduplication in the backend should ensure this doesn't cause any issues.
fn sampler_deduplication(ctx: TestingContext) {
// Create 2 different sampler descriptors
let desc1 = wgpu::SamplerDescriptor {
label: Some("sampler1"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: 0.0,
lod_max_clamp: 100.0,
compare: None,
anisotropy_clamp: 1,
border_color: None,
};
let desc2 = wgpu::SamplerDescriptor {
label: Some("sampler2"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Linear,
lod_min_clamp: 0.0,
lod_max_clamp: 100.0,
compare: None,
anisotropy_clamp: 1,
border_color: None,
};
// Now create a bunch of samplers with these descriptors
let samplers = (0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT)
.map(|i| {
let desc = if i % 2 == 0 { &desc1 } else { &desc2 };
valid(&ctx.device, || ctx.device.create_sampler(desc))
})
.collect::<Vec<_>>();
drop(samplers);
}
#[gpu_test]
static SAMPLER_CREATION_FAILURE: GpuTestConfiguration =
GpuTestConfiguration::new().run_sync(sampler_creation_failure);
/// We want to test that sampler creation properly fails when we hit internal sampler
/// cache limits. As we don't actually know what the limit is, we first create as many
/// samplers as we can until we get the first failure.
///
/// This failure being caught ensures that the error catching machinery on samplers
/// is working as expected.
///
/// We then clear all samplers and poll the device, which should leave the caches
/// completely empty.
///
/// We then try to create the same number of samplers to ensure the cache was entirely
/// cleared.
fn sampler_creation_failure(ctx: TestingContext) {
let desc = wgpu::SamplerDescriptor {
label: Some("sampler1"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: 0.0,
lod_max_clamp: 100.0,
compare: None,
anisotropy_clamp: 1,
border_color: None,
};
let mut sampler_storage = Vec::with_capacity(PROBABLY_PROBLEMATIC_SAMPLER_COUNT as usize);
for i in 0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT {
let (failed, sampler) = did_fail(&ctx.device, || {
ctx.device.create_sampler(&wgpu::SamplerDescriptor {
lod_min_clamp: i as f32 * 0.01,
..desc
})
});
if failed {
break;
}
sampler_storage.push(sampler);
}
let failed_count = sampler_storage.len();
sampler_storage.clear();
ctx.device.poll(wgpu::Maintain::Wait);
for i in 0..failed_count {
valid(&ctx.device, || {
eprintln!("Trying to create sampler {}", i);
let sampler = ctx.device.create_sampler(&wgpu::SamplerDescriptor {
lod_min_clamp: i as f32 * 0.01,
// Change the max clamp to ensure the sampler is using different cache slots from
// the previous run.
lod_max_clamp: 200.0,
..desc
});
sampler_storage.push(sampler);
});
}
}
const SINGLE_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(0) @binding(2) var sampler1: sampler;
@group(0) @binding(3) var sampler2: sampler;
@group(1) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;
const MULTI_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(1) @binding(0) var sampler1: sampler;
@group(2) @binding(0) var sampler2: sampler;
@group(3) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;
const SAMPLER_CODE: &str = r#"
@compute @workgroup_size(1, 1, 1)
fn cs_main() {
// When sampling a 2x2 texture at the bottom left, we can change the address mode
// on S/T to get different values. This allows us to make sure the right sampler
// is being used.
results[0] = textureSampleLevel(texture, sampler0, vec2f(0.0, 1.0), 0.0);
results[1] = textureSampleLevel(texture, sampler1, vec2f(0.0, 1.0), 0.0);
results[2] = textureSampleLevel(texture, sampler2, vec2f(0.0, 1.0), 0.0);
}
"#;
enum GroupType {
Single,
Multi,
}
#[gpu_test]
static SAMPLER_SINGLE_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.test_features_limits()
// In OpenGL textures cannot be used with multiple samplers.
.skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
)
.run_sync(|ctx| sampler_bind_group(ctx, GroupType::Single));
#[gpu_test]
static SAMPLER_MULTI_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.test_features_limits()
// In OpenGL textures cannot be used with multiple samplers.
.skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
)
.run_sync(|ctx| sampler_bind_group(ctx, GroupType::Multi));
fn sampler_bind_group(ctx: TestingContext, group_type: GroupType) {
let bindings = match group_type {
GroupType::Single => SINGLE_GROUP_BINDINGS,
GroupType::Multi => MULTI_GROUP_BINDINGS,
};
let full_shader = format!("{}\n{}", bindings, SAMPLER_CODE);
let module = ctx
.device
.create_shader_module(wgpu::ShaderModuleDescriptor {
source: wgpu::ShaderSource::Wgsl(full_shader.into()),
label: None,
});
let mut bind_group_layouts = Vec::new();
match group_type {
GroupType::Single => {
let bgl = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("combination_bgl"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: true },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 2,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 3,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
],
});
bind_group_layouts.push(bgl);
}
GroupType::Multi => {
let bgl0 = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("multiple_bgl0"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: true },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
},
],
});
let bgl1 = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("multiple_bgl1"),
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
}],
});
let bgl2 = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("multiple_bgl2"),
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
count: None,
}],
});
bind_group_layouts.push(bgl0);
bind_group_layouts.push(bgl1);
bind_group_layouts.push(bgl2);
}
}
let output_bgl = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("output_bgl"),
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: None,
},
count: None,
}],
});
let mut bgl_references: Vec<_> = bind_group_layouts.iter().collect();
bgl_references.push(&output_bgl);
let pipeline_layout = ctx
.device
.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("pipeline_layout"),
bind_group_layouts: &bgl_references,
push_constant_ranges: &[],
});
let input_image = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("input_image"),
size: wgpu::Extent3d {
width: 2,
height: 2,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
view_formats: &[],
});
let input_image_view = input_image.create_view(&wgpu::TextureViewDescriptor::default());
let image_data: [u8; 16] = [
255, 0, 0, 255, /* */ 0, 255, 0, 255, //
0, 0, 255, 255, /* */ 255, 255, 255, 255, //
];
ctx.queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &input_image,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
&image_data,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(8),
rows_per_image: None,
},
wgpu::Extent3d {
width: 2,
height: 2,
depth_or_array_layers: 1,
},
);
let address_modes = [
(
wgpu::AddressMode::ClampToEdge,
wgpu::AddressMode::ClampToEdge,
),
(wgpu::AddressMode::Repeat, wgpu::AddressMode::ClampToEdge),
(wgpu::AddressMode::ClampToEdge, wgpu::AddressMode::Repeat),
];
let samplers = address_modes.map(|(address_mode_u, address_mode_v)| {
ctx.device.create_sampler(&wgpu::SamplerDescriptor {
label: None,
address_mode_u,
address_mode_v,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Linear,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: 0.0,
lod_max_clamp: 100.0,
compare: None,
anisotropy_clamp: 1,
border_color: None,
})
});
let mut bind_groups = Vec::new();
match group_type {
GroupType::Single => {
let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("combination_bg"),
layout: &bind_group_layouts[0],
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&input_image_view),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&samplers[0]),
},
wgpu::BindGroupEntry {
binding: 2,
resource: wgpu::BindingResource::Sampler(&samplers[1]),
},
wgpu::BindGroupEntry {
binding: 3,
resource: wgpu::BindingResource::Sampler(&samplers[2]),
},
],
});
bind_groups.push(bg);
}
GroupType::Multi => {
let bg0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("multiple_bg0"),
layout: &bind_group_layouts[0],
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&input_image_view),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&samplers[0]),
},
],
});
let bg1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("multiple_bg1"),
layout: &bind_group_layouts[1],
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::Sampler(&samplers[1]),
}],
});
let bg2 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("multiple_bg2"),
layout: &bind_group_layouts[2],
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::Sampler(&samplers[2]),
}],
});
bind_groups.push(bg0);
bind_groups.push(bg1);
bind_groups.push(bg2);
}
}
let output_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("output_buffer"),
size: 48,
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
let transfer_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("transfer_buffer"),
size: 48,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
let output_bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("output_bg"),
layout: &output_bgl,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
buffer: &output_buffer,
offset: 0,
size: None,
}),
}],
});
let mut bg_references = bind_groups.iter().collect::<Vec<_>>();
bg_references.push(&output_bg);
let pipeline = ctx
.device
.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: Some("pipeline"),
layout: Some(&pipeline_layout),
module: &module,
entry_point: Some("cs_main"),
cache: None,
compilation_options: Default::default(),
});
let mut encoder = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("encoder"),
});
{
let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: None,
timestamp_writes: None,
});
cpass.set_pipeline(&pipeline);
for (i, &bg) in bg_references.iter().enumerate() {
cpass.set_bind_group(i as u32, bg, &[]);
}
cpass.dispatch_workgroups(1, 1, 1);
}
encoder.copy_buffer_to_buffer(&output_buffer, 0, &transfer_buffer, 0, 48);
ctx.queue.submit([encoder.finish()]);
let buffer_slice = transfer_buffer.slice(..);
buffer_slice.map_async(wgpu::MapMode::Read, |_| {});
ctx.device.poll(wgpu::Maintain::Wait);
let buffer_data = buffer_slice.get_mapped_range();
let f32_buffer: &[f32] = bytemuck::cast_slice(&buffer_data);
let correct_values: [f32; 12] = [
0.0, 0.0, 1.0, 1.0, //
0.5, 0.5, 1.0, 1.0, //
0.5, 0.0, 0.5, 1.0, //
];
let iter = f32_buffer.iter().zip(correct_values.iter());
for (&result, &value) in iter {
approx::assert_relative_eq!(result, value, max_relative = 0.02);
}
}

View File

@@ -1521,9 +1521,9 @@ impl Device {
        };
        for (_, var) in module.global_variables.iter() {
            match var.binding {
-                Some(ref br) if br.group >= self.limits.max_bind_groups => {
+                Some(br) if br.group >= self.limits.max_bind_groups => {
                    return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex {
-                        bind: br.clone(),
+                        bind: br,
                        group: br.group,
                        limit: self.limits.max_bind_groups,
                    });

View File

@@ -921,7 +921,7 @@ impl Interface {
        let mut resource_mapping = FastHashMap::default();
        for (var_handle, var) in module.global_variables.iter() {
            let bind = match var.binding {
-                Some(ref br) => br.clone(),
+                Some(br) => br,
                _ => continue,
            };
            let naga_ty = &module.types[var.ty].inner;
@@ -1058,7 +1058,7 @@ impl Interface {
            BindingLayoutSource::Provided(layouts) => {
                // update the required binding size for this buffer
                if let ResourceType::Buffer { size } = res.ty {
-                    match shader_binding_sizes.entry(res.bind.clone()) {
+                    match shader_binding_sizes.entry(res.bind) {
                        Entry::Occupied(e) => {
                            *e.into_mut() = size.max(*e.get());
                        }
@@ -1118,7 +1118,7 @@ impl Interface {
                }
            };
            if let Err(error) = result {
-                return Err(StageError::Binding(res.bind.clone(), error));
+                return Err(StageError::Binding(res.bind, error));
            }
        }
@@ -1159,8 +1159,8 @@ impl Interface {
        if let Some(error) = error {
            return Err(StageError::Filtering {
-                texture: texture_bind.clone(),
-                sampler: sampler_bind.clone(),
+                texture: *texture_bind,
+                sampler: *sampler_bind,
                error,
            });
        }

View File

@@ -77,6 +77,7 @@ dx12 = [
    "dep:libloading",
    "dep:range-alloc",
    "dep:windows-core",
+    "dep:ordered-float",
    "gpu-allocator/d3d12",
    "naga/hlsl-out-if-target-windows",
    "windows/Win32_Graphics_Direct3D_Fxc",

View File

@@ -154,6 +154,11 @@ impl super::Adapter {
        }
        .unwrap();

+        if options.ResourceHeapTier == Direct3D12::D3D12_RESOURCE_HEAP_TIER_1 {
+            // We require Tier 2 for the ability to make samplers bindless in all cases.
+            return None;
+        }
+
        let _depth_bounds_test_supported = {
            let mut features2 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS2::default();
            unsafe {
@@ -195,6 +200,23 @@ impl super::Adapter {
            .is_ok()
        };

+        let mut max_sampler_descriptor_heap_size =
+            Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
+        {
+            let mut features19 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS19::default();
+            let res = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS19,
+                    <*mut _>::cast(&mut features19),
+                    size_of_val(&features19) as u32,
+                )
+            };
+            if res.is_ok() {
+                max_sampler_descriptor_heap_size = features19.MaxSamplerDescriptorHeapSize;
+            }
+        };
+
        let shader_model = if dxc_container.is_none() {
            naga::back::hlsl::ShaderModel::V5_1
        } else {
@@ -260,6 +282,7 @@ impl super::Adapter {
            // See https://github.com/gfx-rs/wgpu/issues/3552
            suballocation_supported: !info.name.contains("Iris(R) Xe"),
            shader_model,
+            max_sampler_descriptor_heap_size,
        };

        // Theoretically vram limited, but in practice 2^20 is the limit

View File

@@ -85,7 +85,7 @@ impl super::CommandEncoder {
        unsafe {
            list.SetDescriptorHeaps(&[
                Some(self.shared.heap_views.raw.clone()),
-                Some(self.shared.heap_samplers.raw.clone()),
+                Some(self.shared.sampler_heap.heap().clone()),
            ])
        };
    }
@@ -241,6 +241,21 @@ impl super::CommandEncoder {
                        (Pk::Transfer, _) => (),
                    }
                }
+                super::RootElement::SamplerHeap => match self.pass.kind {
+                    Pk::Render => unsafe {
+                        list.SetGraphicsRootDescriptorTable(
+                            index,
+                            self.shared.sampler_heap.gpu_descriptor_table(),
+                        )
+                    },
+                    Pk::Compute => unsafe {
+                        list.SetComputeRootDescriptorTable(
+                            index,
+                            self.shared.sampler_heap.gpu_descriptor_table(),
+                        )
+                    },
+                    Pk::Transfer => (),
+                },
            }
        }
    }
@@ -254,6 +269,9 @@ impl super::CommandEncoder {
                other: 0,
            };
        }
+        if let Some(root_index) = layout.sampler_heap_root_index {
+            self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap;
+        }
        self.pass.layout = layout.clone();
        self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
    }
@@ -907,13 +925,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
            root_index += 1;
        }

-        // Bind Sampler descriptor tables.
-        if info.tables.contains(super::TableTypes::SAMPLERS) {
-            self.pass.root_elements[root_index] =
-                super::RootElement::Table(group.handle_samplers.unwrap().gpu);
-            root_index += 1;
-        }
-
        // Bind root descriptors
        for ((&kind, &gpu_base), &offset) in info
            .dynamic_buffers
View File

@@ -284,6 +284,11 @@ impl CpuHeap {
    }

    pub(super) fn at(&self, index: u32) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
+        debug_assert!(
+            index < self.total,
+            "Index ({index}) out of bounds {total}",
+            total = self.total
+        );
        Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
            ptr: self.start.ptr + (self.handle_size * index) as usize,
        }

View File

@@ -1,4 +1,5 @@
 use std::{
+    borrow::Cow,
     ffi,
     mem::{self, size_of, size_of_val},
     num::NonZeroU32,
@@ -100,7 +101,6 @@ impl super::Device {
        // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1
        let capacity_views = limits.max_non_sampler_bindings as u64;
-        let capacity_samplers = 2_048;

        let shared = super::DeviceShared {
            zero_buffer,
@@ -141,11 +141,7 @@ impl super::Device {
                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                capacity_views,
            )?,
-            heap_samplers: descriptor::GeneralHeap::new(
-                &raw,
-                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-                capacity_samplers,
-            )?,
+            sampler_heap: super::sampler::SamplerHeap::new(&raw, &private_caps)?,
        };

        let mut rtv_pool =
@@ -188,10 +184,6 @@ impl super::Device {
                raw.clone(),
                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
            )),
-            sampler_pool: Mutex::new(descriptor::CpuPool::new(
-                raw,
-                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-            )),
            library: Arc::clone(library),
            #[cfg(feature = "renderdoc")]
            render_doc: Default::default(),
@@ -678,8 +670,6 @@ impl crate::Device for super::Device {
        &self,
        desc: &crate::SamplerDescriptor,
    ) -> Result<super::Sampler, crate::DeviceError> {
-        let handle = self.sampler_pool.lock().alloc_handle()?;
-
        let reduction = match desc.compare {
            Some(_) => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
            None => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_STANDARD,
@@ -697,34 +687,39 @@ impl crate::Device for super::Device {
        let border_color = conv::map_border_color(desc.border_color);

-        unsafe {
-            self.raw.CreateSampler(
-                &Direct3D12::D3D12_SAMPLER_DESC {
-                    Filter: filter,
-                    AddressU: conv::map_address_mode(desc.address_modes[0]),
-                    AddressV: conv::map_address_mode(desc.address_modes[1]),
-                    AddressW: conv::map_address_mode(desc.address_modes[2]),
-                    MipLODBias: 0f32,
-                    MaxAnisotropy: desc.anisotropy_clamp as u32,
-                    ComparisonFunc: conv::map_comparison(
-                        desc.compare.unwrap_or(wgt::CompareFunction::Always),
-                    ),
-                    BorderColor: border_color,
-                    MinLOD: desc.lod_clamp.start,
-                    MaxLOD: desc.lod_clamp.end,
-                },
-                handle.raw,
-            )
-        };
+        let raw_desc = Direct3D12::D3D12_SAMPLER_DESC {
+            Filter: filter,
+            AddressU: conv::map_address_mode(desc.address_modes[0]),
+            AddressV: conv::map_address_mode(desc.address_modes[1]),
+            AddressW: conv::map_address_mode(desc.address_modes[2]),
+            MipLODBias: 0f32,
+            MaxAnisotropy: desc.anisotropy_clamp as u32,
+            ComparisonFunc: conv::map_comparison(
+                desc.compare.unwrap_or(wgt::CompareFunction::Always),
+            ),
+            BorderColor: border_color,
+            MinLOD: desc.lod_clamp.start,
+            MaxLOD: desc.lod_clamp.end,
+        };
+
+        let index = self
+            .shared
+            .sampler_heap
+            .create_sampler(&self.raw, raw_desc)?;

        self.counters.samplers.add(1);

-        Ok(super::Sampler { handle })
+        Ok(super::Sampler {
+            index,
+            desc: raw_desc,
+        })
    }

    unsafe fn destroy_sampler(&self, sampler: super::Sampler) {
-        self.sampler_pool.lock().free_handle(sampler.handle);
+        self.shared
+            .sampler_heap
+            .destroy_sampler(sampler.desc, sampler.index);
        self.counters.samplers.sub(1);
    }
@@ -763,12 +758,8 @@ impl crate::Device for super::Device {
        &self,
        desc: &crate::BindGroupLayoutDescriptor,
    ) -> Result<super::BindGroupLayout, crate::DeviceError> {
-        let (
-            mut num_buffer_views,
-            mut num_samplers,
-            mut num_texture_views,
-            mut num_acceleration_structures,
-        ) = (0, 0, 0, 0);
+        let mut num_views = 0;
+        let mut has_sampler_in_group = false;
        for entry in desc.entries.iter() {
            let count = entry.count.map_or(1, NonZeroU32::get);
            match entry.ty {
@@ -776,18 +767,20 @@ impl crate::Device for super::Device {
                    has_dynamic_offset: true,
                    ..
                } => {}
-                wgt::BindingType::Buffer { .. } => num_buffer_views += count,
-                wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => {
-                    num_texture_views += count
-                }
-                wgt::BindingType::Sampler { .. } => num_samplers += count,
-                wgt::BindingType::AccelerationStructure => num_acceleration_structures += count,
+                wgt::BindingType::Buffer { .. }
+                | wgt::BindingType::Texture { .. }
+                | wgt::BindingType::StorageTexture { .. }
+                | wgt::BindingType::AccelerationStructure => num_views += count,
+                wgt::BindingType::Sampler { .. } => has_sampler_in_group = true,
            }
        }

+        if has_sampler_in_group {
+            num_views += 1;
+        }
+
        self.counters.bind_group_layouts.add(1);

-        let num_views = num_buffer_views + num_texture_views + num_acceleration_structures;
-
        Ok(super::BindGroupLayout {
            entries: desc.entries.to_vec(),
            cpu_heap_views: if num_views != 0 {
@@ -800,17 +793,7 @@ impl crate::Device for super::Device {
            } else {
                None
            },
-            cpu_heap_samplers: if num_samplers != 0 {
-                let heap = descriptor::CpuHeap::new(
-                    &self.raw,
-                    Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-                    num_samplers,
-                )?;
-                Some(heap)
-            } else {
-                None
-            },
-            copy_counts: vec![1; num_views.max(num_samplers) as usize],
+            copy_counts: vec![1; num_views as usize],
        })
    }
@@ -841,12 +824,15 @@ impl crate::Device for super::Device {
        // ...
        // (bind group [0]) - Space=0
        //   View descriptor table, if any
-        //   Sampler descriptor table, if any
+        //   Sampler buffer descriptor table, if any
        //   Root descriptors (for dynamic offset buffers)
        // (bind group [1]) - Space=0
        // ...
        // (bind group [2]) - Space=0
        // Special constant buffer: Space=0
+        // Sampler descriptor tables: Space=0
+        //   SamplerState Array: Space=0, Register=0-2047
+        //   SamplerComparisonState Array: Space=0, Register=2048-4095

        //TODO: put lower bind group indices further down the root signature. See:
        // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model
@@ -854,12 +840,10 @@ impl crate::Device for super::Device {
        // on Vulkan-like layout compatibility rules.

        let mut binding_map = hlsl::BindingMap::default();
-        let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = (
-            hlsl::BindTarget::default(),
-            hlsl::BindTarget::default(),
-            hlsl::BindTarget::default(),
-            hlsl::BindTarget::default(),
-        );
+        let mut sampler_buffer_binding_map = hlsl::SamplerIndexBufferBindingMap::default();
+        let mut bind_cbv = hlsl::BindTarget::default();
+        let mut bind_srv = hlsl::BindTarget::default();
+        let mut bind_uav = hlsl::BindTarget::default();
        let mut parameters = Vec::new();
        let mut push_constants_target = None;
        let mut root_constant_info = None;
@@ -886,7 +870,7 @@ impl crate::Device for super::Device {
                },
                ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
            });
-            let binding = bind_cbv.clone();
+            let binding = bind_cbv;
            bind_cbv.register += 1;
            root_constant_info = Some(super::RootConstantInfo {
                root_index: parameter_index as u32,
@@ -900,19 +884,34 @@ impl crate::Device for super::Device {
        // Collect the whole number of bindings we will create upfront.
        // It allows us to preallocate enough storage to avoid reallocation,
        // which could cause invalid pointers.
-        let total_non_dynamic_entries = desc
-            .bind_group_layouts
-            .iter()
-            .flat_map(|bgl| {
-                bgl.entries.iter().map(|entry| match entry.ty {
+        let mut total_non_dynamic_entries = 0_usize;
+        let mut sampler_in_any_bind_group = false;
+        for bgl in desc.bind_group_layouts {
+            let mut sampler_in_bind_group = false;
+
+            for entry in &bgl.entries {
+                match entry.ty {
                    wgt::BindingType::Buffer {
                        has_dynamic_offset: true,
                        ..
-                    } => 0,
-                    _ => 1,
-                })
-            })
-            .sum();
+                    } => {}
+                    wgt::BindingType::Sampler(_) => sampler_in_bind_group = true,
+                    _ => total_non_dynamic_entries += 1,
+                }
+            }
+
+            if sampler_in_bind_group {
+                // One for the sampler buffer
+                total_non_dynamic_entries += 1;
+                sampler_in_any_bind_group = true;
+            }
+        }
+
+        if sampler_in_any_bind_group {
+            // Two for the sampler arrays themselves
+            total_non_dynamic_entries += 2;
+        }

        let mut ranges = Vec::with_capacity(total_non_dynamic_entries);
        let mut bind_group_infos =
@@ -926,10 +925,11 @@ impl crate::Device for super::Device {
            let mut visibility_view_static = wgt::ShaderStages::empty();
            let mut visibility_view_dynamic = wgt::ShaderStages::empty();
-            let mut visibility_sampler = wgt::ShaderStages::empty();
            for entry in bgl.entries.iter() {
                match entry.ty {
-                    wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility,
+                    wgt::BindingType::Sampler { .. } => {
+                        visibility_view_static |= wgt::ShaderStages::all()
+                    }
                    wgt::BindingType::Buffer {
                        has_dynamic_offset: true,
                        ..
@@ -939,7 +939,7 @@ impl crate::Device for super::Device {
            }

            // SRV/CBV/UAV descriptor tables
-            let mut range_base = ranges.len();
+            let range_base = ranges.len();
            for entry in bgl.entries.iter() {
                let range_ty = match entry.ty {
                    wgt::BindingType::Buffer {
@@ -963,7 +963,7 @@ impl crate::Device for super::Device {
                    },
                    hlsl::BindTarget {
                        binding_array_size: entry.count.map(NonZeroU32::get),
-                        ..bt.clone()
+                        ..*bt
                    },
                );
                ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
@@ -976,6 +976,44 @@ impl crate::Device for super::Device {
                });
                bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
            }
+
+            let mut sampler_index_within_bind_group = 0;
+            for entry in bgl.entries.iter() {
+                if let wgt::BindingType::Sampler(_) = entry.ty {
+                    binding_map.insert(
+                        naga::ResourceBinding {
+                            group: index as u32,
+                            binding: entry.binding,
+                        },
+                        hlsl::BindTarget {
+                            // Naga does not use the space field for samplers
+                            space: 255,
+                            register: sampler_index_within_bind_group,
+                            binding_array_size: None,
+                        },
+                    );
+                    sampler_index_within_bind_group += 1;
+                }
+            }
+
+            if sampler_index_within_bind_group != 0 {
+                sampler_buffer_binding_map.insert(
+                    hlsl::SamplerIndexBufferKey {
+                        group: index as u32,
+                    },
+                    bind_srv,
+                );
+
+                ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
+                    RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
+                    NumDescriptors: 1,
+                    BaseShaderRegister: bind_srv.register,
+                    RegisterSpace: bind_srv.space as u32,
+                    OffsetInDescriptorsFromTableStart:
+                        Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
+                });
+
+                bind_srv.register += 1;
+            }
+
            if ranges.len() > range_base {
                let range = &ranges[range_base..];
                parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
@@ -991,50 +1029,6 @@ impl crate::Device for super::Device {
                info.tables |= super::TableTypes::SRV_CBV_UAV;
            }

-            // Sampler descriptor tables
-            range_base = ranges.len();
-            for entry in bgl.entries.iter() {
-                let range_ty = match entry.ty {
-                    wgt::BindingType::Sampler { .. } => {
-                        Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER
-                    }
-                    _ => continue,
-                };
-                binding_map.insert(
-                    naga::ResourceBinding {
-                        group: index as u32,
-                        binding: entry.binding,
-                    },
-                    hlsl::BindTarget {
-                        binding_array_size: entry.count.map(NonZeroU32::get),
-                        ..bind_sampler.clone()
-                    },
-                );
-                ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
-                    RangeType: range_ty,
-                    NumDescriptors: entry.count.map_or(1, |count| count.get()),
-                    BaseShaderRegister: bind_sampler.register,
-                    RegisterSpace: bind_sampler.space as u32,
-                    OffsetInDescriptorsFromTableStart:
-                        Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
-                });
-                bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
-            }
-            if ranges.len() > range_base {
-                let range = &ranges[range_base..];
-                parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
-                    ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
-                    Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
-                        DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
-                            NumDescriptorRanges: range.len() as u32,
-                            pDescriptorRanges: range.as_ptr(),
-                        },
-                    },
-                    ShaderVisibility: conv::map_visibility(visibility_sampler),
-                });
-                info.tables |= super::TableTypes::SAMPLERS;
-            }
-
            // Root (dynamic) descriptor tables
            let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic);
            for entry in bgl.entries.iter() {
@@ -1072,7 +1066,7 @@ impl crate::Device for super::Device {
                    },
                    hlsl::BindTarget {
                        binding_array_size: entry.count.map(NonZeroU32::get),
-                        ..bt.clone()
+                        ..*bt
                    },
                );
                info.dynamic_buffers.push(kind);
@@ -1094,6 +1088,62 @@ impl crate::Device for super::Device {
            bind_group_infos.push(info);
        }

+        let sampler_heap_target = hlsl::SamplerHeapBindTargets {
+            standard_samplers: hlsl::BindTarget {
+                space: 0,
+                register: 0,
+                binding_array_size: None,
+            },
+            comparison_samplers: hlsl::BindTarget {
+                space: 0,
+                register: 2048,
+                binding_array_size: None,
+            },
+        };
+
+        let mut sampler_heap_root_index = None;
+        if sampler_in_any_bind_group {
+            // Sampler descriptor tables
+            //
+            // We bind two sampler ranges pointing to the same descriptor heap, using two different register ranges.
+            //
+            // We bind them as normal samplers in registers 0-2047 and comparison samplers in registers 2048-4095.
+            // Tier 2 hardware guarantees that the type of sampler only needs to match if the sampler is actually
+            // accessed in the shader. As such, we can bind the same array of samplers to both registers.
+            //
+            // We do this because HLSL does not allow you to alias registers at all.
+            let range_base = ranges.len();
+
+            // Standard samplers, registers 0-2047
+            ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
+                RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
+                NumDescriptors: 2048,
+                BaseShaderRegister: 0,
+                RegisterSpace: 0,
+                OffsetInDescriptorsFromTableStart: 0,
+            });
+
+            // Comparison samplers, registers 2048-4095
+            ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
+                RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
+                NumDescriptors: 2048,
+                BaseShaderRegister: 2048,
+                RegisterSpace: 0,
+                OffsetInDescriptorsFromTableStart: 0,
+            });
+
+            let range = &ranges[range_base..];
+
+            sampler_heap_root_index = Some(parameters.len() as super::RootIndex);
+            parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
+                ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
+                Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
+                    DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
+                        NumDescriptorRanges: range.len() as u32,
+                        pDescriptorRanges: range.as_ptr(),
+                    },
+                },
+                ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
+            });
+        }
+
        // Ensure that we didn't reallocate!
        debug_assert_eq!(ranges.len(), total_non_dynamic_entries);
@@ -1113,7 +1163,7 @@ impl crate::Device for super::Device {
                },
                ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only,
            });
-            let binding = bind_cbv.clone();
+            let binding = bind_cbv;
            bind_cbv.register += 1;
            (Some(parameter_index as u32), Some(binding))
        } else {
@@ -1231,6 +1281,7 @@ impl crate::Device for super::Device {
                total_root_elements: parameters.len() as super::RootIndex,
                special_constants,
                root_constant_info,
+                sampler_heap_root_index,
            },
            bind_group_infos,
            naga_options: hlsl::Options {
@@ -1241,6 +1292,8 @@ impl crate::Device for super::Device {
                push_constants_target,
                zero_initialize_workgroup_memory: true,
                restrict_indexing: true,
+                sampler_heap_target,
+                sampler_buffer_binding_map,
            },
        })
    }
@@ -1267,14 +1320,6 @@ impl crate::Device for super::Device {
        if let Some(ref mut inner) = cpu_views {
            inner.stage.clear();
        }
-        let mut cpu_samplers = desc
-            .layout
-            .cpu_heap_samplers
-            .as_ref()
-            .map(|cpu_heap| cpu_heap.inner.lock());
-        if let Some(ref mut inner) = cpu_samplers {
-            inner.stage.clear();
-        }
        let mut dynamic_buffers = Vec::new();

        let layout_and_entry_iter = desc.entries.iter().map(|entry| {
@@ -1286,6 +1331,8 @@ impl crate::Device for super::Device {
                .expect("internal error: no layout entry found with binding slot");
            (layout, entry)
        });
+
+        let mut sampler_indexes: Vec<super::sampler::SamplerIndex> = Vec::new();
        for (layout, entry) in layout_and_entry_iter {
            match layout.ty {
                wgt::BindingType::Buffer {
@@ -1390,8 +1437,8 @@ impl crate::Device for super::Device {
                wgt::BindingType::Sampler { .. } => {
                    let start = entry.resource_index as usize;
                    let end = start + entry.count as usize;
-                    for data in &desc.samplers[start..end] {
-                        cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw);
+                    for &data in &desc.samplers[start..end] {
+                        sampler_indexes.push(data.index);
                    }
                }
                wgt::BindingType::AccelerationStructure => {
@@ -1424,6 +1471,92 @@ impl crate::Device for super::Device {
            }
        }

+        let sampler_index_buffer = if !sampler_indexes.is_empty() {
+            let buffer_size = (sampler_indexes.len() * size_of::<u32>()) as u64;
+
+            let label = if let Some(label) = desc.label {
+                Cow::Owned(format!("{} (Internal Sampler Index Buffer)", label))
+            } else {
+                Cow::Borrowed("Internal Sampler Index Buffer")
+            };
+
+            let buffer_desc = crate::BufferDescriptor {
+                label: None,
+                size: buffer_size,
+                usage: crate::BufferUses::STORAGE_READ_ONLY | crate::BufferUses::MAP_WRITE,
+                // D3D12 backend doesn't care about the memory flags
+                memory_flags: crate::MemoryFlags::empty(),
+            };
+
+            let raw_buffer_desc = Direct3D12::D3D12_RESOURCE_DESC {
+                Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER,
+                Alignment: 0,
+                Width: buffer_size,
+                Height: 1,
+                DepthOrArraySize: 1,
+                MipLevels: 1,
+                Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
+                SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC {
+                    Count: 1,
+                    Quality: 0,
+                },
+                Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+                Flags: Direct3D12::D3D12_RESOURCE_FLAG_NONE,
+            };
+
+            let (buffer, allocation) =
+                super::suballocation::create_buffer_resource(self, &buffer_desc, raw_buffer_desc)?;
+
+            unsafe { buffer.SetName(&windows::core::HSTRING::from(&*label)) }
+                .into_device_result("SetName")?;
+
+            let mut mapping = ptr::null_mut::<ffi::c_void>();
+            unsafe { buffer.Map(0, None, Some(&mut mapping)) }.into_device_result("Map")?;
+
+            assert!(!mapping.is_null());
+            assert_eq!(mapping as usize % 4, 0);
+
+            unsafe {
+                ptr::copy_nonoverlapping(
+                    sampler_indexes.as_ptr(),
+                    mapping.cast(),
+                    sampler_indexes.len(),
+                )
+            };
+
+            // The unmapping is not needed, as all memory is coherent in d3d12, but let's be nice to our address space.
+            unsafe { buffer.Unmap(0, None) };
+
+            let srv_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC {
+                Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
+                ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER,
+                Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 {
+                    Buffer: Direct3D12::D3D12_BUFFER_SRV {
+                        FirstElement: 0,
+                        NumElements: sampler_indexes.len() as u32,
+                        StructureByteStride: 4,
+                        Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_NONE,
+                    },
+                },
+                Shader4ComponentMapping: Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
+            };
+
+            let inner = cpu_views.as_mut().unwrap();
+            let cpu_index = inner.stage.len() as u32;
+            let srv = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index);
+
+            unsafe {
+                self.raw
+                    .CreateShaderResourceView(&buffer, Some(&srv_desc), srv)
+            };
+
+            cpu_views.as_mut().unwrap().stage.push(srv);
+
+            Some(super::SamplerIndexBuffer { buffer, allocation })
+        } else {
+            None
+        };
+
        let handle_views = match cpu_views {
            Some(inner) => {
                let dual = unsafe {
@@ -1438,26 +1571,12 @@ impl crate::Device for super::Device {
            }
            None => None,
        };
-        let handle_samplers = match cpu_samplers {
-            Some(inner) => {
-                let dual = unsafe {
-                    descriptor::upload(
-                        &self.raw,
-                        &inner,
-                        &self.shared.heap_samplers,
-                        &desc.layout.copy_counts,
-                    )
-                }?;
-                Some(dual)
-            }
-            None => None,
-        };

        self.counters.bind_groups.add(1);

        Ok(super::BindGroup {
            handle_views,
-            handle_samplers,
+            sampler_index_buffer,
            dynamic_buffers,
        })
    }
@@ -1466,8 +1585,14 @@ impl crate::Device for super::Device {
        if let Some(dual) = group.handle_views {
            self.shared.heap_views.free_slice(dual);
        }
-        if let Some(dual) = group.handle_samplers {
-            self.shared.heap_samplers.free_slice(dual);
+
+        if let Some(sampler_buffer) = group.sampler_index_buffer {
+            // Make sure the buffer is dropped before the allocation
+            drop(sampler_buffer.buffer);
+
+            if let Some(allocation) = sampler_buffer.allocation {
+                super::suballocation::free_buffer_allocation(self, allocation, &self.mem_allocator);
+            }
        }
        self.counters.bind_groups.sub(1);

View File

@@ -13,13 +13,52 @@ and destination states match, and they are for storage sync.

For now, all resources are created with "committed" memory.
## Sampler Descriptor Management
At most one descriptor heap of each type can be bound at once. This
means that the descriptors from all bind groups need to be present
in the same heap, and they need to be contiguous within that heap.
This is not a problem for the SRV/CBV/UAV heap as it can be sized into
the millions of entries. However, the sampler heap is limited to 2048 entries.
To work around this limitation, we refer to samplers indirectly by index.
The entire sampler heap is bound at once, and for each bind group we bind a
buffer containing the heap indices of that group's samplers. The shader then
uses these indices to look up the samplers in the heap. To help visualize
this, the generated HLSL looks like this:
```wgsl
@group(0) @binding(2) var myLinearSampler: sampler;
@group(1) @binding(1) var myAnisoSampler: sampler;
@group(1) @binding(4) var myCompSampler: sampler;
```
```cpp
// These bindings alias the same descriptors. Depending on the type, the shader will use the correct one.
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s2048, space0);
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space0);
StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space0);
// Indexes into group 0 index array
static const SamplerState myLinearSampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];
// Indexes into group 1 index array
static const SamplerState myAnisoSampler = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
static const SamplerComparisonState myCompSampler = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
```
Without this transform we would need a separate set of sampler descriptors for each unique combination of samplers
in a bind group. That results in a lot of duplication and makes it easy to hit the 2048-descriptor limit. With the
transform, the limit is merely 2048 unique samplers in existence at once, which is much more reasonable.
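
To make the indirection concrete, here is a tiny, self-contained Rust model of the double lookup (slot numbers, names, and values are purely illustrative, not what the backend actually produces):

```rust
fn main() {
    // Stand-in for the global sampler heap: each slot holds one unique
    // sampler, represented here by a label.
    let sampler_heap = ["linear-clamp", "aniso16-repeat", "less-equal-compare"];

    // Suppose deduplication placed myAnisoSampler at heap slot 1 and
    // myCompSampler at heap slot 2; group 1's index buffer then holds:
    let group1_sampler_index_array: [u32; 2] = [1, 2];

    // The generated HLSL expression
    //     nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]]
    // corresponds to this double lookup:
    let my_aniso_sampler = sampler_heap[group1_sampler_index_array[0] as usize];
    assert_eq!(my_aniso_sampler, "aniso16-repeat");
}
```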
## Resource binding

See ['Device::create_pipeline_layout`] documentation for the structure
of the root signature corresponding to WebGPU pipeline layout.

Binding groups is mostly straightforward, with one big caveat:
-all bindings have to be reset whenever the pipeline layout changes.
+all bindings have to be reset whenever the root signature changes.
This is the rule of D3D12, and we can do nothing to help it.

We detect this change at both [`crate::CommandEncoder::set_bind_group`]
@@ -39,6 +78,7 @@ mod conv;
mod descriptor;
mod device;
mod instance;
+mod sampler;
mod shader_compilation;
mod suballocation;
mod types;
@@ -518,6 +558,7 @@ struct PrivateCapabilities {
    casting_fully_typed_format_supported: bool,
    suballocation_supported: bool,
    shader_model: naga::back::hlsl::ShaderModel,
+    max_sampler_descriptor_heap_size: u32,
}

#[derive(Default)]
@@ -575,7 +616,7 @@ struct DeviceShared {
    zero_buffer: Direct3D12::ID3D12Resource,
    cmd_signatures: CommandSignatures,
    heap_views: descriptor::GeneralHeap,
-    heap_samplers: descriptor::GeneralHeap,
+    sampler_heap: sampler::SamplerHeap,
}

unsafe impl Send for DeviceShared {}
@@ -591,7 +632,6 @@ pub struct Device {
    rtv_pool: Mutex<descriptor::CpuPool>,
    dsv_pool: Mutex<descriptor::CpuPool>,
    srv_uav_pool: Mutex<descriptor::CpuPool>,
-    sampler_pool: Mutex<descriptor::CpuPool>,
    // library
    library: Arc<D3D12Lib>,
    #[cfg(feature = "renderdoc")]
@@ -645,7 +685,7 @@ struct PassResolve {
    format: Dxgi::Common::DXGI_FORMAT,
}

-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
enum RootElement {
    Empty,
    Constant,
@@ -664,6 +704,8 @@ enum RootElement {
        kind: BufferViewKind,
        address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE,
    },
+    /// Descriptor table referring to the entire sampler heap.
+    SamplerHeap,
}

#[derive(Clone, Copy)]
@@ -700,6 +742,7 @@ impl PassState {
                total_root_elements: 0,
                special_constants: None,
                root_constant_info: None,
+                sampler_heap_root_index: None,
            },
            root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
            constant_data: [0; MAX_ROOT_ELEMENTS],
@@ -853,7 +896,8 @@ unsafe impl Sync for TextureView {}
#[derive(Debug)]
pub struct Sampler {
-    handle: descriptor::Handle,
+    index: sampler::SamplerIndex,
+    desc: Direct3D12::D3D12_SAMPLER_DESC,
}

impl crate::DynSampler for Sampler {}
@@ -893,7 +937,6 @@ pub struct BindGroupLayout {
    /// Sorted list of entries.
    entries: Vec<wgt::BindGroupLayoutEntry>,
    cpu_heap_views: Option<descriptor::CpuHeap>,
-    cpu_heap_samplers: Option<descriptor::CpuHeap>,
    copy_counts: Vec<u32>, // all 1's
}

@@ -906,10 +949,16 @@ enum BufferViewKind {
    UnorderedAccess,
}

+#[derive(Debug)]
+struct SamplerIndexBuffer {
+    buffer: Direct3D12::ID3D12Resource,
+    allocation: Option<suballocation::AllocationWrapper>,
+}
+
#[derive(Debug)]
pub struct BindGroup {
    handle_views: Option<descriptor::DualHandle>,
-    handle_samplers: Option<descriptor::DualHandle>,
+    sampler_index_buffer: Option<SamplerIndexBuffer>,
    dynamic_buffers: Vec<Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE>,
}

@@ -945,6 +994,7 @@ struct PipelineLayoutShared {
    total_root_elements: RootIndex,
    special_constants: Option<PipelineLayoutSpecialConstants>,
    root_constant_info: Option<RootConstantInfo>,
+    sampler_heap_root_index: Option<RootIndex>,
}

unsafe impl Send for PipelineLayoutShared {}

View File

@@ -0,0 +1,251 @@
//! Sampler management for DX12.
//!
//! Nearly identical to the Vulkan sampler cache, with added descriptor heap management.
use std::collections::{hash_map::Entry, HashMap};
use ordered_float::OrderedFloat;
use parking_lot::Mutex;
use windows::Win32::Graphics::Direct3D12::*;
use crate::dx12::HResult;
/// The index of a sampler in the global sampler heap.
///
/// This is a type-safe, transparent wrapper around a u32.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) struct SamplerIndex(u32);
/// [`D3D12_SAMPLER_DESC`] is not hashable, so we wrap it in a newtype that is.
///
/// We use [`OrderedFloat`] to allow for floating point values to be compared and
/// hashed in a defined way.
#[derive(Debug, Copy, Clone)]
struct HashableSamplerDesc(D3D12_SAMPLER_DESC);
impl PartialEq for HashableSamplerDesc {
fn eq(&self, other: &Self) -> bool {
self.0.Filter == other.0.Filter
&& self.0.AddressU == other.0.AddressU
&& self.0.AddressV == other.0.AddressV
&& self.0.AddressW == other.0.AddressW
&& OrderedFloat(self.0.MipLODBias) == OrderedFloat(other.0.MipLODBias)
&& self.0.MaxAnisotropy == other.0.MaxAnisotropy
&& self.0.ComparisonFunc == other.0.ComparisonFunc
&& self.0.BorderColor.map(OrderedFloat) == other.0.BorderColor.map(OrderedFloat)
&& OrderedFloat(self.0.MinLOD) == OrderedFloat(other.0.MinLOD)
&& OrderedFloat(self.0.MaxLOD) == OrderedFloat(other.0.MaxLOD)
}
}
impl Eq for HashableSamplerDesc {}
impl std::hash::Hash for HashableSamplerDesc {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.Filter.0.hash(state);
self.0.AddressU.0.hash(state);
self.0.AddressV.0.hash(state);
self.0.AddressW.0.hash(state);
OrderedFloat(self.0.MipLODBias).hash(state);
self.0.MaxAnisotropy.hash(state);
self.0.ComparisonFunc.0.hash(state);
self.0.BorderColor.map(OrderedFloat).hash(state);
OrderedFloat(self.0.MinLOD).hash(state);
OrderedFloat(self.0.MaxLOD).hash(state);
}
}
/// Entry in the sampler cache.
struct CacheEntry {
index: SamplerIndex,
ref_count: u32,
}
/// Container for the mutable management state of the sampler heap.
///
/// We have this separated, using interior mutability, to allow for the outside world
/// to access the heap directly without needing to take the lock.
pub(crate) struct SamplerHeapState {
/// Mapping from the sampler description to the index within the heap and the refcount.
mapping: HashMap<HashableSamplerDesc, CacheEntry>,
/// List of free sampler indices.
freelist: Vec<SamplerIndex>,
}
/// Global sampler heap for the device.
///
/// As D3D12 only allows 2048 samplers to be in a single heap, we need to cache
/// samplers aggressively and refer to them in shaders by index.
pub(crate) struct SamplerHeap {
/// Mutable management state of the sampler heap.
state: Mutex<SamplerHeapState>,
/// The heap itself.
heap: ID3D12DescriptorHeap,
/// The CPU-side handle to the first descriptor in the heap.
///
/// Both the CPU and GPU handles point to the same descriptor, just in
/// different contexts.
heap_cpu_start_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
/// The GPU-side handle to the first descriptor in the heap.
///
/// Both the CPU and GPU handles point to the same descriptor, just in
/// different contexts.
heap_gpu_start_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
/// This is the device-specific size of sampler descriptors.
descriptor_stride: u32,
}
impl SamplerHeap {
pub fn new(
device: &ID3D12Device,
private_caps: &super::PrivateCapabilities,
) -> Result<Self, crate::DeviceError> {
profiling::scope!("SamplerHeap::new");
// WARP can report this as 2M or more. We clamp it to 64k to be safe.
const SAMPLER_HEAP_SIZE_CLAMP: u32 = 64 * 1024;
let max_unique_samplers = private_caps
.max_sampler_descriptor_heap_size
.min(SAMPLER_HEAP_SIZE_CLAMP);
let desc = D3D12_DESCRIPTOR_HEAP_DESC {
Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
NumDescriptors: max_unique_samplers,
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
NodeMask: 0,
};
let heap = unsafe { device.CreateDescriptorHeap::<ID3D12DescriptorHeap>(&desc) }
.into_device_result("Failed to create global GPU-Visible Sampler Descriptor Heap")?;
let heap_cpu_start_handle = unsafe { heap.GetCPUDescriptorHandleForHeapStart() };
let heap_gpu_start_handle = unsafe { heap.GetGPUDescriptorHandleForHeapStart() };
let descriptor_stride =
unsafe { device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) };
Ok(Self {
state: Mutex::new(SamplerHeapState {
mapping: HashMap::new(),
// Reverse so that samplers get allocated starting from zero.
freelist: (0..max_unique_samplers).map(SamplerIndex).rev().collect(),
}),
heap,
heap_cpu_start_handle,
heap_gpu_start_handle,
descriptor_stride,
})
}
/// Returns a reference to the raw descriptor heap.
pub fn heap(&self) -> &ID3D12DescriptorHeap {
&self.heap
}
/// Returns the GPU descriptor handle to be bound to the sampler descriptor table.
pub fn gpu_descriptor_table(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE {
self.heap_gpu_start_handle
}
/// Add a sampler with the given description to the heap.
///
/// If the sampler already exists, the refcount is incremented and the existing index is returned.
///
/// If the sampler does not exist, a new sampler is created and the index is returned.
///
/// If the heap is full, an error is returned.
pub fn create_sampler(
&self,
device: &ID3D12Device,
desc: D3D12_SAMPLER_DESC,
) -> Result<SamplerIndex, crate::DeviceError> {
profiling::scope!("SamplerHeap::create_sampler");
let hashable_desc = HashableSamplerDesc(desc);
// Eagerly dereference the lock to allow split borrows.
let state = &mut *self.state.lock();
// Lookup the sampler in the mapping.
match state.mapping.entry(hashable_desc) {
Entry::Occupied(occupied_entry) => {
// We have found a match, so increment the refcount and return the index.
let entry = occupied_entry.into_mut();
entry.ref_count += 1;
Ok(entry.index)
}
Entry::Vacant(vacant_entry) => {
// We need to create a new sampler.
// Try to get a new index from the freelist.
let Some(index) = state.freelist.pop() else {
// If the freelist is empty, we have hit the maximum number of samplers.
log::error!("There is no more room in the global sampler heap for more unique samplers. Your device supports a maximum of {} unique samplers.", state.mapping.len());
return Err(crate::DeviceError::OutOfMemory);
};
// Compute the CPU side handle for the new sampler.
let handle = D3D12_CPU_DESCRIPTOR_HANDLE {
ptr: self.heap_cpu_start_handle.ptr
+ self.descriptor_stride as usize * index.0 as usize,
};
unsafe {
device.CreateSampler(&desc, handle);
}
// Insert the new sampler into the mapping.
vacant_entry.insert(CacheEntry {
index,
ref_count: 1,
});
Ok(index)
}
}
}
/// Decrement the refcount of the sampler with the given description.
///
/// If the refcount reaches zero, the sampler is destroyed and the index is returned to the freelist.
///
/// The provided index is checked against the index of the sampler with the given description, ensuring
/// that there isn't a clerical error from the caller.
pub fn destroy_sampler(&self, desc: D3D12_SAMPLER_DESC, provided_index: SamplerIndex) {
profiling::scope!("SamplerHeap::destroy_sampler");
// Eagerly dereference the lock to allow split borrows.
let state = &mut *self.state.lock();
// Get the index of the sampler to destroy.
let Entry::Occupied(mut hash_map_entry) = state.mapping.entry(HashableSamplerDesc(desc))
else {
log::error!(
"Tried to destroy a sampler that doesn't exist. Sampler description: {:#?}",
desc
);
return;
};
let cache_entry = hash_map_entry.get_mut();
// Ensure that the provided index matches the index of the sampler to destroy.
assert_eq!(
cache_entry.index, provided_index,
"Mismatched sampler index, this is an implementation bug"
);
// Decrement the refcount of the sampler.
cache_entry.ref_count -= 1;
// If we are the last reference, remove the sampler from the mapping and return the index to the freelist.
//
// As samplers only exist as descriptors in the heap, there is nothing needed to be done to destroy the sampler.
if cache_entry.ref_count == 0 {
state.freelist.push(cache_entry.index);
hash_map_entry.remove();
}
}
}
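
The create/destroy pair above implements a straightforward deduplicate-and-refcount scheme. As a usage-level illustration, here is a minimal, self-contained model of that contract (toy types only; the real heap additionally writes a descriptor through `CreateSampler` and reports `DeviceError::OutOfMemory` when the freelist is exhausted):

```rust
use std::collections::{hash_map::Entry, HashMap};

/// Toy stand-in for a hashable sampler description.
type Desc = (u32, u32);

struct ToyHeap {
    /// desc -> (heap index, refcount)
    mapping: HashMap<Desc, (u32, u32)>,
    freelist: Vec<u32>,
}

impl ToyHeap {
    fn create_sampler(&mut self, desc: Desc) -> Option<u32> {
        match self.mapping.entry(desc) {
            Entry::Occupied(e) => {
                // Identical description: share the slot, bump the refcount.
                let slot = e.into_mut();
                slot.1 += 1;
                Some(slot.0)
            }
            Entry::Vacant(v) => {
                // New description: take a free slot, or fail when full.
                let index = self.freelist.pop()?;
                v.insert((index, 1));
                Some(index)
            }
        }
    }

    fn destroy_sampler(&mut self, desc: Desc) {
        if let Entry::Occupied(mut e) = self.mapping.entry(desc) {
            e.get_mut().1 -= 1;
            if e.get().1 == 0 {
                // Last reference: the slot becomes reusable.
                let (index, _) = e.remove();
                self.freelist.push(index);
            }
        }
    }
}

fn main() {
    let mut heap = ToyHeap {
        mapping: HashMap::new(),
        freelist: (0..4).rev().collect(),
    };
    let a = heap.create_sampler((0, 0)).unwrap();
    let b = heap.create_sampler((0, 0)).unwrap();
    assert_eq!(a, b); // identical descriptions deduplicate to one slot
    heap.destroy_sampler((0, 0));
    heap.destroy_sampler((0, 0)); // refcount hits zero, slot returns to the freelist
    assert_eq!(heap.freelist.len(), 4);
}
```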

View File

@@ -244,7 +244,7 @@ impl super::Device {
            }
            naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => {
                let br = match var.binding {
-                    Some(ref br) => br.clone(),
+                    Some(br) => br,
                    None => continue,
                };
                let storage_access_store = match var.space {