mirror of
https://github.com/gfx-rs/wgpu.git
synced 2025-12-08 21:26:17 +00:00
Unconditionally Generate Bindless Samplers in DX12 (#6766)
This commit is contained in:
parent
2298cd2dd6
commit
436f716715
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -159,6 +159,15 @@ version = "1.0.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
||||
|
||||
[[package]]
|
||||
name = "approx"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
version = "1.4.1"
|
||||
@ -4187,6 +4196,7 @@ name = "wgpu-test"
|
||||
version = "24.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"approx",
|
||||
"arrayvec",
|
||||
"bitflags 2.8.0",
|
||||
"bytemuck",
|
||||
|
||||
@ -79,6 +79,7 @@ version = "24.0.0"
|
||||
anyhow = "1.0.95"
|
||||
argh = "0.1.13"
|
||||
arrayvec = "0.7"
|
||||
approx = "0.5"
|
||||
bincode = "1"
|
||||
bit-vec = "0.8"
|
||||
bitflags = "2.7"
|
||||
|
||||
@ -1190,6 +1190,83 @@ impl<W: Write> super::Writer<'_, W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes out the sampler heap declarations if they haven't been written yet.
|
||||
pub(super) fn write_sampler_heaps(&mut self) -> BackendResult {
|
||||
if self.wrapped.sampler_heaps {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
writeln!(
|
||||
self.out,
|
||||
"SamplerState {}[2048]: register(s{}, space{});",
|
||||
super::writer::SAMPLER_HEAP_VAR,
|
||||
self.options.sampler_heap_target.standard_samplers.register,
|
||||
self.options.sampler_heap_target.standard_samplers.space
|
||||
)?;
|
||||
writeln!(
|
||||
self.out,
|
||||
"SamplerComparisonState {}[2048]: register(s{}, space{});",
|
||||
super::writer::COMPARISON_SAMPLER_HEAP_VAR,
|
||||
self.options
|
||||
.sampler_heap_target
|
||||
.comparison_samplers
|
||||
.register,
|
||||
self.options.sampler_heap_target.comparison_samplers.space
|
||||
)?;
|
||||
|
||||
self.wrapped.sampler_heaps = true;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes out the sampler index buffer declaration if it hasn't been written yet.
|
||||
pub(super) fn write_wrapped_sampler_buffer(
|
||||
&mut self,
|
||||
key: super::SamplerIndexBufferKey,
|
||||
) -> BackendResult {
|
||||
// The astute will notice that we do a double hash lookup, but we do this to avoid
|
||||
// holding a mutable reference to `self` while trying to call `write_sampler_heaps`.
|
||||
//
|
||||
// We only pay this double lookup cost when we actually need to write out the sampler
|
||||
// buffer, which should be not be common.
|
||||
|
||||
if self.wrapped.sampler_index_buffers.contains_key(&key) {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
self.write_sampler_heaps()?;
|
||||
|
||||
// Because the group number can be arbitrary, we use the namer to generate a unique name
|
||||
// instead of adding it to the reserved name list.
|
||||
let sampler_array_name = self
|
||||
.namer
|
||||
.call(&format!("nagaGroup{}SamplerIndexArray", key.group));
|
||||
|
||||
let bind_target = match self.options.sampler_buffer_binding_map.get(&key) {
|
||||
Some(&bind_target) => bind_target,
|
||||
None if self.options.fake_missing_bindings => super::BindTarget {
|
||||
space: u8::MAX,
|
||||
register: key.group,
|
||||
binding_array_size: None,
|
||||
},
|
||||
None => {
|
||||
unreachable!("Sampler buffer of group {key:?} not bound to a register");
|
||||
}
|
||||
};
|
||||
|
||||
writeln!(
|
||||
self.out,
|
||||
"StructuredBuffer<uint> {sampler_array_name} : register(t{}, space{});",
|
||||
bind_target.register, bind_target.space
|
||||
)?;
|
||||
|
||||
self.wrapped
|
||||
.sampler_index_buffers
|
||||
.insert(key, sampler_array_name);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn write_texture_coordinates(
|
||||
&mut self,
|
||||
kind: &str,
|
||||
|
||||
@ -820,6 +820,8 @@ pub const RESERVED: &[&str] = &[
|
||||
super::writer::FREXP_FUNCTION,
|
||||
super::writer::EXTRACT_BITS_FUNCTION,
|
||||
super::writer::INSERT_BITS_FUNCTION,
|
||||
super::writer::SAMPLER_HEAP_VAR,
|
||||
super::writer::COMPARISON_SAMPLER_HEAP_VAR,
|
||||
];
|
||||
|
||||
// DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254
|
||||
|
||||
@ -92,6 +92,15 @@ float3x2 GetMatmOnBaz(Baz obj) {
|
||||
We also emit an analogous `Set` function, as well as functions for
|
||||
accessing individual columns by dynamic index.
|
||||
|
||||
## Sampler Handling
|
||||
|
||||
Due to limitations in how sampler heaps work in D3D12, we need to access samplers
|
||||
through a layer of indirection. Instead of directly binding samplers, we bind the entire
|
||||
sampler heap as both a standard and a comparison sampler heap. We then use a sampler
|
||||
index buffer for each bind group. This buffer is accessed in the shader to get the actual
|
||||
sampler index within the heap. See the wgpu_hal dx12 backend documentation for more
|
||||
information.
|
||||
|
||||
[hlsl]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl
|
||||
[ilov]: https://gpuweb.github.io/gpuweb/wgsl/#internal-value-layout
|
||||
[16bb]: https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing#constant-buffer-packing
|
||||
@ -110,11 +119,14 @@ use thiserror::Error;
|
||||
|
||||
use crate::{back, proc};
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
|
||||
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
|
||||
pub struct BindTarget {
|
||||
pub space: u8,
|
||||
/// For regular bindings this is the register number.
|
||||
///
|
||||
/// For sampler bindings, this is the index to use into the bind group's sampler index buffer.
|
||||
pub register: u32,
|
||||
/// If the binding is an unsized binding array, this overrides the size.
|
||||
pub binding_array_size: Option<u32>,
|
||||
@ -179,6 +191,43 @@ impl crate::ImageDimension {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
|
||||
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
|
||||
pub struct SamplerIndexBufferKey {
|
||||
pub group: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
|
||||
#[cfg_attr(feature = "deserialize", serde(default))]
|
||||
pub struct SamplerHeapBindTargets {
|
||||
pub standard_samplers: BindTarget,
|
||||
pub comparison_samplers: BindTarget,
|
||||
}
|
||||
|
||||
impl Default for SamplerHeapBindTargets {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
standard_samplers: BindTarget {
|
||||
space: 0,
|
||||
register: 0,
|
||||
binding_array_size: None,
|
||||
},
|
||||
comparison_samplers: BindTarget {
|
||||
space: 1,
|
||||
register: 0,
|
||||
binding_array_size: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We use a BTreeMap here so that we can hash it.
|
||||
pub type SamplerIndexBufferBindingMap =
|
||||
std::collections::BTreeMap<SamplerIndexBufferKey, BindTarget>;
|
||||
|
||||
/// Shorthand result used internally by the backend
|
||||
type BackendResult = Result<(), Error>;
|
||||
|
||||
@ -207,6 +256,10 @@ pub struct Options {
|
||||
pub special_constants_binding: Option<BindTarget>,
|
||||
/// Bind target of the push constant buffer
|
||||
pub push_constants_target: Option<BindTarget>,
|
||||
/// Bind target of the sampler heap and comparison sampler heap.
|
||||
pub sampler_heap_target: SamplerHeapBindTargets,
|
||||
/// Mapping of each bind group's sampler index buffer to a bind target.
|
||||
pub sampler_buffer_binding_map: SamplerIndexBufferBindingMap,
|
||||
/// Should workgroup variables be zero initialized (by polyfilling)?
|
||||
pub zero_initialize_workgroup_memory: bool,
|
||||
/// Should we restrict indexing of vectors, matrices and arrays?
|
||||
@ -220,6 +273,8 @@ impl Default for Options {
|
||||
binding_map: BindingMap::default(),
|
||||
fake_missing_bindings: true,
|
||||
special_constants_binding: None,
|
||||
sampler_heap_target: SamplerHeapBindTargets::default(),
|
||||
sampler_buffer_binding_map: std::collections::BTreeMap::default(),
|
||||
push_constants_target: None,
|
||||
zero_initialize_workgroup_memory: true,
|
||||
restrict_indexing: true,
|
||||
@ -233,13 +288,13 @@ impl Options {
|
||||
res_binding: &crate::ResourceBinding,
|
||||
) -> Result<BindTarget, EntryPointError> {
|
||||
match self.binding_map.get(res_binding) {
|
||||
Some(target) => Ok(target.clone()),
|
||||
Some(target) => Ok(*target),
|
||||
None if self.fake_missing_bindings => Ok(BindTarget {
|
||||
space: res_binding.group as u8,
|
||||
register: res_binding.binding,
|
||||
binding_array_size: None,
|
||||
}),
|
||||
None => Err(EntryPointError::MissingBinding(res_binding.clone())),
|
||||
None => Err(EntryPointError::MissingBinding(*res_binding)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -279,6 +334,10 @@ struct Wrapped {
|
||||
struct_matrix_access: crate::FastHashSet<help::WrappedStructMatrixAccess>,
|
||||
mat_cx2s: crate::FastHashSet<help::WrappedMatCx2>,
|
||||
math: crate::FastHashSet<help::WrappedMath>,
|
||||
/// If true, the sampler heaps have been written out.
|
||||
sampler_heaps: bool,
|
||||
// Mapping from SamplerIndexBufferKey to the name the namer returned.
|
||||
sampler_index_buffers: crate::FastHashMap<SamplerIndexBufferKey, String>,
|
||||
}
|
||||
|
||||
impl Wrapped {
|
||||
|
||||
@ -24,6 +24,8 @@ pub(crate) const MODF_FUNCTION: &str = "naga_modf";
|
||||
pub(crate) const FREXP_FUNCTION: &str = "naga_frexp";
|
||||
pub(crate) const EXTRACT_BITS_FUNCTION: &str = "naga_extractBits";
|
||||
pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits";
|
||||
pub(crate) const SAMPLER_HEAP_VAR: &str = "nagaSamplerHeap";
|
||||
pub(crate) const COMPARISON_SAMPLER_HEAP_VAR: &str = "nagaComparisonSamplerHeap";
|
||||
|
||||
struct EpStructMember {
|
||||
name: String,
|
||||
@ -94,6 +96,16 @@ const fn is_subgroup_builtin_binding(binding: &Option<crate::Binding>) -> bool {
|
||||
)
|
||||
}
|
||||
|
||||
/// Information for how to generate a `binding_array<sampler>` access.
|
||||
struct BindingArraySamplerInfo {
|
||||
/// Variable name of the sampler heap
|
||||
sampler_heap_name: &'static str,
|
||||
/// Variable name of the sampler index buffer
|
||||
sampler_index_buffer_name: String,
|
||||
/// Variable name of the base index _into_ the sampler index buffer
|
||||
binding_array_base_index_name: String,
|
||||
}
|
||||
|
||||
impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
pub fn new(out: W, options: &'a Options) -> Self {
|
||||
Self {
|
||||
@ -143,11 +155,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
) {
|
||||
use crate::Expression;
|
||||
self.need_bake_expressions.clear();
|
||||
for (fun_handle, expr) in func.expressions.iter() {
|
||||
let expr_info = &info[fun_handle];
|
||||
let min_ref_count = func.expressions[fun_handle].bake_ref_count();
|
||||
for (exp_handle, expr) in func.expressions.iter() {
|
||||
let expr_info = &info[exp_handle];
|
||||
let min_ref_count = func.expressions[exp_handle].bake_ref_count();
|
||||
if min_ref_count <= expr_info.ref_count {
|
||||
self.need_bake_expressions.insert(fun_handle);
|
||||
self.need_bake_expressions.insert(exp_handle);
|
||||
}
|
||||
|
||||
if let Expression::Math { fun, arg, .. } = *expr {
|
||||
@ -172,7 +184,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
self.need_bake_expressions.insert(arg);
|
||||
}
|
||||
crate::MathFunction::CountLeadingZeros => {
|
||||
let inner = info[fun_handle].ty.inner_with(&module.types);
|
||||
let inner = info[exp_handle].ty.inner_with(&module.types);
|
||||
if let Some(ScalarKind::Sint) = inner.scalar_kind() {
|
||||
self.need_bake_expressions.insert(arg);
|
||||
}
|
||||
@ -187,6 +199,14 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
self.need_bake_expressions.insert(expr);
|
||||
}
|
||||
}
|
||||
|
||||
if let Expression::GlobalVariable(_) = *expr {
|
||||
let inner = info[exp_handle].ty.inner_with(&module.types);
|
||||
|
||||
if let TypeInner::Sampler { .. } = *inner {
|
||||
self.need_bake_expressions.insert(exp_handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
for statement in func.body.iter() {
|
||||
match *statement {
|
||||
@ -814,6 +834,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
}
|
||||
}
|
||||
|
||||
let handle_ty = match *inner {
|
||||
TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
|
||||
_ => inner,
|
||||
};
|
||||
|
||||
// Samplers are handled entirely differently, so defer entirely to that method.
|
||||
let is_sampler = matches!(*handle_ty, TypeInner::Sampler { .. });
|
||||
|
||||
if is_sampler {
|
||||
return self.write_global_sampler(module, handle, global);
|
||||
}
|
||||
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-variable-register
|
||||
let register_ty = match global.space {
|
||||
crate::AddressSpace::Function => unreachable!("Function address space"),
|
||||
@ -843,13 +875,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
register
|
||||
}
|
||||
crate::AddressSpace::Handle => {
|
||||
let handle_ty = match *inner {
|
||||
TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
|
||||
_ => inner,
|
||||
};
|
||||
|
||||
let register = match *handle_ty {
|
||||
TypeInner::Sampler { .. } => "s",
|
||||
// all storage textures are UAV, unconditionally
|
||||
TypeInner::Image {
|
||||
class: crate::ImageClass::Storage { .. },
|
||||
@ -956,6 +982,66 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_global_sampler(
|
||||
&mut self,
|
||||
module: &Module,
|
||||
handle: Handle<crate::GlobalVariable>,
|
||||
global: &crate::GlobalVariable,
|
||||
) -> BackendResult {
|
||||
let binding = *global.binding.as_ref().unwrap();
|
||||
|
||||
let key = super::SamplerIndexBufferKey {
|
||||
group: binding.group,
|
||||
};
|
||||
self.write_wrapped_sampler_buffer(key)?;
|
||||
|
||||
// This was already validated, so we can confidently unwrap it.
|
||||
let bt = self.options.resolve_resource_binding(&binding).unwrap();
|
||||
|
||||
match module.types[global.ty].inner {
|
||||
TypeInner::Sampler { comparison } => {
|
||||
// If we are generating a static access, we create a variable for the sampler.
|
||||
//
|
||||
// This prevents the DXIL from containing multiple lookups for the sampler, which
|
||||
// the backend compiler will then have to eliminate. AMD does seem to be able to
|
||||
// eliminate these, but better safe than sorry.
|
||||
|
||||
write!(self.out, "static const ")?;
|
||||
self.write_type(module, global.ty)?;
|
||||
|
||||
let heap_var = if comparison {
|
||||
COMPARISON_SAMPLER_HEAP_VAR
|
||||
} else {
|
||||
SAMPLER_HEAP_VAR
|
||||
};
|
||||
|
||||
let index_buffer_name = &self.wrapped.sampler_index_buffers[&key];
|
||||
let name = &self.names[&NameKey::GlobalVariable(handle)];
|
||||
writeln!(
|
||||
self.out,
|
||||
" {name} = {heap_var}[{index_buffer_name}[{register}]];",
|
||||
register = bt.register
|
||||
)?;
|
||||
}
|
||||
TypeInner::BindingArray { .. } => {
|
||||
// If we are generating a binding array, we cannot directly access the sampler as the index
|
||||
// into the sampler index buffer is unknown at compile time. Instead we generate a constant
|
||||
// that represents the "base" index into the sampler index buffer. This constant is added
|
||||
// to the user provided index to get the final index into the sampler index buffer.
|
||||
|
||||
let name = &self.names[&NameKey::GlobalVariable(handle)];
|
||||
writeln!(
|
||||
self.out,
|
||||
"static const uint {name} = {register};",
|
||||
register = bt.register
|
||||
)?;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper method used to write global constants
|
||||
///
|
||||
/// # Notes
|
||||
@ -2670,7 +2756,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
};
|
||||
|
||||
self.write_expr(module, base, func_ctx)?;
|
||||
|
||||
let array_sampler_info = self.sampler_binding_array_info_from_expression(
|
||||
module, func_ctx, base, resolved,
|
||||
);
|
||||
|
||||
if let Some(ref info) = array_sampler_info {
|
||||
write!(self.out, "{}[", info.sampler_heap_name)?;
|
||||
} else {
|
||||
write!(self.out, "[")?;
|
||||
}
|
||||
|
||||
let needs_bound_check = self.options.restrict_indexing
|
||||
&& !indexing_binding_array
|
||||
@ -2715,7 +2810,17 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
if non_uniform_qualifier {
|
||||
write!(self.out, "NonUniformResourceIndex(")?;
|
||||
}
|
||||
if let Some(ref info) = array_sampler_info {
|
||||
write!(
|
||||
self.out,
|
||||
"{}[{} + ",
|
||||
info.sampler_index_buffer_name, info.binding_array_base_index_name,
|
||||
)?;
|
||||
}
|
||||
self.write_expr(module, index, func_ctx)?;
|
||||
if array_sampler_info.is_some() {
|
||||
write!(self.out, "]")?;
|
||||
}
|
||||
if non_uniform_qualifier {
|
||||
write!(self.out, ")")?;
|
||||
}
|
||||
@ -2730,43 +2835,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
{
|
||||
// do nothing, the chain is written on `Load`/`Store`
|
||||
} else {
|
||||
fn write_access<W: fmt::Write>(
|
||||
writer: &mut super::Writer<'_, W>,
|
||||
resolved: &TypeInner,
|
||||
base_ty_handle: Option<Handle<crate::Type>>,
|
||||
index: u32,
|
||||
) -> BackendResult {
|
||||
match *resolved {
|
||||
// We specifically lift the ValuePointer to this case. While `[0]` is valid
|
||||
// HLSL for any vector behind a value pointer, FXC completely miscompiles
|
||||
// it and generates completely nonsensical DXBC.
|
||||
//
|
||||
// See https://github.com/gfx-rs/naga/issues/2095 for more details.
|
||||
TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
|
||||
// Write vector access as a swizzle
|
||||
write!(writer.out, ".{}", back::COMPONENTS[index as usize])?
|
||||
}
|
||||
TypeInner::Matrix { .. }
|
||||
| TypeInner::Array { .. }
|
||||
| TypeInner::BindingArray { .. } => write!(writer.out, "[{index}]")?,
|
||||
TypeInner::Struct { .. } => {
|
||||
// This will never panic in case the type is a `Struct`, this is not true
|
||||
// for other types so we can only check while inside this match arm
|
||||
let ty = base_ty_handle.unwrap();
|
||||
|
||||
write!(
|
||||
writer.out,
|
||||
".{}",
|
||||
&writer.names[&NameKey::StructMember(ty, index)]
|
||||
)?
|
||||
}
|
||||
ref other => {
|
||||
return Err(Error::Custom(format!("Cannot index {other:?}")))
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// We write the matrix column access in a special way since
|
||||
// the type of `base` is our special __matCx2 struct.
|
||||
if let Some(MatrixType {
|
||||
@ -2816,8 +2884,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
}
|
||||
}
|
||||
|
||||
let array_sampler_info = self.sampler_binding_array_info_from_expression(
|
||||
module, func_ctx, base, resolved,
|
||||
);
|
||||
|
||||
if let Some(ref info) = array_sampler_info {
|
||||
write!(
|
||||
self.out,
|
||||
"{}[{}",
|
||||
info.sampler_heap_name, info.sampler_index_buffer_name
|
||||
)?;
|
||||
}
|
||||
|
||||
self.write_expr(module, base, func_ctx)?;
|
||||
write_access(self, resolved, base_ty_handle, index)?;
|
||||
|
||||
match *resolved {
|
||||
// We specifically lift the ValuePointer to this case. While `[0]` is valid
|
||||
// HLSL for any vector behind a value pointer, FXC completely miscompiles
|
||||
// it and generates completely nonsensical DXBC.
|
||||
//
|
||||
// See https://github.com/gfx-rs/naga/issues/2095 for more details.
|
||||
TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
|
||||
// Write vector access as a swizzle
|
||||
write!(self.out, ".{}", back::COMPONENTS[index as usize])?
|
||||
}
|
||||
TypeInner::Matrix { .. }
|
||||
| TypeInner::Array { .. }
|
||||
| TypeInner::BindingArray { .. } => {
|
||||
if let Some(ref info) = array_sampler_info {
|
||||
write!(
|
||||
self.out,
|
||||
"[{} + {index}]",
|
||||
info.binding_array_base_index_name
|
||||
)?;
|
||||
} else {
|
||||
write!(self.out, "[{index}]")?;
|
||||
}
|
||||
}
|
||||
TypeInner::Struct { .. } => {
|
||||
// This will never panic in case the type is a `Struct`, this is not true
|
||||
// for other types so we can only check while inside this match arm
|
||||
let ty = base_ty_handle.unwrap();
|
||||
|
||||
write!(
|
||||
self.out,
|
||||
".{}",
|
||||
&self.names[&NameKey::StructMember(ty, index)]
|
||||
)?
|
||||
}
|
||||
ref other => return Err(Error::Custom(format!("Cannot index {other:?}"))),
|
||||
}
|
||||
|
||||
if array_sampler_info.is_some() {
|
||||
write!(self.out, "]")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Expression::FunctionArgument(pos) => {
|
||||
@ -2958,13 +3078,30 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
write!(self.out, ".x")?;
|
||||
}
|
||||
}
|
||||
Expression::GlobalVariable(handle) => match module.global_variables[handle].space {
|
||||
crate::AddressSpace::Storage { .. } => {}
|
||||
_ => {
|
||||
Expression::GlobalVariable(handle) => {
|
||||
let global_variable = &module.global_variables[handle];
|
||||
let ty = &module.types[global_variable.ty].inner;
|
||||
|
||||
// In the case of binding arrays of samplers, we need to not write anything
|
||||
// as the we are in the wrong position to fully write the expression.
|
||||
//
|
||||
// The entire writing is done by AccessIndex.
|
||||
let is_binding_array_of_samplers = match *ty {
|
||||
TypeInner::BindingArray { base, .. } => {
|
||||
let base_ty = &module.types[base].inner;
|
||||
matches!(*base_ty, TypeInner::Sampler { .. })
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
|
||||
let is_storage_space =
|
||||
matches!(global_variable.space, crate::AddressSpace::Storage { .. });
|
||||
|
||||
if !is_binding_array_of_samplers && !is_storage_space {
|
||||
let name = &self.names[&NameKey::GlobalVariable(handle)];
|
||||
write!(self.out, "{name}")?;
|
||||
}
|
||||
},
|
||||
}
|
||||
Expression::LocalVariable(handle) => {
|
||||
write!(self.out, "{}", self.names[&func_ctx.name_key(handle)])?
|
||||
}
|
||||
@ -3680,6 +3817,52 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the [`BindingArraySamplerInfo`] from an expression so that such an access
|
||||
/// can be generated later.
|
||||
fn sampler_binding_array_info_from_expression(
|
||||
&mut self,
|
||||
module: &Module,
|
||||
func_ctx: &back::FunctionCtx<'_>,
|
||||
base: Handle<crate::Expression>,
|
||||
resolved: &TypeInner,
|
||||
) -> Option<BindingArraySamplerInfo> {
|
||||
if let TypeInner::BindingArray {
|
||||
base: base_ty_handle,
|
||||
..
|
||||
} = *resolved
|
||||
{
|
||||
let base_ty = &module.types[base_ty_handle].inner;
|
||||
if let TypeInner::Sampler { comparison, .. } = *base_ty {
|
||||
let base = &func_ctx.expressions[base];
|
||||
|
||||
if let crate::Expression::GlobalVariable(handle) = *base {
|
||||
let variable = &module.global_variables[handle];
|
||||
|
||||
let sampler_heap_name = match comparison {
|
||||
true => COMPARISON_SAMPLER_HEAP_VAR,
|
||||
false => SAMPLER_HEAP_VAR,
|
||||
};
|
||||
|
||||
return Some(BindingArraySamplerInfo {
|
||||
sampler_heap_name,
|
||||
sampler_index_buffer_name: self
|
||||
.wrapped
|
||||
.sampler_index_buffers
|
||||
.get(&super::SamplerIndexBufferKey {
|
||||
group: variable.binding.unwrap().group,
|
||||
})
|
||||
.unwrap()
|
||||
.clone(),
|
||||
binding_array_base_index_name: self.names[&NameKey::GlobalVariable(handle)]
|
||||
.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn write_named_expr(
|
||||
&mut self,
|
||||
module: &Module,
|
||||
|
||||
@ -498,7 +498,7 @@ impl Options {
|
||||
index: 0,
|
||||
interpolation: None,
|
||||
}),
|
||||
None => Err(EntryPointError::MissingBindTarget(res_binding.clone())),
|
||||
None => Err(EntryPointError::MissingBindTarget(*res_binding)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -5323,8 +5323,7 @@ template <typename A>
|
||||
None => false,
|
||||
};
|
||||
if !good {
|
||||
ep_error =
|
||||
Some(super::EntryPointError::MissingBindTarget(br.clone()));
|
||||
ep_error = Some(super::EntryPointError::MissingBindTarget(*br));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -966,7 +966,7 @@ pub enum Binding {
|
||||
}
|
||||
|
||||
/// Pipeline binding information for global resources.
|
||||
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
#[cfg_attr(feature = "serialize", derive(Serialize))]
|
||||
#[cfg_attr(feature = "deserialize", derive(Deserialize))]
|
||||
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
|
||||
|
||||
@ -778,7 +778,7 @@ impl super::Validator {
|
||||
}
|
||||
|
||||
if let Some(ref bind) = var.binding {
|
||||
if !self.ep_resource_bindings.insert(bind.clone()) {
|
||||
if !self.ep_resource_bindings.insert(*bind) {
|
||||
if self.flags.contains(super::ValidationFlags::BINDINGS) {
|
||||
return Err(EntryPointError::BindingCollision(var_handle)
|
||||
.with_span_handle(var_handle, &module.global_variables));
|
||||
|
||||
@ -57,6 +57,9 @@
|
||||
(group: 0, binding: 2): (space: 1, register: 0),
|
||||
},
|
||||
fake_missing_bindings: false,
|
||||
sampler_buffer_binding_map: {
|
||||
(group: 0): (space: 2, register: 0),
|
||||
},
|
||||
special_constants_binding: Some((space: 0, register: 1)),
|
||||
zero_initialize_workgroup_memory: true,
|
||||
restrict_indexing: true
|
||||
|
||||
@ -12,8 +12,11 @@ Texture2DArray<float4> texture_array_2darray[5] : register(t0, space2);
|
||||
Texture2DMS<float4> texture_array_multisampled[5] : register(t0, space3);
|
||||
Texture2D<float> texture_array_depth[5] : register(t0, space4);
|
||||
RWTexture2D<float4> texture_array_storage[5] : register(u0, space5);
|
||||
SamplerState samp[5] : register(s0, space6);
|
||||
SamplerComparisonState samp_comp[5] : register(s0, space7);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
|
||||
static const uint samp = 0;
|
||||
static const uint samp_comp = 0;
|
||||
cbuffer uni : register(b0, space8) { UniformIndex uni; }
|
||||
|
||||
struct FragmentInput_main {
|
||||
@ -66,22 +69,22 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
|
||||
u2_ = (_e27 + NagaDimensions2D(texture_array_unbounded[uniform_index]));
|
||||
uint2 _e32 = u2_;
|
||||
u2_ = (_e32 + NagaDimensions2D(texture_array_unbounded[NonUniformResourceIndex(non_uniform_index)]));
|
||||
float4 _e38 = texture_array_bounded[0].Gather(samp[0], uv);
|
||||
float4 _e38 = texture_array_bounded[0].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
|
||||
float4 _e39 = v4_;
|
||||
v4_ = (_e39 + _e38);
|
||||
float4 _e45 = texture_array_bounded[uniform_index].Gather(samp[uniform_index], uv);
|
||||
float4 _e45 = texture_array_bounded[uniform_index].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
|
||||
float4 _e46 = v4_;
|
||||
v4_ = (_e46 + _e45);
|
||||
float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(samp[NonUniformResourceIndex(non_uniform_index)], uv);
|
||||
float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
|
||||
float4 _e53 = v4_;
|
||||
v4_ = (_e53 + _e52);
|
||||
float4 _e60 = texture_array_depth[0].GatherCmp(samp_comp[0], uv, 0.0);
|
||||
float4 _e60 = texture_array_depth[0].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
|
||||
float4 _e61 = v4_;
|
||||
v4_ = (_e61 + _e60);
|
||||
float4 _e68 = texture_array_depth[uniform_index].GatherCmp(samp_comp[uniform_index], uv, 0.0);
|
||||
float4 _e68 = texture_array_depth[uniform_index].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
|
||||
float4 _e69 = v4_;
|
||||
v4_ = (_e69 + _e68);
|
||||
float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
|
||||
float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
|
||||
float4 _e77 = v4_;
|
||||
v4_ = (_e77 + _e76);
|
||||
float4 _e82 = texture_array_unbounded[0].Load(int3(pix, 0));
|
||||
@ -111,58 +114,58 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
|
||||
u1_ = (_e135 + NagaMSNumSamples2D(texture_array_multisampled[uniform_index]));
|
||||
uint _e140 = u1_;
|
||||
u1_ = (_e140 + NagaMSNumSamples2D(texture_array_multisampled[NonUniformResourceIndex(non_uniform_index)]));
|
||||
float4 _e146 = texture_array_bounded[0].Sample(samp[0], uv);
|
||||
float4 _e146 = texture_array_bounded[0].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
|
||||
float4 _e147 = v4_;
|
||||
v4_ = (_e147 + _e146);
|
||||
float4 _e153 = texture_array_bounded[uniform_index].Sample(samp[uniform_index], uv);
|
||||
float4 _e153 = texture_array_bounded[uniform_index].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
|
||||
float4 _e154 = v4_;
|
||||
v4_ = (_e154 + _e153);
|
||||
float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(samp[NonUniformResourceIndex(non_uniform_index)], uv);
|
||||
float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
|
||||
float4 _e161 = v4_;
|
||||
v4_ = (_e161 + _e160);
|
||||
float4 _e168 = texture_array_bounded[0].SampleBias(samp[0], uv, 0.0);
|
||||
float4 _e168 = texture_array_bounded[0].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
|
||||
float4 _e169 = v4_;
|
||||
v4_ = (_e169 + _e168);
|
||||
float4 _e176 = texture_array_bounded[uniform_index].SampleBias(samp[uniform_index], uv, 0.0);
|
||||
float4 _e176 = texture_array_bounded[uniform_index].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
|
||||
float4 _e177 = v4_;
|
||||
v4_ = (_e177 + _e176);
|
||||
float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
|
||||
float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
|
||||
float4 _e185 = v4_;
|
||||
v4_ = (_e185 + _e184);
|
||||
float _e192 = texture_array_depth[0].SampleCmp(samp_comp[0], uv, 0.0);
|
||||
float _e192 = texture_array_depth[0].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
|
||||
float _e193 = v1_;
|
||||
v1_ = (_e193 + _e192);
|
||||
float _e200 = texture_array_depth[uniform_index].SampleCmp(samp_comp[uniform_index], uv, 0.0);
|
||||
float _e200 = texture_array_depth[uniform_index].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
|
||||
float _e201 = v1_;
|
||||
v1_ = (_e201 + _e200);
|
||||
float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
|
||||
float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
|
||||
float _e209 = v1_;
|
||||
v1_ = (_e209 + _e208);
|
||||
float _e216 = texture_array_depth[0].SampleCmpLevelZero(samp_comp[0], uv, 0.0);
|
||||
float _e216 = texture_array_depth[0].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
|
||||
float _e217 = v1_;
|
||||
v1_ = (_e217 + _e216);
|
||||
float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(samp_comp[uniform_index], uv, 0.0);
|
||||
float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
|
||||
float _e225 = v1_;
|
||||
v1_ = (_e225 + _e224);
|
||||
float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
|
||||
float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
|
||||
float _e233 = v1_;
|
||||
v1_ = (_e233 + _e232);
|
||||
float4 _e239 = texture_array_bounded[0].SampleGrad(samp[0], uv, uv, uv);
|
||||
float4 _e239 = texture_array_bounded[0].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, uv, uv);
|
||||
float4 _e240 = v4_;
|
||||
v4_ = (_e240 + _e239);
|
||||
float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(samp[uniform_index], uv, uv, uv);
|
||||
float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, uv, uv);
|
||||
float4 _e247 = v4_;
|
||||
v4_ = (_e247 + _e246);
|
||||
float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(samp[NonUniformResourceIndex(non_uniform_index)], uv, uv, uv);
|
||||
float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, uv, uv);
|
||||
float4 _e254 = v4_;
|
||||
v4_ = (_e254 + _e253);
|
||||
float4 _e261 = texture_array_bounded[0].SampleLevel(samp[0], uv, 0.0);
|
||||
float4 _e261 = texture_array_bounded[0].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
|
||||
float4 _e262 = v4_;
|
||||
v4_ = (_e262 + _e261);
|
||||
float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(samp[uniform_index], uv, 0.0);
|
||||
float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
|
||||
float4 _e270 = v4_;
|
||||
v4_ = (_e270 + _e269);
|
||||
float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
|
||||
float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
|
||||
float4 _e278 = v4_;
|
||||
v4_ = (_e278 + _e277);
|
||||
float4 _e282 = v4_;
|
||||
|
||||
@ -15,8 +15,11 @@ TextureCube<float4> image_cube : register(t5);
|
||||
TextureCubeArray<float4> image_cube_array : register(t6);
|
||||
Texture3D<float4> image_3d : register(t7);
|
||||
Texture2DMS<float4> image_aa : register(t8);
|
||||
SamplerState sampler_reg : register(s0, space1);
|
||||
SamplerComparisonState sampler_cmp : register(s1, space1);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space255);
|
||||
static const SamplerState sampler_reg = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
|
||||
static const SamplerComparisonState sampler_cmp = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
|
||||
Texture2D<float> image_2d_depth : register(t2, space1);
|
||||
Texture2DArray<float> image_2d_array_depth : register(t3, space1);
|
||||
TextureCube<float> image_cube_depth : register(t4, space1);
|
||||
|
||||
@ -6,7 +6,10 @@ struct VertexOutput {
|
||||
static const float c_scale = 1.2;
|
||||
|
||||
Texture2D<float4> u_texture : register(t0);
|
||||
SamplerState u_sampler : register(s1);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
|
||||
static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
|
||||
|
||||
struct VertexOutput_vert_main {
|
||||
float2 uv_2 : LOC0;
|
||||
|
||||
@ -28,7 +28,10 @@ cbuffer u_entity : register(b0, space1) { Entity u_entity; }
|
||||
ByteAddressBuffer s_lights : register(t1);
|
||||
cbuffer u_lights : register(b1) { Light u_lights[10]; }
|
||||
Texture2DArray<float> t_shadow : register(t2);
|
||||
SamplerComparisonState sampler_shadow : register(s3);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
|
||||
static const SamplerComparisonState sampler_shadow = nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[3]];
|
||||
|
||||
struct VertexOutput_vs_main {
|
||||
float3 world_normal : LOC0;
|
||||
|
||||
@ -17,7 +17,10 @@ struct Data {
|
||||
|
||||
cbuffer r_data : register(b0) { Data r_data; }
|
||||
TextureCube<float4> r_texture : register(t0);
|
||||
SamplerState r_sampler : register(s0, space1);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space2);
|
||||
static const SamplerState r_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];
|
||||
|
||||
struct VertexOutput_vs_main {
|
||||
float3 uv : LOC0;
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
Texture2D<float4> Texture : register(t0);
|
||||
SamplerState Sampler : register(s1);
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
|
||||
static const SamplerState Sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
|
||||
|
||||
float4 test(Texture2D<float4> Passed_Texture, SamplerState Passed_Sampler)
|
||||
{
|
||||
|
||||
@ -27,6 +27,7 @@ webgl = ["wgpu/webgl"]
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
arrayvec.workspace = true
|
||||
approx.workspace = true
|
||||
bitflags.workspace = true
|
||||
bytemuck.workspace = true
|
||||
cfg-if.workspace = true
|
||||
|
||||
@ -83,6 +83,16 @@ pub fn fail_if<T>(
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs `callback` inside a validation error scope and reports whether it
/// raised a validation error.
///
/// Returns `(failed, result)`, where `failed` is `true` if a validation
/// error was captured while `callback` ran, and `result` is the callback's
/// return value (produced regardless of whether validation failed).
pub fn did_fail<T>(device: &wgpu::Device, callback: impl FnOnce() -> T) -> (bool, T) {
    // Capture any validation errors raised while the callback executes.
    device.push_error_scope(wgpu::ErrorFilter::Validation);
    let result = callback();
    // Popping the scope is asynchronous; block until it resolves.
    let validation_error = pollster::block_on(device.pop_error_scope());
    let failed = validation_error.is_some();

    (failed, result)
}
|
||||
|
||||
/// Adds the necessary main function for our gpu test harness.
|
||||
#[macro_export]
|
||||
macro_rules! gpu_test_main {
|
||||
|
||||
@ -44,6 +44,7 @@ mod ray_tracing;
|
||||
mod render_pass_ownership;
|
||||
mod resource_descriptor_accessor;
|
||||
mod resource_error;
|
||||
mod samplers;
|
||||
mod scissor_tests;
|
||||
mod shader;
|
||||
mod shader_primitive_index;
|
||||
|
||||
543
tests/tests/samplers.rs
Normal file
543
tests/tests/samplers.rs
Normal file
@ -0,0 +1,543 @@
|
||||
//! D3D12 samplers are fun and we're doing a decent amount of polyfilling with them.
|
||||
//!
|
||||
//! Do some tests to ensure things are working correctly and nothing gets mad.
|
||||
|
||||
use wgpu_test::{did_fail, gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext};
|
||||
|
||||
// A number large enough to likely cause sampler caches to run out of space
// on some devices. Used by the tests below as an upper bound on how many
// samplers to request.
const PROBABLY_PROBLEMATIC_SAMPLER_COUNT: u32 = 8 * 1024;
||||
|
||||
#[gpu_test]
|
||||
static SAMPLER_DEDUPLICATION: GpuTestConfiguration =
|
||||
GpuTestConfiguration::new().run_sync(sampler_deduplication);
|
||||
|
||||
// Create a large number of samplers from the same two descriptors.
|
||||
//
|
||||
// Sampler deduplication in the backend should ensure this doesn't cause any issues.
|
||||
fn sampler_deduplication(ctx: TestingContext) {
|
||||
// Create 2 different sampler descriptors
|
||||
let desc1 = wgpu::SamplerDescriptor {
|
||||
label: Some("sampler1"),
|
||||
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||||
mag_filter: wgpu::FilterMode::Nearest,
|
||||
min_filter: wgpu::FilterMode::Nearest,
|
||||
mipmap_filter: wgpu::FilterMode::Nearest,
|
||||
lod_min_clamp: 0.0,
|
||||
lod_max_clamp: 100.0,
|
||||
compare: None,
|
||||
anisotropy_clamp: 1,
|
||||
border_color: None,
|
||||
};
|
||||
|
||||
let desc2 = wgpu::SamplerDescriptor {
|
||||
label: Some("sampler2"),
|
||||
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||||
mag_filter: wgpu::FilterMode::Linear,
|
||||
min_filter: wgpu::FilterMode::Linear,
|
||||
mipmap_filter: wgpu::FilterMode::Linear,
|
||||
lod_min_clamp: 0.0,
|
||||
lod_max_clamp: 100.0,
|
||||
compare: None,
|
||||
anisotropy_clamp: 1,
|
||||
border_color: None,
|
||||
};
|
||||
|
||||
// Now create a bunch of samplers with these descriptors
|
||||
let samplers = (0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT)
|
||||
.map(|i| {
|
||||
let desc = if i % 2 == 0 { &desc1 } else { &desc2 };
|
||||
valid(&ctx.device, || ctx.device.create_sampler(desc))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
drop(samplers);
|
||||
}
|
||||
|
||||
#[gpu_test]
|
||||
static SAMPLER_CREATION_FAILURE: GpuTestConfiguration =
|
||||
GpuTestConfiguration::new().run_sync(sampler_creation_failure);
|
||||
|
||||
/// We want to test that sampler creation properly fails when we hit internal sampler
|
||||
/// cache limits. As we don't actually know what the limit is, we first create as many
|
||||
/// samplers as we can until we get the first failure.
|
||||
///
|
||||
/// This failure being caught ensures that the error catching machinery on samplers
|
||||
/// is working as expected.
|
||||
///
|
||||
/// We then clear all samplers and poll the device, which should leave the caches
|
||||
/// completely empty.
|
||||
///
|
||||
/// We then try to create the same number of samplers to ensure the cache was entirely
|
||||
/// cleared.
|
||||
fn sampler_creation_failure(ctx: TestingContext) {
|
||||
let desc = wgpu::SamplerDescriptor {
|
||||
label: Some("sampler1"),
|
||||
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||||
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||||
mag_filter: wgpu::FilterMode::Nearest,
|
||||
min_filter: wgpu::FilterMode::Nearest,
|
||||
mipmap_filter: wgpu::FilterMode::Nearest,
|
||||
lod_min_clamp: 0.0,
|
||||
lod_max_clamp: 100.0,
|
||||
compare: None,
|
||||
anisotropy_clamp: 1,
|
||||
border_color: None,
|
||||
};
|
||||
|
||||
let mut sampler_storage = Vec::with_capacity(PROBABLY_PROBLEMATIC_SAMPLER_COUNT as usize);
|
||||
|
||||
for i in 0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT {
|
||||
let (failed, sampler) = did_fail(&ctx.device, || {
|
||||
ctx.device.create_sampler(&wgpu::SamplerDescriptor {
|
||||
lod_min_clamp: i as f32 * 0.01,
|
||||
..desc
|
||||
})
|
||||
});
|
||||
|
||||
if failed {
|
||||
break;
|
||||
}
|
||||
|
||||
sampler_storage.push(sampler);
|
||||
}
|
||||
|
||||
let failed_count = sampler_storage.len();
|
||||
|
||||
sampler_storage.clear();
|
||||
ctx.device.poll(wgpu::Maintain::Wait);
|
||||
|
||||
for i in 0..failed_count {
|
||||
valid(&ctx.device, || {
|
||||
eprintln!("Trying to create sampler {}", i);
|
||||
let sampler = ctx.device.create_sampler(&wgpu::SamplerDescriptor {
|
||||
lod_min_clamp: i as f32 * 0.01,
|
||||
// Change the max clamp to ensure the sampler is using different cache slots from
|
||||
// the previous run.
|
||||
lod_max_clamp: 200.0,
|
||||
..desc
|
||||
});
|
||||
sampler_storage.push(sampler);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// WGSL bindings where all three samplers share a single bind group with the
/// texture; the result buffer lives in group 1.
const SINGLE_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(0) @binding(2) var sampler1: sampler;
@group(0) @binding(3) var sampler2: sampler;

@group(1) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;

/// WGSL bindings where each sampler lives in its own bind group; the result
/// buffer lives in group 3.
const MULTI_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(1) @binding(0) var sampler1: sampler;
@group(2) @binding(0) var sampler2: sampler;

@group(3) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;

/// Compute entry point shared by both binding layouts: samples once per
/// sampler and writes each result into `results`.
const SAMPLER_CODE: &str = r#"
@compute @workgroup_size(1, 1, 1)
fn cs_main() {
    // When sampling a 2x2 texture at the bottom left, we can change the address mode
    // on S/T to get different values. This allows us to make sure the right sampler
    // is being used.
    results[0] = textureSampleLevel(texture, sampler0, vec2f(0.0, 1.0), 0.0);
    results[1] = textureSampleLevel(texture, sampler1, vec2f(0.0, 1.0), 0.0);
    results[2] = textureSampleLevel(texture, sampler2, vec2f(0.0, 1.0), 0.0);
}
"#;

/// Which binding layout variant `sampler_bind_group` should exercise.
enum GroupType {
    /// All samplers in one bind group (`SINGLE_GROUP_BINDINGS`).
    Single,
    /// One sampler per bind group (`MULTI_GROUP_BINDINGS`).
    Multi,
}
||||
|
||||
/// Sample with three different samplers all bound in a single bind group.
#[gpu_test]
static SAMPLER_SINGLE_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .test_features_limits()
            // In OpenGL textures cannot be used with multiple samplers.
            .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
    )
    .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Single));

/// Sample with three different samplers spread across separate bind groups.
#[gpu_test]
static SAMPLER_MULTI_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .test_features_limits()
            // In OpenGL textures cannot be used with multiple samplers.
            .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
    )
    .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Multi));
||||
|
||||
/// Samples a 2x2 texture with three samplers that differ only in their
/// S/T address modes, then checks the sampled values on the CPU.
///
/// Bilinearly sampling at uv = (0, 1) blends different texels depending on
/// the clamp/repeat mode of each axis, so each sampler yields a distinct,
/// predictable color. This verifies that the correct sampler reaches each
/// binding slot both when all samplers share one bind group and when they
/// are spread across several — exercising the D3D12 bindless-sampler
/// index-array plumbing in particular.
fn sampler_bind_group(ctx: TestingContext, group_type: GroupType) {
    // Pick the binding declarations matching the requested layout.
    let bindings = match group_type {
        GroupType::Single => SINGLE_GROUP_BINDINGS,
        GroupType::Multi => MULTI_GROUP_BINDINGS,
    };

    let full_shader = format!("{}\n{}", bindings, SAMPLER_CODE);

    let module = ctx
        .device
        .create_shader_module(wgpu::ShaderModuleDescriptor {
            source: wgpu::ShaderSource::Wgsl(full_shader.into()),
            label: None,
        });

    let mut bind_group_layouts = Vec::new();

    // Build the sampler/texture layouts to mirror the shader's group layout.
    match group_type {
        GroupType::Single => {
            // One layout: texture at binding 0, three samplers at 1..=3.
            let bgl = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("combination_bgl"),
                    entries: &[
                        wgpu::BindGroupLayoutEntry {
                            binding: 0,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Texture {
                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                                view_dimension: wgpu::TextureViewDimension::D2,
                                multisampled: false,
                            },
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 1,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 2,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 3,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                    ],
                });

            bind_group_layouts.push(bgl);
        }
        GroupType::Multi => {
            // Group 0 holds the texture plus the first sampler; groups 1 and
            // 2 each hold one sampler.
            let bgl0 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl0"),
                    entries: &[
                        wgpu::BindGroupLayoutEntry {
                            binding: 0,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Texture {
                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                                view_dimension: wgpu::TextureViewDimension::D2,
                                multisampled: false,
                            },
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 1,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                    ],
                });

            let bgl1 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl1"),
                    entries: &[wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    }],
                });

            let bgl2 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl2"),
                    entries: &[wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    }],
                });

            bind_group_layouts.push(bgl0);
            bind_group_layouts.push(bgl1);
            bind_group_layouts.push(bgl2);
        }
    }

    // The results buffer always goes in the last bind group.
    let output_bgl = ctx
        .device
        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("output_bgl"),
            entries: &[wgpu::BindGroupLayoutEntry {
                binding: 0,
                visibility: wgpu::ShaderStages::COMPUTE,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    min_binding_size: None,
                },
                count: None,
            }],
        });

    let mut bgl_references: Vec<_> = bind_group_layouts.iter().collect();

    bgl_references.push(&output_bgl);

    let pipeline_layout = ctx
        .device
        .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("pipeline_layout"),
            bind_group_layouts: &bgl_references,
            push_constant_ranges: &[],
        });

    // 2x2 texture with four distinct corner colors.
    let input_image = ctx.device.create_texture(&wgpu::TextureDescriptor {
        label: Some("input_image"),
        size: wgpu::Extent3d {
            width: 2,
            height: 2,
            depth_or_array_layers: 1,
        },
        mip_level_count: 1,
        sample_count: 1,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba8Unorm,
        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
        view_formats: &[],
    });

    let input_image_view = input_image.create_view(&wgpu::TextureViewDescriptor::default());

    // Row 0: red, green. Row 1: blue, white.
    let image_data: [u8; 16] = [
        255, 0, 0, 255, /* */ 0, 255, 0, 255, //
        0, 0, 255, 255, /* */ 255, 255, 255, 255, //
    ];

    ctx.queue.write_texture(
        wgpu::TexelCopyTextureInfo {
            texture: &input_image,
            mip_level: 0,
            origin: wgpu::Origin3d::ZERO,
            aspect: wgpu::TextureAspect::All,
        },
        &image_data,
        wgpu::TexelCopyBufferLayout {
            offset: 0,
            // 2 texels * 4 bytes each per row.
            bytes_per_row: Some(8),
            rows_per_image: None,
        },
        wgpu::Extent3d {
            width: 2,
            height: 2,
            depth_or_array_layers: 1,
        },
    );

    // (address_mode_u, address_mode_v) for each of the three samplers; the
    // only difference between samplers, so the sampled colors identify
    // which sampler was actually used.
    let address_modes = [
        (
            wgpu::AddressMode::ClampToEdge,
            wgpu::AddressMode::ClampToEdge,
        ),
        (wgpu::AddressMode::Repeat, wgpu::AddressMode::ClampToEdge),
        (wgpu::AddressMode::ClampToEdge, wgpu::AddressMode::Repeat),
    ];

    let samplers = address_modes.map(|(address_mode_u, address_mode_v)| {
        ctx.device.create_sampler(&wgpu::SamplerDescriptor {
            label: None,
            address_mode_u,
            address_mode_v,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::FilterMode::Nearest,
            lod_min_clamp: 0.0,
            lod_max_clamp: 100.0,
            compare: None,
            anisotropy_clamp: 1,
            border_color: None,
        })
    });

    let mut bind_groups = Vec::new();

    // Bind the resources to mirror the layouts built above.
    match group_type {
        GroupType::Single => {
            let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("combination_bg"),
                layout: &bind_group_layouts[0],
                entries: &[
                    wgpu::BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureView(&input_image_view),
                    },
                    wgpu::BindGroupEntry {
                        binding: 1,
                        resource: wgpu::BindingResource::Sampler(&samplers[0]),
                    },
                    wgpu::BindGroupEntry {
                        binding: 2,
                        resource: wgpu::BindingResource::Sampler(&samplers[1]),
                    },
                    wgpu::BindGroupEntry {
                        binding: 3,
                        resource: wgpu::BindingResource::Sampler(&samplers[2]),
                    },
                ],
            });

            bind_groups.push(bg);
        }
        GroupType::Multi => {
            let bg0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg0"),
                layout: &bind_group_layouts[0],
                entries: &[
                    wgpu::BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureView(&input_image_view),
                    },
                    wgpu::BindGroupEntry {
                        binding: 1,
                        resource: wgpu::BindingResource::Sampler(&samplers[0]),
                    },
                ],
            });

            let bg1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg1"),
                layout: &bind_group_layouts[1],
                entries: &[wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::Sampler(&samplers[1]),
                }],
            });

            let bg2 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg2"),
                layout: &bind_group_layouts[2],
                entries: &[wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::Sampler(&samplers[2]),
                }],
            });

            bind_groups.push(bg0);
            bind_groups.push(bg1);
            bind_groups.push(bg2);
        }
    }

    // 48 bytes = 3 results * vec4<f32> (16 bytes each).
    let output_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("output_buffer"),
        size: 48,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    });

    // Staging buffer for reading the results back on the CPU.
    let transfer_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("transfer_buffer"),
        size: 48,
        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
        mapped_at_creation: false,
    });

    let output_bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: Some("output_bg"),
        layout: &output_bgl,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
                buffer: &output_buffer,
                offset: 0,
                size: None,
            }),
        }],
    });

    let mut bg_references = bind_groups.iter().collect::<Vec<_>>();

    bg_references.push(&output_bg);

    let pipeline = ctx
        .device
        .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("pipeline"),
            layout: Some(&pipeline_layout),
            module: &module,
            entry_point: Some("cs_main"),
            cache: None,
            compilation_options: Default::default(),
        });

    let mut encoder = ctx
        .device
        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("encoder"),
        });

    {
        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: None,
            timestamp_writes: None,
        });
        cpass.set_pipeline(&pipeline);
        // Bind group index matches the order pushed above; the output group
        // is always last.
        for (i, &bg) in bg_references.iter().enumerate() {
            cpass.set_bind_group(i as u32, bg, &[]);
        }
        cpass.dispatch_workgroups(1, 1, 1);
    }

    encoder.copy_buffer_to_buffer(&output_buffer, 0, &transfer_buffer, 0, 48);

    ctx.queue.submit([encoder.finish()]);
    let buffer_slice = transfer_buffer.slice(..);
    buffer_slice.map_async(wgpu::MapMode::Read, |_| {});

    // Wait for the GPU work and the map to complete before reading.
    ctx.device.poll(wgpu::Maintain::Wait);

    let buffer_data = buffer_slice.get_mapped_range();

    let f32_buffer: &[f32] = bytemuck::cast_slice(&buffer_data);

    // Expected colors at uv (0, 1) per sampler, one vec4 per row:
    // clamp/clamp -> blue; repeat/clamp -> blue+white blend;
    // clamp/repeat -> red+blue blend.
    let correct_values: [f32; 12] = [
        0.0, 0.0, 1.0, 1.0, //
        0.5, 0.5, 1.0, 1.0, //
        0.5, 0.0, 0.5, 1.0, //
    ];
    let iter = f32_buffer.iter().zip(correct_values.iter());
    // Allow 2% relative error to absorb filtering precision differences
    // between devices.
    for (&result, &value) in iter {
        approx::assert_relative_eq!(result, value, max_relative = 0.02);
    }
}
|
||||
@ -1521,9 +1521,9 @@ impl Device {
|
||||
};
|
||||
for (_, var) in module.global_variables.iter() {
|
||||
match var.binding {
|
||||
Some(ref br) if br.group >= self.limits.max_bind_groups => {
|
||||
Some(br) if br.group >= self.limits.max_bind_groups => {
|
||||
return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex {
|
||||
bind: br.clone(),
|
||||
bind: br,
|
||||
group: br.group,
|
||||
limit: self.limits.max_bind_groups,
|
||||
});
|
||||
|
||||
@ -921,7 +921,7 @@ impl Interface {
|
||||
let mut resource_mapping = FastHashMap::default();
|
||||
for (var_handle, var) in module.global_variables.iter() {
|
||||
let bind = match var.binding {
|
||||
Some(ref br) => br.clone(),
|
||||
Some(br) => br,
|
||||
_ => continue,
|
||||
};
|
||||
let naga_ty = &module.types[var.ty].inner;
|
||||
@ -1058,7 +1058,7 @@ impl Interface {
|
||||
BindingLayoutSource::Provided(layouts) => {
|
||||
// update the required binding size for this buffer
|
||||
if let ResourceType::Buffer { size } = res.ty {
|
||||
match shader_binding_sizes.entry(res.bind.clone()) {
|
||||
match shader_binding_sizes.entry(res.bind) {
|
||||
Entry::Occupied(e) => {
|
||||
*e.into_mut() = size.max(*e.get());
|
||||
}
|
||||
@ -1118,7 +1118,7 @@ impl Interface {
|
||||
}
|
||||
};
|
||||
if let Err(error) = result {
|
||||
return Err(StageError::Binding(res.bind.clone(), error));
|
||||
return Err(StageError::Binding(res.bind, error));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1159,8 +1159,8 @@ impl Interface {
|
||||
|
||||
if let Some(error) = error {
|
||||
return Err(StageError::Filtering {
|
||||
texture: texture_bind.clone(),
|
||||
sampler: sampler_bind.clone(),
|
||||
texture: *texture_bind,
|
||||
sampler: *sampler_bind,
|
||||
error,
|
||||
});
|
||||
}
|
||||
|
||||
@ -77,6 +77,7 @@ dx12 = [
|
||||
"dep:libloading",
|
||||
"dep:range-alloc",
|
||||
"dep:windows-core",
|
||||
"dep:ordered-float",
|
||||
"gpu-allocator/d3d12",
|
||||
"naga/hlsl-out-if-target-windows",
|
||||
"windows/Win32_Graphics_Direct3D_Fxc",
|
||||
|
||||
@ -154,6 +154,11 @@ impl super::Adapter {
|
||||
}
|
||||
.unwrap();
|
||||
|
||||
if options.ResourceHeapTier == Direct3D12::D3D12_RESOURCE_HEAP_TIER_1 {
|
||||
// We require Tier 2 for the ability to make samplers bindless in all cases.
|
||||
return None;
|
||||
}
|
||||
|
||||
let _depth_bounds_test_supported = {
|
||||
let mut features2 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS2::default();
|
||||
unsafe {
|
||||
@ -195,6 +200,23 @@ impl super::Adapter {
|
||||
.is_ok()
|
||||
};
|
||||
|
||||
let mut max_sampler_descriptor_heap_size =
|
||||
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
|
||||
{
|
||||
let mut features19 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS19::default();
|
||||
let res = unsafe {
|
||||
device.CheckFeatureSupport(
|
||||
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS19,
|
||||
<*mut _>::cast(&mut features19),
|
||||
size_of_val(&features19) as u32,
|
||||
)
|
||||
};
|
||||
|
||||
if res.is_ok() {
|
||||
max_sampler_descriptor_heap_size = features19.MaxSamplerDescriptorHeapSize;
|
||||
}
|
||||
};
|
||||
|
||||
let shader_model = if dxc_container.is_none() {
|
||||
naga::back::hlsl::ShaderModel::V5_1
|
||||
} else {
|
||||
@ -260,6 +282,7 @@ impl super::Adapter {
|
||||
// See https://github.com/gfx-rs/wgpu/issues/3552
|
||||
suballocation_supported: !info.name.contains("Iris(R) Xe"),
|
||||
shader_model,
|
||||
max_sampler_descriptor_heap_size,
|
||||
};
|
||||
|
||||
// Theoretically vram limited, but in practice 2^20 is the limit
|
||||
|
||||
@ -85,7 +85,7 @@ impl super::CommandEncoder {
|
||||
unsafe {
|
||||
list.SetDescriptorHeaps(&[
|
||||
Some(self.shared.heap_views.raw.clone()),
|
||||
Some(self.shared.heap_samplers.raw.clone()),
|
||||
Some(self.shared.sampler_heap.heap().clone()),
|
||||
])
|
||||
};
|
||||
}
|
||||
@ -241,6 +241,21 @@ impl super::CommandEncoder {
|
||||
(Pk::Transfer, _) => (),
|
||||
}
|
||||
}
|
||||
super::RootElement::SamplerHeap => match self.pass.kind {
|
||||
Pk::Render => unsafe {
|
||||
list.SetGraphicsRootDescriptorTable(
|
||||
index,
|
||||
self.shared.sampler_heap.gpu_descriptor_table(),
|
||||
)
|
||||
},
|
||||
Pk::Compute => unsafe {
|
||||
list.SetComputeRootDescriptorTable(
|
||||
index,
|
||||
self.shared.sampler_heap.gpu_descriptor_table(),
|
||||
)
|
||||
},
|
||||
Pk::Transfer => (),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -254,6 +269,9 @@ impl super::CommandEncoder {
|
||||
other: 0,
|
||||
};
|
||||
}
|
||||
if let Some(root_index) = layout.sampler_heap_root_index {
|
||||
self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap;
|
||||
}
|
||||
self.pass.layout = layout.clone();
|
||||
self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
|
||||
}
|
||||
@ -907,13 +925,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
|
||||
root_index += 1;
|
||||
}
|
||||
|
||||
// Bind Sampler descriptor tables.
|
||||
if info.tables.contains(super::TableTypes::SAMPLERS) {
|
||||
self.pass.root_elements[root_index] =
|
||||
super::RootElement::Table(group.handle_samplers.unwrap().gpu);
|
||||
root_index += 1;
|
||||
}
|
||||
|
||||
// Bind root descriptors
|
||||
for ((&kind, &gpu_base), &offset) in info
|
||||
.dynamic_buffers
|
||||
|
||||
@ -284,6 +284,11 @@ impl CpuHeap {
|
||||
}
|
||||
|
||||
pub(super) fn at(&self, index: u32) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
debug_assert!(
|
||||
index < self.total,
|
||||
"Index ({index}) out of bounds {total}",
|
||||
total = self.total
|
||||
);
|
||||
Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
ptr: self.start.ptr + (self.handle_size * index) as usize,
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
ffi,
|
||||
mem::{self, size_of, size_of_val},
|
||||
num::NonZeroU32,
|
||||
@ -100,7 +101,6 @@ impl super::Device {
|
||||
|
||||
// maximum number of CBV/SRV/UAV descriptors in heap for Tier 1
|
||||
let capacity_views = limits.max_non_sampler_bindings as u64;
|
||||
let capacity_samplers = 2_048;
|
||||
|
||||
let shared = super::DeviceShared {
|
||||
zero_buffer,
|
||||
@ -141,11 +141,7 @@ impl super::Device {
|
||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
capacity_views,
|
||||
)?,
|
||||
heap_samplers: descriptor::GeneralHeap::new(
|
||||
&raw,
|
||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||
capacity_samplers,
|
||||
)?,
|
||||
sampler_heap: super::sampler::SamplerHeap::new(&raw, &private_caps)?,
|
||||
};
|
||||
|
||||
let mut rtv_pool =
|
||||
@ -188,10 +184,6 @@ impl super::Device {
|
||||
raw.clone(),
|
||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
)),
|
||||
sampler_pool: Mutex::new(descriptor::CpuPool::new(
|
||||
raw,
|
||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||
)),
|
||||
library: Arc::clone(library),
|
||||
#[cfg(feature = "renderdoc")]
|
||||
render_doc: Default::default(),
|
||||
@ -678,8 +670,6 @@ impl crate::Device for super::Device {
|
||||
&self,
|
||||
desc: &crate::SamplerDescriptor,
|
||||
) -> Result<super::Sampler, crate::DeviceError> {
|
||||
let handle = self.sampler_pool.lock().alloc_handle()?;
|
||||
|
||||
let reduction = match desc.compare {
|
||||
Some(_) => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
|
||||
None => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_STANDARD,
|
||||
@ -697,9 +687,7 @@ impl crate::Device for super::Device {
|
||||
|
||||
let border_color = conv::map_border_color(desc.border_color);
|
||||
|
||||
unsafe {
|
||||
self.raw.CreateSampler(
|
||||
&Direct3D12::D3D12_SAMPLER_DESC {
|
||||
let raw_desc = Direct3D12::D3D12_SAMPLER_DESC {
|
||||
Filter: filter,
|
||||
AddressU: conv::map_address_mode(desc.address_modes[0]),
|
||||
AddressV: conv::map_address_mode(desc.address_modes[1]),
|
||||
@ -713,18 +701,25 @@ impl crate::Device for super::Device {
|
||||
BorderColor: border_color,
|
||||
MinLOD: desc.lod_clamp.start,
|
||||
MaxLOD: desc.lod_clamp.end,
|
||||
},
|
||||
handle.raw,
|
||||
)
|
||||
};
|
||||
|
||||
let index = self
|
||||
.shared
|
||||
.sampler_heap
|
||||
.create_sampler(&self.raw, raw_desc)?;
|
||||
|
||||
self.counters.samplers.add(1);
|
||||
|
||||
Ok(super::Sampler { handle })
|
||||
Ok(super::Sampler {
|
||||
index,
|
||||
desc: raw_desc,
|
||||
})
|
||||
}
|
||||
|
||||
unsafe fn destroy_sampler(&self, sampler: super::Sampler) {
|
||||
self.sampler_pool.lock().free_handle(sampler.handle);
|
||||
self.shared
|
||||
.sampler_heap
|
||||
.destroy_sampler(sampler.desc, sampler.index);
|
||||
self.counters.samplers.sub(1);
|
||||
}
|
||||
|
||||
@ -763,12 +758,8 @@ impl crate::Device for super::Device {
|
||||
&self,
|
||||
desc: &crate::BindGroupLayoutDescriptor,
|
||||
) -> Result<super::BindGroupLayout, crate::DeviceError> {
|
||||
let (
|
||||
mut num_buffer_views,
|
||||
mut num_samplers,
|
||||
mut num_texture_views,
|
||||
mut num_acceleration_structures,
|
||||
) = (0, 0, 0, 0);
|
||||
let mut num_views = 0;
|
||||
let mut has_sampler_in_group = false;
|
||||
for entry in desc.entries.iter() {
|
||||
let count = entry.count.map_or(1, NonZeroU32::get);
|
||||
match entry.ty {
|
||||
@ -776,18 +767,20 @@ impl crate::Device for super::Device {
|
||||
has_dynamic_offset: true,
|
||||
..
|
||||
} => {}
|
||||
wgt::BindingType::Buffer { .. } => num_buffer_views += count,
|
||||
wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => {
|
||||
num_texture_views += count
|
||||
wgt::BindingType::Buffer { .. }
|
||||
| wgt::BindingType::Texture { .. }
|
||||
| wgt::BindingType::StorageTexture { .. }
|
||||
| wgt::BindingType::AccelerationStructure => num_views += count,
|
||||
wgt::BindingType::Sampler { .. } => has_sampler_in_group = true,
|
||||
}
|
||||
wgt::BindingType::Sampler { .. } => num_samplers += count,
|
||||
wgt::BindingType::AccelerationStructure => num_acceleration_structures += count,
|
||||
}
|
||||
|
||||
if has_sampler_in_group {
|
||||
num_views += 1;
|
||||
}
|
||||
|
||||
self.counters.bind_group_layouts.add(1);
|
||||
|
||||
let num_views = num_buffer_views + num_texture_views + num_acceleration_structures;
|
||||
Ok(super::BindGroupLayout {
|
||||
entries: desc.entries.to_vec(),
|
||||
cpu_heap_views: if num_views != 0 {
|
||||
@ -800,17 +793,7 @@ impl crate::Device for super::Device {
|
||||
} else {
|
||||
None
|
||||
},
|
||||
cpu_heap_samplers: if num_samplers != 0 {
|
||||
let heap = descriptor::CpuHeap::new(
|
||||
&self.raw,
|
||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||
num_samplers,
|
||||
)?;
|
||||
Some(heap)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
copy_counts: vec![1; num_views.max(num_samplers) as usize],
|
||||
copy_counts: vec![1; num_views as usize],
|
||||
})
|
||||
}
|
||||
|
||||
@ -841,12 +824,15 @@ impl crate::Device for super::Device {
|
||||
// ...
|
||||
// (bind group [0]) - Space=0
|
||||
// View descriptor table, if any
|
||||
// Sampler descriptor table, if any
|
||||
// Sampler buffer descriptor table, if any
|
||||
// Root descriptors (for dynamic offset buffers)
|
||||
// (bind group [1]) - Space=0
|
||||
// ...
|
||||
// (bind group [2]) - Space=0
|
||||
// Special constant buffer: Space=0
|
||||
// Sampler descriptor tables: Space=0
|
||||
// SamplerState Array: Space=0, Register=0-2047
|
||||
// SamplerComparisonState Array: Space=0, Register=2048-4095
|
||||
|
||||
//TODO: put lower bind group indices further down the root signature. See:
|
||||
// https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model
|
||||
@ -854,12 +840,10 @@ impl crate::Device for super::Device {
|
||||
// on Vulkan-like layout compatibility rules.
|
||||
|
||||
let mut binding_map = hlsl::BindingMap::default();
|
||||
let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = (
|
||||
hlsl::BindTarget::default(),
|
||||
hlsl::BindTarget::default(),
|
||||
hlsl::BindTarget::default(),
|
||||
hlsl::BindTarget::default(),
|
||||
);
|
||||
let mut sampler_buffer_binding_map = hlsl::SamplerIndexBufferBindingMap::default();
|
||||
let mut bind_cbv = hlsl::BindTarget::default();
|
||||
let mut bind_srv = hlsl::BindTarget::default();
|
||||
let mut bind_uav = hlsl::BindTarget::default();
|
||||
let mut parameters = Vec::new();
|
||||
let mut push_constants_target = None;
|
||||
let mut root_constant_info = None;
|
||||
@ -886,7 +870,7 @@ impl crate::Device for super::Device {
|
||||
},
|
||||
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
|
||||
});
|
||||
let binding = bind_cbv.clone();
|
||||
let binding = bind_cbv;
|
||||
bind_cbv.register += 1;
|
||||
root_constant_info = Some(super::RootConstantInfo {
|
||||
root_index: parameter_index as u32,
|
||||
@ -900,19 +884,34 @@ impl crate::Device for super::Device {
|
||||
// Collect the whole number of bindings we will create upfront.
|
||||
// It allows us to preallocate enough storage to avoid reallocation,
|
||||
// which could cause invalid pointers.
|
||||
let total_non_dynamic_entries = desc
|
||||
.bind_group_layouts
|
||||
.iter()
|
||||
.flat_map(|bgl| {
|
||||
bgl.entries.iter().map(|entry| match entry.ty {
|
||||
let mut total_non_dynamic_entries = 0_usize;
|
||||
let mut sampler_in_any_bind_group = false;
|
||||
for bgl in desc.bind_group_layouts {
|
||||
let mut sampler_in_bind_group = false;
|
||||
|
||||
for entry in &bgl.entries {
|
||||
match entry.ty {
|
||||
wgt::BindingType::Buffer {
|
||||
has_dynamic_offset: true,
|
||||
..
|
||||
} => 0,
|
||||
_ => 1,
|
||||
})
|
||||
})
|
||||
.sum();
|
||||
} => {}
|
||||
wgt::BindingType::Sampler(_) => sampler_in_bind_group = true,
|
||||
_ => total_non_dynamic_entries += 1,
|
||||
}
|
||||
}
|
||||
|
||||
if sampler_in_bind_group {
|
||||
// One for the sampler buffer
|
||||
total_non_dynamic_entries += 1;
|
||||
sampler_in_any_bind_group = true;
|
||||
}
|
||||
}
|
||||
|
||||
if sampler_in_any_bind_group {
|
||||
// Two for the sampler arrays themselves
|
||||
total_non_dynamic_entries += 2;
|
||||
}
|
||||
|
||||
let mut ranges = Vec::with_capacity(total_non_dynamic_entries);
|
||||
|
||||
let mut bind_group_infos =
|
||||
@ -926,10 +925,11 @@ impl crate::Device for super::Device {
|
||||
|
||||
let mut visibility_view_static = wgt::ShaderStages::empty();
|
||||
let mut visibility_view_dynamic = wgt::ShaderStages::empty();
|
||||
let mut visibility_sampler = wgt::ShaderStages::empty();
|
||||
for entry in bgl.entries.iter() {
|
||||
match entry.ty {
|
||||
wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility,
|
||||
wgt::BindingType::Sampler { .. } => {
|
||||
visibility_view_static |= wgt::ShaderStages::all()
|
||||
}
|
||||
wgt::BindingType::Buffer {
|
||||
has_dynamic_offset: true,
|
||||
..
|
||||
@ -939,7 +939,7 @@ impl crate::Device for super::Device {
|
||||
}
|
||||
|
||||
// SRV/CBV/UAV descriptor tables
|
||||
let mut range_base = ranges.len();
|
||||
let range_base = ranges.len();
|
||||
for entry in bgl.entries.iter() {
|
||||
let range_ty = match entry.ty {
|
||||
wgt::BindingType::Buffer {
|
||||
@ -963,7 +963,7 @@ impl crate::Device for super::Device {
|
||||
},
|
||||
hlsl::BindTarget {
|
||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
||||
..bt.clone()
|
||||
..*bt
|
||||
},
|
||||
);
|
||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||
@ -976,6 +976,44 @@ impl crate::Device for super::Device {
|
||||
});
|
||||
bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
|
||||
}
|
||||
|
||||
let mut sampler_index_within_bind_group = 0;
|
||||
for entry in bgl.entries.iter() {
|
||||
if let wgt::BindingType::Sampler(_) = entry.ty {
|
||||
binding_map.insert(
|
||||
naga::ResourceBinding {
|
||||
group: index as u32,
|
||||
binding: entry.binding,
|
||||
},
|
||||
hlsl::BindTarget {
|
||||
// Naga does not use the space field for samplers
|
||||
space: 255,
|
||||
register: sampler_index_within_bind_group,
|
||||
binding_array_size: None,
|
||||
},
|
||||
);
|
||||
sampler_index_within_bind_group += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if sampler_index_within_bind_group != 0 {
|
||||
sampler_buffer_binding_map.insert(
|
||||
hlsl::SamplerIndexBufferKey {
|
||||
group: index as u32,
|
||||
},
|
||||
bind_srv,
|
||||
);
|
||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
|
||||
NumDescriptors: 1,
|
||||
BaseShaderRegister: bind_srv.register,
|
||||
RegisterSpace: bind_srv.space as u32,
|
||||
OffsetInDescriptorsFromTableStart:
|
||||
Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
||||
});
|
||||
bind_srv.register += 1;
|
||||
}
|
||||
|
||||
if ranges.len() > range_base {
|
||||
let range = &ranges[range_base..];
|
||||
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
||||
@ -991,50 +1029,6 @@ impl crate::Device for super::Device {
|
||||
info.tables |= super::TableTypes::SRV_CBV_UAV;
|
||||
}
|
||||
|
||||
// Sampler descriptor tables
|
||||
range_base = ranges.len();
|
||||
for entry in bgl.entries.iter() {
|
||||
let range_ty = match entry.ty {
|
||||
wgt::BindingType::Sampler { .. } => {
|
||||
Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
binding_map.insert(
|
||||
naga::ResourceBinding {
|
||||
group: index as u32,
|
||||
binding: entry.binding,
|
||||
},
|
||||
hlsl::BindTarget {
|
||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
||||
..bind_sampler.clone()
|
||||
},
|
||||
);
|
||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||
RangeType: range_ty,
|
||||
NumDescriptors: entry.count.map_or(1, |count| count.get()),
|
||||
BaseShaderRegister: bind_sampler.register,
|
||||
RegisterSpace: bind_sampler.space as u32,
|
||||
OffsetInDescriptorsFromTableStart:
|
||||
Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
||||
});
|
||||
bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
|
||||
}
|
||||
if ranges.len() > range_base {
|
||||
let range = &ranges[range_base..];
|
||||
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
||||
ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
||||
Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
|
||||
DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
||||
NumDescriptorRanges: range.len() as u32,
|
||||
pDescriptorRanges: range.as_ptr(),
|
||||
},
|
||||
},
|
||||
ShaderVisibility: conv::map_visibility(visibility_sampler),
|
||||
});
|
||||
info.tables |= super::TableTypes::SAMPLERS;
|
||||
}
|
||||
|
||||
// Root (dynamic) descriptor tables
|
||||
let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic);
|
||||
for entry in bgl.entries.iter() {
|
||||
@ -1072,7 +1066,7 @@ impl crate::Device for super::Device {
|
||||
},
|
||||
hlsl::BindTarget {
|
||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
||||
..bt.clone()
|
||||
..*bt
|
||||
},
|
||||
);
|
||||
info.dynamic_buffers.push(kind);
|
||||
@ -1094,6 +1088,62 @@ impl crate::Device for super::Device {
|
||||
bind_group_infos.push(info);
|
||||
}
|
||||
|
||||
let sampler_heap_target = hlsl::SamplerHeapBindTargets {
|
||||
standard_samplers: hlsl::BindTarget {
|
||||
space: 0,
|
||||
register: 0,
|
||||
binding_array_size: None,
|
||||
},
|
||||
comparison_samplers: hlsl::BindTarget {
|
||||
space: 0,
|
||||
register: 2048,
|
||||
binding_array_size: None,
|
||||
},
|
||||
};
|
||||
|
||||
let mut sampler_heap_root_index = None;
|
||||
if sampler_in_any_bind_group {
|
||||
// Sampler descriptor tables
|
||||
//
|
||||
// We bind two sampler ranges pointing to the same descriptor heap, using two different register ranges.
|
||||
//
|
||||
// We bind them as normal samplers in registers 0-2047 and comparison samplers in registers 2048-4095.
|
||||
// Tier 2 hardware guarantees that the type of sampler only needs to match if the sampler is actually
|
||||
// accessed in the shader. As such, we can bind the same array of samplers to both registers.
|
||||
//
|
||||
// We do this because HLSL does not allow you to alias registers at all.
|
||||
let range_base = ranges.len();
|
||||
// Standard samplers, registers 0-2047
|
||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
|
||||
NumDescriptors: 2048,
|
||||
BaseShaderRegister: 0,
|
||||
RegisterSpace: 0,
|
||||
OffsetInDescriptorsFromTableStart: 0,
|
||||
});
|
||||
// Comparison samplers, registers 2048-4095
|
||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
|
||||
NumDescriptors: 2048,
|
||||
BaseShaderRegister: 2048,
|
||||
RegisterSpace: 0,
|
||||
OffsetInDescriptorsFromTableStart: 0,
|
||||
});
|
||||
|
||||
let range = &ranges[range_base..];
|
||||
sampler_heap_root_index = Some(parameters.len() as super::RootIndex);
|
||||
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
||||
ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
||||
Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
|
||||
DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
||||
NumDescriptorRanges: range.len() as u32,
|
||||
pDescriptorRanges: range.as_ptr(),
|
||||
},
|
||||
},
|
||||
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
|
||||
});
|
||||
}
|
||||
|
||||
// Ensure that we didn't reallocate!
|
||||
debug_assert_eq!(ranges.len(), total_non_dynamic_entries);
|
||||
|
||||
@ -1113,7 +1163,7 @@ impl crate::Device for super::Device {
|
||||
},
|
||||
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only,
|
||||
});
|
||||
let binding = bind_cbv.clone();
|
||||
let binding = bind_cbv;
|
||||
bind_cbv.register += 1;
|
||||
(Some(parameter_index as u32), Some(binding))
|
||||
} else {
|
||||
@ -1231,6 +1281,7 @@ impl crate::Device for super::Device {
|
||||
total_root_elements: parameters.len() as super::RootIndex,
|
||||
special_constants,
|
||||
root_constant_info,
|
||||
sampler_heap_root_index,
|
||||
},
|
||||
bind_group_infos,
|
||||
naga_options: hlsl::Options {
|
||||
@ -1241,6 +1292,8 @@ impl crate::Device for super::Device {
|
||||
push_constants_target,
|
||||
zero_initialize_workgroup_memory: true,
|
||||
restrict_indexing: true,
|
||||
sampler_heap_target,
|
||||
sampler_buffer_binding_map,
|
||||
},
|
||||
})
|
||||
}
|
||||
@ -1267,14 +1320,6 @@ impl crate::Device for super::Device {
|
||||
if let Some(ref mut inner) = cpu_views {
|
||||
inner.stage.clear();
|
||||
}
|
||||
let mut cpu_samplers = desc
|
||||
.layout
|
||||
.cpu_heap_samplers
|
||||
.as_ref()
|
||||
.map(|cpu_heap| cpu_heap.inner.lock());
|
||||
if let Some(ref mut inner) = cpu_samplers {
|
||||
inner.stage.clear();
|
||||
}
|
||||
let mut dynamic_buffers = Vec::new();
|
||||
|
||||
let layout_and_entry_iter = desc.entries.iter().map(|entry| {
|
||||
@ -1286,6 +1331,8 @@ impl crate::Device for super::Device {
|
||||
.expect("internal error: no layout entry found with binding slot");
|
||||
(layout, entry)
|
||||
});
|
||||
let mut sampler_indexes: Vec<super::sampler::SamplerIndex> = Vec::new();
|
||||
|
||||
for (layout, entry) in layout_and_entry_iter {
|
||||
match layout.ty {
|
||||
wgt::BindingType::Buffer {
|
||||
@ -1390,8 +1437,8 @@ impl crate::Device for super::Device {
|
||||
wgt::BindingType::Sampler { .. } => {
|
||||
let start = entry.resource_index as usize;
|
||||
let end = start + entry.count as usize;
|
||||
for data in &desc.samplers[start..end] {
|
||||
cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw);
|
||||
for &data in &desc.samplers[start..end] {
|
||||
sampler_indexes.push(data.index);
|
||||
}
|
||||
}
|
||||
wgt::BindingType::AccelerationStructure => {
|
||||
@ -1424,6 +1471,92 @@ impl crate::Device for super::Device {
|
||||
}
|
||||
}
|
||||
|
||||
let sampler_index_buffer = if !sampler_indexes.is_empty() {
|
||||
let buffer_size = (sampler_indexes.len() * size_of::<u32>()) as u64;
|
||||
|
||||
let label = if let Some(label) = desc.label {
|
||||
Cow::Owned(format!("{} (Internal Sampler Index Buffer)", label))
|
||||
} else {
|
||||
Cow::Borrowed("Internal Sampler Index Buffer")
|
||||
};
|
||||
|
||||
let buffer_desc = crate::BufferDescriptor {
|
||||
label: None,
|
||||
size: buffer_size,
|
||||
usage: crate::BufferUses::STORAGE_READ_ONLY | crate::BufferUses::MAP_WRITE,
|
||||
// D3D12 backend doesn't care about the memory flags
|
||||
memory_flags: crate::MemoryFlags::empty(),
|
||||
};
|
||||
|
||||
let raw_buffer_desc = Direct3D12::D3D12_RESOURCE_DESC {
|
||||
Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
Alignment: 0,
|
||||
Width: buffer_size,
|
||||
Height: 1,
|
||||
DepthOrArraySize: 1,
|
||||
MipLevels: 1,
|
||||
Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
|
||||
SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC {
|
||||
Count: 1,
|
||||
Quality: 0,
|
||||
},
|
||||
Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
Flags: Direct3D12::D3D12_RESOURCE_FLAG_NONE,
|
||||
};
|
||||
|
||||
let (buffer, allocation) =
|
||||
super::suballocation::create_buffer_resource(self, &buffer_desc, raw_buffer_desc)?;
|
||||
|
||||
unsafe { buffer.SetName(&windows::core::HSTRING::from(&*label)) }
|
||||
.into_device_result("SetName")?;
|
||||
|
||||
let mut mapping = ptr::null_mut::<ffi::c_void>();
|
||||
unsafe { buffer.Map(0, None, Some(&mut mapping)) }.into_device_result("Map")?;
|
||||
|
||||
assert!(!mapping.is_null());
|
||||
assert_eq!(mapping as usize % 4, 0);
|
||||
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
sampler_indexes.as_ptr(),
|
||||
mapping.cast(),
|
||||
sampler_indexes.len(),
|
||||
)
|
||||
};
|
||||
|
||||
// The unmapping is not needed, as all memory is coherent in d3d12, but lets be nice to our address space.
|
||||
unsafe { buffer.Unmap(0, None) };
|
||||
|
||||
let srv_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC {
|
||||
Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
|
||||
ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER,
|
||||
Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 {
|
||||
Buffer: Direct3D12::D3D12_BUFFER_SRV {
|
||||
FirstElement: 0,
|
||||
NumElements: sampler_indexes.len() as u32,
|
||||
StructureByteStride: 4,
|
||||
Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_NONE,
|
||||
},
|
||||
},
|
||||
Shader4ComponentMapping: Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
|
||||
};
|
||||
|
||||
let inner = cpu_views.as_mut().unwrap();
|
||||
let cpu_index = inner.stage.len() as u32;
|
||||
let srv = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index);
|
||||
|
||||
unsafe {
|
||||
self.raw
|
||||
.CreateShaderResourceView(&buffer, Some(&srv_desc), srv)
|
||||
};
|
||||
|
||||
cpu_views.as_mut().unwrap().stage.push(srv);
|
||||
|
||||
Some(super::SamplerIndexBuffer { buffer, allocation })
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let handle_views = match cpu_views {
|
||||
Some(inner) => {
|
||||
let dual = unsafe {
|
||||
@ -1438,26 +1571,12 @@ impl crate::Device for super::Device {
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
let handle_samplers = match cpu_samplers {
|
||||
Some(inner) => {
|
||||
let dual = unsafe {
|
||||
descriptor::upload(
|
||||
&self.raw,
|
||||
&inner,
|
||||
&self.shared.heap_samplers,
|
||||
&desc.layout.copy_counts,
|
||||
)
|
||||
}?;
|
||||
Some(dual)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
self.counters.bind_groups.add(1);
|
||||
|
||||
Ok(super::BindGroup {
|
||||
handle_views,
|
||||
handle_samplers,
|
||||
sampler_index_buffer,
|
||||
dynamic_buffers,
|
||||
})
|
||||
}
|
||||
@ -1466,8 +1585,14 @@ impl crate::Device for super::Device {
|
||||
if let Some(dual) = group.handle_views {
|
||||
self.shared.heap_views.free_slice(dual);
|
||||
}
|
||||
if let Some(dual) = group.handle_samplers {
|
||||
self.shared.heap_samplers.free_slice(dual);
|
||||
|
||||
if let Some(sampler_buffer) = group.sampler_index_buffer {
|
||||
// Make sure the buffer is dropped before the allocation
|
||||
drop(sampler_buffer.buffer);
|
||||
|
||||
if let Some(allocation) = sampler_buffer.allocation {
|
||||
super::suballocation::free_buffer_allocation(self, allocation, &self.mem_allocator);
|
||||
}
|
||||
}
|
||||
|
||||
self.counters.bind_groups.sub(1);
|
||||
|
||||
@ -13,13 +13,52 @@ and destination states match, and they are for storage sync.
|
||||
|
||||
For now, all resources are created with "committed" memory.
|
||||
|
||||
## Sampler Descriptor Management
|
||||
|
||||
At most one descriptor heap of each type can be bound at once. This
|
||||
means that the descriptors from all bind groups need to be present
|
||||
in the same heap, and they need to be contiguous within that heap.
|
||||
This is not a problem for the SRV/CBV/UAV heap as it can be sized into
|
||||
the millions of entries. However the sampler heap is limited to 2048 entries.
|
||||
|
||||
In order to work around this limitation, we refer to samplers indirectly by index.
|
||||
The entire sampler heap is bound at once and a buffer containing all sampler indexes
|
||||
for that bind group is bound. The shader then uses the index to look up the sampler
|
||||
in the heap. To help visualize this, the generated HLSL looks like this:
|
||||
|
||||
```wgsl
|
||||
@group(0) @binding(2) var myLinearSampler: sampler;
|
||||
@group(1) @binding(1) var myAnisoSampler: sampler;
|
||||
@group(1) @binding(4) var myCompSampler: sampler;
|
||||
```
|
||||
|
||||
```cpp
|
||||
// These bindings alias the same descriptors. Depending on the type, the shader will use the correct one.
|
||||
SamplerState nagaSamplerHeap[2048]: register(s0, space0);
|
||||
SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s2048, space1);
|
||||
|
||||
StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space0);
|
||||
StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space0);
|
||||
|
||||
// Indexes into group 0 index array
|
||||
static const SamplerState myLinearSampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];
|
||||
|
||||
// Indexes into group 1 index array
|
||||
static const SamplerState myAnisoSampler = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
|
||||
static const SamplerComparisonState myCompSampler = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
|
||||
```
|
||||
|
||||
Without this transform we would need separate set of sampler descriptors for each unique combination of samplers
|
||||
in a bind group. This results in a lot of duplication and makes it easy to hit the 2048 limit. With the transform
|
||||
the limit is merely 2048 unique samplers in existence, which is much more reasonable.
|
||||
|
||||
## Resource binding
|
||||
|
||||
See ['Device::create_pipeline_layout`] documentation for the structure
|
||||
of the root signature corresponding to WebGPU pipeline layout.
|
||||
|
||||
Binding groups is mostly straightforward, with one big caveat:
|
||||
all bindings have to be reset whenever the pipeline layout changes.
|
||||
all bindings have to be reset whenever the root signature changes.
|
||||
This is the rule of D3D12, and we can do nothing to help it.
|
||||
|
||||
We detect this change at both [`crate::CommandEncoder::set_bind_group`]
|
||||
@ -39,6 +78,7 @@ mod conv;
|
||||
mod descriptor;
|
||||
mod device;
|
||||
mod instance;
|
||||
mod sampler;
|
||||
mod shader_compilation;
|
||||
mod suballocation;
|
||||
mod types;
|
||||
@ -518,6 +558,7 @@ struct PrivateCapabilities {
|
||||
casting_fully_typed_format_supported: bool,
|
||||
suballocation_supported: bool,
|
||||
shader_model: naga::back::hlsl::ShaderModel,
|
||||
max_sampler_descriptor_heap_size: u32,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@ -575,7 +616,7 @@ struct DeviceShared {
|
||||
zero_buffer: Direct3D12::ID3D12Resource,
|
||||
cmd_signatures: CommandSignatures,
|
||||
heap_views: descriptor::GeneralHeap,
|
||||
heap_samplers: descriptor::GeneralHeap,
|
||||
sampler_heap: sampler::SamplerHeap,
|
||||
}
|
||||
|
||||
unsafe impl Send for DeviceShared {}
|
||||
@ -591,7 +632,6 @@ pub struct Device {
|
||||
rtv_pool: Mutex<descriptor::CpuPool>,
|
||||
dsv_pool: Mutex<descriptor::CpuPool>,
|
||||
srv_uav_pool: Mutex<descriptor::CpuPool>,
|
||||
sampler_pool: Mutex<descriptor::CpuPool>,
|
||||
// library
|
||||
library: Arc<D3D12Lib>,
|
||||
#[cfg(feature = "renderdoc")]
|
||||
@ -645,7 +685,7 @@ struct PassResolve {
|
||||
format: Dxgi::Common::DXGI_FORMAT,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
enum RootElement {
|
||||
Empty,
|
||||
Constant,
|
||||
@ -664,6 +704,8 @@ enum RootElement {
|
||||
kind: BufferViewKind,
|
||||
address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
},
|
||||
/// Descriptor table referring to the entire sampler heap.
|
||||
SamplerHeap,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
@ -700,6 +742,7 @@ impl PassState {
|
||||
total_root_elements: 0,
|
||||
special_constants: None,
|
||||
root_constant_info: None,
|
||||
sampler_heap_root_index: None,
|
||||
},
|
||||
root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
|
||||
constant_data: [0; MAX_ROOT_ELEMENTS],
|
||||
@ -853,7 +896,8 @@ unsafe impl Sync for TextureView {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Sampler {
|
||||
handle: descriptor::Handle,
|
||||
index: sampler::SamplerIndex,
|
||||
desc: Direct3D12::D3D12_SAMPLER_DESC,
|
||||
}
|
||||
|
||||
impl crate::DynSampler for Sampler {}
|
||||
@ -893,7 +937,6 @@ pub struct BindGroupLayout {
|
||||
/// Sorted list of entries.
|
||||
entries: Vec<wgt::BindGroupLayoutEntry>,
|
||||
cpu_heap_views: Option<descriptor::CpuHeap>,
|
||||
cpu_heap_samplers: Option<descriptor::CpuHeap>,
|
||||
copy_counts: Vec<u32>, // all 1's
|
||||
}
|
||||
|
||||
@ -906,10 +949,16 @@ enum BufferViewKind {
|
||||
UnorderedAccess,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SamplerIndexBuffer {
|
||||
buffer: Direct3D12::ID3D12Resource,
|
||||
allocation: Option<suballocation::AllocationWrapper>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct BindGroup {
|
||||
handle_views: Option<descriptor::DualHandle>,
|
||||
handle_samplers: Option<descriptor::DualHandle>,
|
||||
sampler_index_buffer: Option<SamplerIndexBuffer>,
|
||||
dynamic_buffers: Vec<Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE>,
|
||||
}
|
||||
|
||||
@ -945,6 +994,7 @@ struct PipelineLayoutShared {
|
||||
total_root_elements: RootIndex,
|
||||
special_constants: Option<PipelineLayoutSpecialConstants>,
|
||||
root_constant_info: Option<RootConstantInfo>,
|
||||
sampler_heap_root_index: Option<RootIndex>,
|
||||
}
|
||||
|
||||
unsafe impl Send for PipelineLayoutShared {}
|
||||
|
||||
251
wgpu-hal/src/dx12/sampler.rs
Normal file
251
wgpu-hal/src/dx12/sampler.rs
Normal file
@ -0,0 +1,251 @@
|
||||
//! Sampler management for DX12.
|
||||
//!
|
||||
//! Nearly identical to the Vulkan sampler cache, with added descriptor heap management.
|
||||
|
||||
use std::collections::{hash_map::Entry, HashMap};
|
||||
|
||||
use ordered_float::OrderedFloat;
|
||||
use parking_lot::Mutex;
|
||||
use windows::Win32::Graphics::Direct3D12::*;
|
||||
|
||||
use crate::dx12::HResult;
|
||||
|
||||
/// The index of a sampler in the global sampler heap.
|
||||
///
|
||||
/// This is a type-safe, transparent wrapper around a u32.
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct SamplerIndex(u32);
|
||||
|
||||
/// [`D3D12_SAMPLER_DESC`] is not hashable, so we wrap it in a newtype that is.
|
||||
///
|
||||
/// We use [`OrderedFloat`] to allow for floating point values to be compared and
|
||||
/// hashed in a defined way.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
struct HashableSamplerDesc(D3D12_SAMPLER_DESC);
|
||||
|
||||
impl PartialEq for HashableSamplerDesc {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.Filter == other.0.Filter
|
||||
&& self.0.AddressU == other.0.AddressU
|
||||
&& self.0.AddressV == other.0.AddressV
|
||||
&& self.0.AddressW == other.0.AddressW
|
||||
&& OrderedFloat(self.0.MipLODBias) == OrderedFloat(other.0.MipLODBias)
|
||||
&& self.0.MaxAnisotropy == other.0.MaxAnisotropy
|
||||
&& self.0.ComparisonFunc == other.0.ComparisonFunc
|
||||
&& self.0.BorderColor.map(OrderedFloat) == other.0.BorderColor.map(OrderedFloat)
|
||||
&& OrderedFloat(self.0.MinLOD) == OrderedFloat(other.0.MinLOD)
|
||||
&& OrderedFloat(self.0.MaxLOD) == OrderedFloat(other.0.MaxLOD)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for HashableSamplerDesc {}
|
||||
|
||||
impl std::hash::Hash for HashableSamplerDesc {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.0.Filter.0.hash(state);
|
||||
self.0.AddressU.0.hash(state);
|
||||
self.0.AddressV.0.hash(state);
|
||||
self.0.AddressW.0.hash(state);
|
||||
OrderedFloat(self.0.MipLODBias).hash(state);
|
||||
self.0.MaxAnisotropy.hash(state);
|
||||
self.0.ComparisonFunc.0.hash(state);
|
||||
self.0.BorderColor.map(OrderedFloat).hash(state);
|
||||
OrderedFloat(self.0.MinLOD).hash(state);
|
||||
OrderedFloat(self.0.MaxLOD).hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
/// Entry in the sampler cache.
|
||||
struct CacheEntry {
|
||||
index: SamplerIndex,
|
||||
ref_count: u32,
|
||||
}
|
||||
|
||||
/// Container for the mutable management state of the sampler heap.
|
||||
///
|
||||
/// We have this separated, using interior mutability, to allow for the outside world
|
||||
/// to access the heap directly without needing to take the lock.
|
||||
pub(crate) struct SamplerHeapState {
|
||||
/// Mapping from the sampler description to the index within the heap and the refcount.
|
||||
mapping: HashMap<HashableSamplerDesc, CacheEntry>,
|
||||
/// List of free sampler indices.
|
||||
freelist: Vec<SamplerIndex>,
|
||||
}
|
||||
|
||||
/// Global sampler heap for the device.
///
/// As D3D12 only allows 2048 samplers to be in a single heap, we need to cache
/// samplers aggressively and refer to them in shaders by index.
pub(crate) struct SamplerHeap {
    /// Mutable management state of the sampler heap.
    ///
    /// Kept behind a lock so the heap itself and its handles can be read
    /// without synchronization.
    state: Mutex<SamplerHeapState>,

    /// The heap itself.
    heap: ID3D12DescriptorHeap,
    /// The CPU-side handle to the first descriptor in the heap.
    ///
    /// Both the CPU and GPU handles point to the same descriptor, just in
    /// different contexts.
    heap_cpu_start_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
    /// The GPU-side handle to the first descriptor in the heap.
    ///
    /// Both the CPU and GPU handles point to the same descriptor, just in
    /// different contexts.
    heap_gpu_start_handle: D3D12_GPU_DESCRIPTOR_HANDLE,

    /// This is the device-specific size of sampler descriptors.
    ///
    /// Used to compute the CPU handle of descriptor `i` as
    /// `start + stride * i`.
    descriptor_stride: u32,
}
|
||||
|
||||
impl SamplerHeap {
    /// Creates the shader-visible global sampler heap and its management state.
    ///
    /// The heap is sized to the device's reported maximum sampler descriptor
    /// heap size, clamped to a safe upper bound.
    pub fn new(
        device: &ID3D12Device,
        private_caps: &super::PrivateCapabilities,
    ) -> Result<Self, crate::DeviceError> {
        profiling::scope!("SamplerHeap::new");

        // WARP can report this as 2M or more. We clamp it to 64k to be safe.
        const SAMPLER_HEAP_SIZE_CLAMP: u32 = 64 * 1024;

        let max_unique_samplers = private_caps
            .max_sampler_descriptor_heap_size
            .min(SAMPLER_HEAP_SIZE_CLAMP);

        let desc = D3D12_DESCRIPTOR_HEAP_DESC {
            Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
            NumDescriptors: max_unique_samplers,
            // Shader-visible: the whole heap is bound once and indexed from shaders.
            Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
            NodeMask: 0,
        };
        let heap = unsafe { device.CreateDescriptorHeap::<ID3D12DescriptorHeap>(&desc) }
            .into_device_result("Failed to create global GPU-Visible Sampler Descriptor Heap")?;

        let heap_cpu_start_handle = unsafe { heap.GetCPUDescriptorHandleForHeapStart() };
        let heap_gpu_start_handle = unsafe { heap.GetGPUDescriptorHandleForHeapStart() };

        // Sampler descriptor size varies per device/driver; query it once.
        let descriptor_stride =
            unsafe { device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) };

        Ok(Self {
            state: Mutex::new(SamplerHeapState {
                mapping: HashMap::new(),
                // Reverse so that samplers get allocated starting from zero.
                freelist: (0..max_unique_samplers).map(SamplerIndex).rev().collect(),
            }),
            heap,
            heap_cpu_start_handle,
            heap_gpu_start_handle,
            descriptor_stride,
        })
    }

    /// Returns a reference to the raw descriptor heap.
    pub fn heap(&self) -> &ID3D12DescriptorHeap {
        &self.heap
    }

    /// Returns the GPU handle of the heap start, to be bound as the sampler
    /// descriptor table.
    pub fn gpu_descriptor_table(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE {
        self.heap_gpu_start_handle
    }

    /// Add a sampler with the given description to the heap.
    ///
    /// If the sampler already exists, the refcount is incremented and the existing index is returned.
    ///
    /// If the sampler does not exist, a new sampler is created and the index is returned.
    ///
    /// If the heap is full, an error is returned.
    pub fn create_sampler(
        &self,
        device: &ID3D12Device,
        desc: D3D12_SAMPLER_DESC,
    ) -> Result<SamplerIndex, crate::DeviceError> {
        profiling::scope!("SamplerHeap::create_sampler");

        let hashable_desc = HashableSamplerDesc(desc);

        // Eagerly dereference the lock to allow split borrows.
        let state = &mut *self.state.lock();

        // Lookup the sampler in the mapping.
        match state.mapping.entry(hashable_desc) {
            Entry::Occupied(occupied_entry) => {
                // We have found a match, so increment the refcount and return the index.
                let entry = occupied_entry.into_mut();
                entry.ref_count += 1;
                Ok(entry.index)
            }
            Entry::Vacant(vacant_entry) => {
                // We need to create a new sampler.

                // Try to get a new index from the freelist.
                let Some(index) = state.freelist.pop() else {
                    // If the freelist is empty, we have hit the maximum number of samplers.
                    // NOTE: with the freelist exhausted, `mapping.len()` equals
                    // the heap capacity, so it doubles as the supported maximum here.
                    log::error!("There is no more room in the global sampler heap for more unique samplers. Your device supports a maximum of {} unique samplers.", state.mapping.len());
                    return Err(crate::DeviceError::OutOfMemory);
                };

                // Compute the CPU side handle for the new sampler.
                let handle = D3D12_CPU_DESCRIPTOR_HANDLE {
                    ptr: self.heap_cpu_start_handle.ptr
                        + self.descriptor_stride as usize * index.0 as usize,
                };

                unsafe {
                    device.CreateSampler(&desc, handle);
                }

                // Insert the new sampler into the mapping.
                vacant_entry.insert(CacheEntry {
                    index,
                    ref_count: 1,
                });

                Ok(index)
            }
        }
    }

    /// Decrement the refcount of the sampler with the given description.
    ///
    /// If the refcount reaches zero, the sampler is destroyed and the index is returned to the freelist.
    ///
    /// The provided index is checked against the index of the sampler with the given description, ensuring
    /// that there isn't a clerical error from the caller.
    pub fn destroy_sampler(&self, desc: D3D12_SAMPLER_DESC, provided_index: SamplerIndex) {
        profiling::scope!("SamplerHeap::destroy_sampler");

        // Eagerly dereference the lock to allow split borrows.
        let state = &mut *self.state.lock();

        // Get the index of the sampler to destroy.
        let Entry::Occupied(mut hash_map_entry) = state.mapping.entry(HashableSamplerDesc(desc))
        else {
            log::error!(
                "Tried to destroy a sampler that doesn't exist. Sampler description: {:#?}",
                desc
            );
            return;
        };
        let cache_entry = hash_map_entry.get_mut();

        // Ensure that the provided index matches the index of the sampler to destroy.
        assert_eq!(
            cache_entry.index, provided_index,
            "Mismatched sampler index, this is an implementation bug"
        );

        // Decrement the refcount of the sampler.
        cache_entry.ref_count -= 1;

        // If we are the last reference, remove the sampler from the mapping and return the index to the freelist.
        //
        // As samplers only exist as descriptors in the heap, there is nothing needed to be done to destroy the sampler.
        if cache_entry.ref_count == 0 {
            state.freelist.push(cache_entry.index);
            hash_map_entry.remove();
        }
    }
}
|
||||
@ -244,7 +244,7 @@ impl super::Device {
|
||||
}
|
||||
naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => {
|
||||
let br = match var.binding {
|
||||
Some(ref br) => br.clone(),
|
||||
Some(br) => br,
|
||||
None => continue,
|
||||
};
|
||||
let storage_access_store = match var.space {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user