Unconditionally Generate Bindless Samplers in DX12 (#6766)
This commit is contained in:
parent 2298cd2dd6
commit 436f716715

Cargo.lock: 10 changes (generated)
@@ -159,6 +159,15 @@ version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
 
+[[package]]
+name = "approx"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "arbitrary"
 version = "1.4.1"

@@ -4187,6 +4196,7 @@ name = "wgpu-test"
 version = "24.0.0"
 dependencies = [
  "anyhow",
+ "approx",
  "arrayvec",
  "bitflags 2.8.0",
  "bytemuck",

@@ -79,6 +79,7 @@ version = "24.0.0"
 anyhow = "1.0.95"
 argh = "0.1.13"
 arrayvec = "0.7"
+approx = "0.5"
 bincode = "1"
 bit-vec = "0.8"
 bitflags = "2.7"
@@ -1190,6 +1190,83 @@ impl<W: Write> super::Writer<'_, W> {
         Ok(())
     }
 
+    /// Writes out the sampler heap declarations if they haven't been written yet.
+    pub(super) fn write_sampler_heaps(&mut self) -> BackendResult {
+        if self.wrapped.sampler_heaps {
+            return Ok(());
+        }
+
+        writeln!(
+            self.out,
+            "SamplerState {}[2048]: register(s{}, space{});",
+            super::writer::SAMPLER_HEAP_VAR,
+            self.options.sampler_heap_target.standard_samplers.register,
+            self.options.sampler_heap_target.standard_samplers.space
+        )?;
+        writeln!(
+            self.out,
+            "SamplerComparisonState {}[2048]: register(s{}, space{});",
+            super::writer::COMPARISON_SAMPLER_HEAP_VAR,
+            self.options
+                .sampler_heap_target
+                .comparison_samplers
+                .register,
+            self.options.sampler_heap_target.comparison_samplers.space
+        )?;
+
+        self.wrapped.sampler_heaps = true;
+
+        Ok(())
+    }
+
+    /// Writes out the sampler index buffer declaration if it hasn't been written yet.
+    pub(super) fn write_wrapped_sampler_buffer(
+        &mut self,
+        key: super::SamplerIndexBufferKey,
+    ) -> BackendResult {
+        // The astute will notice that we do a double hash lookup, but we do this to avoid
+        // holding a mutable reference to `self` while trying to call `write_sampler_heaps`.
+        //
+        // We only pay this double lookup cost when we actually need to write out the sampler
+        // buffer, which should not be common.
+
+        if self.wrapped.sampler_index_buffers.contains_key(&key) {
+            return Ok(());
+        };
+
+        self.write_sampler_heaps()?;
+
+        // Because the group number can be arbitrary, we use the namer to generate a unique name
+        // instead of adding it to the reserved name list.
+        let sampler_array_name = self
+            .namer
+            .call(&format!("nagaGroup{}SamplerIndexArray", key.group));
+
+        let bind_target = match self.options.sampler_buffer_binding_map.get(&key) {
+            Some(&bind_target) => bind_target,
+            None if self.options.fake_missing_bindings => super::BindTarget {
+                space: u8::MAX,
+                register: key.group,
+                binding_array_size: None,
+            },
+            None => {
+                unreachable!("Sampler buffer of group {key:?} not bound to a register");
+            }
+        };
+
+        writeln!(
+            self.out,
+            "StructuredBuffer<uint> {sampler_array_name} : register(t{}, space{});",
+            bind_target.register, bind_target.space
+        )?;
+
+        self.wrapped
+            .sampler_index_buffers
+            .insert(key, sampler_array_name);
+
+        Ok(())
+    }
+
     pub(super) fn write_texture_coordinates(
         &mut self,
         kind: &str,
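With the default `SamplerHeapBindTargets` (standard samplers at s0/space0, comparison samplers at s0/space1) and a faked index-buffer binding for group 0 (space `u8::MAX`, register = group), these two helpers produce declarations of the shape seen in the snapshot outputs further down in this commit:

    SamplerState nagaSamplerHeap[2048]: register(s0, space0);
    SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
    StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);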
@@ -820,6 +820,8 @@ pub const RESERVED: &[&str] = &[
     super::writer::FREXP_FUNCTION,
     super::writer::EXTRACT_BITS_FUNCTION,
     super::writer::INSERT_BITS_FUNCTION,
+    super::writer::SAMPLER_HEAP_VAR,
+    super::writer::COMPARISON_SAMPLER_HEAP_VAR,
 ];
 
 // DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254

@@ -92,6 +92,15 @@ float3x2 GetMatmOnBaz(Baz obj) {
 We also emit an analogous `Set` function, as well as functions for
 accessing individual columns by dynamic index.
 
+## Sampler Handling
+
+Due to limitations in how sampler heaps work in D3D12, we need to access samplers
+through a layer of indirection. Instead of directly binding samplers, we bind the entire
+sampler heap as both a standard and a comparison sampler heap. We then use a sampler
+index buffer for each bind group. This buffer is accessed in the shader to get the actual
+sampler index within the heap. See the wgpu_hal dx12 backend documentation for more
+information.
+
 [hlsl]: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl
 [ilov]: https://gpuweb.github.io/gpuweb/wgsl/#internal-value-layout
 [16bb]: https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing#constant-buffer-packing
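Concretely, the HLSL snapshot outputs later in this commit show the two shapes this indirection takes: a directly bound sampler becomes a `static const` variable looked up through the per-group index buffer, while an element of a `binding_array<sampler>` adds the runtime index to a base constant before indexing the heap:

    static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
    float4 _e146 = texture_array_bounded[0].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);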
@@ -110,11 +119,14 @@ use thiserror::Error;
 
 use crate::{back, proc};
 
-#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serialize", derive(serde::Serialize))]
 #[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
 pub struct BindTarget {
     pub space: u8,
+    /// For regular bindings this is the register number.
+    ///
+    /// For sampler bindings, this is the index to use into the bind group's sampler index buffer.
     pub register: u32,
     /// If the binding is an unsized binding array, this overrides the size.
     pub binding_array_size: Option<u32>,

@@ -179,6 +191,43 @@ impl crate::ImageDimension {
     }
 }
 
+#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
+#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
+pub struct SamplerIndexBufferKey {
+    pub group: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
+#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
+#[cfg_attr(feature = "deserialize", serde(default))]
+pub struct SamplerHeapBindTargets {
+    pub standard_samplers: BindTarget,
+    pub comparison_samplers: BindTarget,
+}
+
+impl Default for SamplerHeapBindTargets {
+    fn default() -> Self {
+        Self {
+            standard_samplers: BindTarget {
+                space: 0,
+                register: 0,
+                binding_array_size: None,
+            },
+            comparison_samplers: BindTarget {
+                space: 1,
+                register: 0,
+                binding_array_size: None,
+            },
+        }
+    }
+}
+
+// We use a BTreeMap here so that we can hash it.
+pub type SamplerIndexBufferBindingMap =
+    std::collections::BTreeMap<SamplerIndexBufferKey, BindTarget>;
+
 /// Shorthand result used internally by the backend
 type BackendResult = Result<(), Error>;
 

@@ -207,6 +256,10 @@ pub struct Options {
     pub special_constants_binding: Option<BindTarget>,
     /// Bind target of the push constant buffer
     pub push_constants_target: Option<BindTarget>,
+    /// Bind target of the sampler heap and comparison sampler heap.
+    pub sampler_heap_target: SamplerHeapBindTargets,
+    /// Mapping of each bind group's sampler index buffer to a bind target.
+    pub sampler_buffer_binding_map: SamplerIndexBufferBindingMap,
     /// Should workgroup variables be zero initialized (by polyfilling)?
     pub zero_initialize_workgroup_memory: bool,
     /// Should we restrict indexing of vectors, matrices and arrays?

@@ -220,6 +273,8 @@ impl Default for Options {
             binding_map: BindingMap::default(),
             fake_missing_bindings: true,
             special_constants_binding: None,
+            sampler_heap_target: SamplerHeapBindTargets::default(),
+            sampler_buffer_binding_map: std::collections::BTreeMap::default(),
             push_constants_target: None,
             zero_initialize_workgroup_memory: true,
             restrict_indexing: true,
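For code that drives the HLSL backend directly, wiring up the new fields might look like the sketch below; the type and field names come from this diff, while the concrete group/space/register values mirror one of the updated test configs ((group: 0) mapped to (space: 2, register: 0)) and are otherwise illustrative.

    use naga::back::hlsl;

    // Map bind group 0's sampler index buffer to register t0 in space 2; the sampler
    // heaps themselves keep their default targets of (s0, space0) and (s0, space1).
    let mut sampler_buffer_binding_map = hlsl::SamplerIndexBufferBindingMap::new();
    sampler_buffer_binding_map.insert(
        hlsl::SamplerIndexBufferKey { group: 0 },
        hlsl::BindTarget {
            space: 2,
            register: 0,
            binding_array_size: None,
        },
    );

    let options = hlsl::Options {
        sampler_heap_target: hlsl::SamplerHeapBindTargets::default(),
        sampler_buffer_binding_map,
        ..hlsl::Options::default()
    };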
@@ -233,13 +288,13 @@ impl Options {
         res_binding: &crate::ResourceBinding,
     ) -> Result<BindTarget, EntryPointError> {
         match self.binding_map.get(res_binding) {
-            Some(target) => Ok(target.clone()),
+            Some(target) => Ok(*target),
             None if self.fake_missing_bindings => Ok(BindTarget {
                 space: res_binding.group as u8,
                 register: res_binding.binding,
                 binding_array_size: None,
             }),
-            None => Err(EntryPointError::MissingBinding(res_binding.clone())),
+            None => Err(EntryPointError::MissingBinding(*res_binding)),
         }
     }
 }

@@ -279,6 +334,10 @@ struct Wrapped {
     struct_matrix_access: crate::FastHashSet<help::WrappedStructMatrixAccess>,
     mat_cx2s: crate::FastHashSet<help::WrappedMatCx2>,
     math: crate::FastHashSet<help::WrappedMath>,
+    /// If true, the sampler heaps have been written out.
+    sampler_heaps: bool,
+    // Mapping from SamplerIndexBufferKey to the name the namer returned.
+    sampler_index_buffers: crate::FastHashMap<SamplerIndexBufferKey, String>,
 }
 
 impl Wrapped {

@@ -24,6 +24,8 @@ pub(crate) const MODF_FUNCTION: &str = "naga_modf";
 pub(crate) const FREXP_FUNCTION: &str = "naga_frexp";
 pub(crate) const EXTRACT_BITS_FUNCTION: &str = "naga_extractBits";
 pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits";
+pub(crate) const SAMPLER_HEAP_VAR: &str = "nagaSamplerHeap";
+pub(crate) const COMPARISON_SAMPLER_HEAP_VAR: &str = "nagaComparisonSamplerHeap";
 
 struct EpStructMember {
     name: String,

@@ -94,6 +96,16 @@ const fn is_subgroup_builtin_binding(binding: &Option<crate::Binding>) -> bool {
     )
 }
 
+/// Information for how to generate a `binding_array<sampler>` access.
+struct BindingArraySamplerInfo {
+    /// Variable name of the sampler heap
+    sampler_heap_name: &'static str,
+    /// Variable name of the sampler index buffer
+    sampler_index_buffer_name: String,
+    /// Variable name of the base index _into_ the sampler index buffer
+    binding_array_base_index_name: String,
+}
+
 impl<'a, W: fmt::Write> super::Writer<'a, W> {
     pub fn new(out: W, options: &'a Options) -> Self {
         Self {

@@ -143,11 +155,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
     ) {
         use crate::Expression;
         self.need_bake_expressions.clear();
-        for (fun_handle, expr) in func.expressions.iter() {
-            let expr_info = &info[fun_handle];
-            let min_ref_count = func.expressions[fun_handle].bake_ref_count();
+        for (exp_handle, expr) in func.expressions.iter() {
+            let expr_info = &info[exp_handle];
+            let min_ref_count = func.expressions[exp_handle].bake_ref_count();
             if min_ref_count <= expr_info.ref_count {
-                self.need_bake_expressions.insert(fun_handle);
+                self.need_bake_expressions.insert(exp_handle);
             }
 
             if let Expression::Math { fun, arg, .. } = *expr {

@@ -172,7 +184,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                         self.need_bake_expressions.insert(arg);
                     }
                     crate::MathFunction::CountLeadingZeros => {
-                        let inner = info[fun_handle].ty.inner_with(&module.types);
+                        let inner = info[exp_handle].ty.inner_with(&module.types);
                         if let Some(ScalarKind::Sint) = inner.scalar_kind() {
                             self.need_bake_expressions.insert(arg);
                         }

@@ -187,6 +199,14 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     self.need_bake_expressions.insert(expr);
                 }
             }
+
+            if let Expression::GlobalVariable(_) = *expr {
+                let inner = info[exp_handle].ty.inner_with(&module.types);
+
+                if let TypeInner::Sampler { .. } = *inner {
+                    self.need_bake_expressions.insert(exp_handle);
+                }
+            }
         }
         for statement in func.body.iter() {
             match *statement {
@@ -814,6 +834,18 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
             }
         }
 
+        let handle_ty = match *inner {
+            TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
+            _ => inner,
+        };
+
+        // Samplers are handled entirely differently, so defer entirely to that method.
+        let is_sampler = matches!(*handle_ty, TypeInner::Sampler { .. });
+
+        if is_sampler {
+            return self.write_global_sampler(module, handle, global);
+        }
+
         // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-variable-register
         let register_ty = match global.space {
             crate::AddressSpace::Function => unreachable!("Function address space"),

@@ -843,13 +875,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 register
             }
             crate::AddressSpace::Handle => {
-                let handle_ty = match *inner {
-                    TypeInner::BindingArray { ref base, .. } => &module.types[*base].inner,
-                    _ => inner,
-                };
-
                 let register = match *handle_ty {
-                    TypeInner::Sampler { .. } => "s",
                     // all storage textures are UAV, unconditionally
                     TypeInner::Image {
                         class: crate::ImageClass::Storage { .. },

@@ -956,6 +982,66 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         Ok(())
     }
 
+    fn write_global_sampler(
+        &mut self,
+        module: &Module,
+        handle: Handle<crate::GlobalVariable>,
+        global: &crate::GlobalVariable,
+    ) -> BackendResult {
+        let binding = *global.binding.as_ref().unwrap();
+
+        let key = super::SamplerIndexBufferKey {
+            group: binding.group,
+        };
+        self.write_wrapped_sampler_buffer(key)?;
+
+        // This was already validated, so we can confidently unwrap it.
+        let bt = self.options.resolve_resource_binding(&binding).unwrap();
+
+        match module.types[global.ty].inner {
+            TypeInner::Sampler { comparison } => {
+                // If we are generating a static access, we create a variable for the sampler.
+                //
+                // This prevents the DXIL from containing multiple lookups for the sampler, which
+                // the backend compiler will then have to eliminate. AMD does seem to be able to
+                // eliminate these, but better safe than sorry.
+
+                write!(self.out, "static const ")?;
+                self.write_type(module, global.ty)?;
+
+                let heap_var = if comparison {
+                    COMPARISON_SAMPLER_HEAP_VAR
+                } else {
+                    SAMPLER_HEAP_VAR
+                };
+
+                let index_buffer_name = &self.wrapped.sampler_index_buffers[&key];
+                let name = &self.names[&NameKey::GlobalVariable(handle)];
+                writeln!(
+                    self.out,
+                    " {name} = {heap_var}[{index_buffer_name}[{register}]];",
+                    register = bt.register
+                )?;
+            }
+            TypeInner::BindingArray { .. } => {
+                // If we are generating a binding array, we cannot directly access the sampler as the index
+                // into the sampler index buffer is unknown at compile time. Instead we generate a constant
+                // that represents the "base" index into the sampler index buffer. This constant is added
+                // to the user provided index to get the final index into the sampler index buffer.
+
+                let name = &self.names[&NameKey::GlobalVariable(handle)];
+                writeln!(
+                    self.out,
+                    "static const uint {name} = {register};",
+                    register = bt.register
+                )?;
+            }
+            _ => unreachable!(),
+        };
+
+        Ok(())
+    }
+
     /// Helper method used to write global constants
     ///
     /// # Notes
@@ -2670,7 +2756,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 };
 
                 self.write_expr(module, base, func_ctx)?;
-                write!(self.out, "[")?;
+                let array_sampler_info = self.sampler_binding_array_info_from_expression(
+                    module, func_ctx, base, resolved,
+                );
+
+                if let Some(ref info) = array_sampler_info {
+                    write!(self.out, "{}[", info.sampler_heap_name)?;
+                } else {
+                    write!(self.out, "[")?;
+                }
+
                 let needs_bound_check = self.options.restrict_indexing
                     && !indexing_binding_array

@@ -2715,7 +2810,17 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 if non_uniform_qualifier {
                     write!(self.out, "NonUniformResourceIndex(")?;
                 }
+                if let Some(ref info) = array_sampler_info {
+                    write!(
+                        self.out,
+                        "{}[{} + ",
+                        info.sampler_index_buffer_name, info.binding_array_base_index_name,
+                    )?;
+                }
                 self.write_expr(module, index, func_ctx)?;
+                if array_sampler_info.is_some() {
+                    write!(self.out, "]")?;
+                }
                 if non_uniform_qualifier {
                     write!(self.out, ")")?;
                 }

@@ -2730,43 +2835,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                 {
                     // do nothing, the chain is written on `Load`/`Store`
                 } else {
-                    fn write_access<W: fmt::Write>(
-                        writer: &mut super::Writer<'_, W>,
-                        resolved: &TypeInner,
-                        base_ty_handle: Option<Handle<crate::Type>>,
-                        index: u32,
-                    ) -> BackendResult {
-                        match *resolved {
-                            // We specifically lift the ValuePointer to this case. While `[0]` is valid
-                            // HLSL for any vector behind a value pointer, FXC completely miscompiles
-                            // it and generates completely nonsensical DXBC.
-                            //
-                            // See https://github.com/gfx-rs/naga/issues/2095 for more details.
-                            TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
-                                // Write vector access as a swizzle
-                                write!(writer.out, ".{}", back::COMPONENTS[index as usize])?
-                            }
-                            TypeInner::Matrix { .. }
-                            | TypeInner::Array { .. }
-                            | TypeInner::BindingArray { .. } => write!(writer.out, "[{index}]")?,
-                            TypeInner::Struct { .. } => {
-                                // This will never panic in case the type is a `Struct`, this is not true
-                                // for other types so we can only check while inside this match arm
-                                let ty = base_ty_handle.unwrap();
-
-                                write!(
-                                    writer.out,
-                                    ".{}",
-                                    &writer.names[&NameKey::StructMember(ty, index)]
-                                )?
-                            }
-                            ref other => {
-                                return Err(Error::Custom(format!("Cannot index {other:?}")))
-                            }
-                        }
-                        Ok(())
-                    }
-
                     // We write the matrix column access in a special way since
                     // the type of `base` is our special __matCx2 struct.
                     if let Some(MatrixType {
@@ -2816,8 +2884,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                         }
                     }
 
+                    let array_sampler_info = self.sampler_binding_array_info_from_expression(
+                        module, func_ctx, base, resolved,
+                    );
+
+                    if let Some(ref info) = array_sampler_info {
+                        write!(
+                            self.out,
+                            "{}[{}",
+                            info.sampler_heap_name, info.sampler_index_buffer_name
+                        )?;
+                    }
+
                     self.write_expr(module, base, func_ctx)?;
-                    write_access(self, resolved, base_ty_handle, index)?;
+
+                    match *resolved {
+                        // We specifically lift the ValuePointer to this case. While `[0]` is valid
+                        // HLSL for any vector behind a value pointer, FXC completely miscompiles
+                        // it and generates completely nonsensical DXBC.
+                        //
+                        // See https://github.com/gfx-rs/naga/issues/2095 for more details.
+                        TypeInner::Vector { .. } | TypeInner::ValuePointer { .. } => {
+                            // Write vector access as a swizzle
+                            write!(self.out, ".{}", back::COMPONENTS[index as usize])?
+                        }
+                        TypeInner::Matrix { .. }
+                        | TypeInner::Array { .. }
+                        | TypeInner::BindingArray { .. } => {
+                            if let Some(ref info) = array_sampler_info {
+                                write!(
+                                    self.out,
+                                    "[{} + {index}]",
+                                    info.binding_array_base_index_name
+                                )?;
+                            } else {
+                                write!(self.out, "[{index}]")?;
+                            }
+                        }
+                        TypeInner::Struct { .. } => {
+                            // This will never panic in case the type is a `Struct`, this is not true
+                            // for other types so we can only check while inside this match arm
+                            let ty = base_ty_handle.unwrap();
+
+                            write!(
+                                self.out,
+                                ".{}",
+                                &self.names[&NameKey::StructMember(ty, index)]
+                            )?
+                        }
+                        ref other => return Err(Error::Custom(format!("Cannot index {other:?}"))),
+                    }
+
+                    if array_sampler_info.is_some() {
+                        write!(self.out, "]")?;
+                    }
                 }
             }
             Expression::FunctionArgument(pos) => {

@@ -2958,13 +3078,30 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
                     write!(self.out, ".x")?;
                 }
             }
-            Expression::GlobalVariable(handle) => match module.global_variables[handle].space {
-                crate::AddressSpace::Storage { .. } => {}
-                _ => {
+            Expression::GlobalVariable(handle) => {
+                let global_variable = &module.global_variables[handle];
+                let ty = &module.types[global_variable.ty].inner;
+
+                // In the case of binding arrays of samplers, we need to not write anything
+                // as we are in the wrong position to fully write the expression.
+                //
+                // The entire writing is done by AccessIndex.
+                let is_binding_array_of_samplers = match *ty {
+                    TypeInner::BindingArray { base, .. } => {
+                        let base_ty = &module.types[base].inner;
+                        matches!(*base_ty, TypeInner::Sampler { .. })
+                    }
+                    _ => false,
+                };
+
+                let is_storage_space =
+                    matches!(global_variable.space, crate::AddressSpace::Storage { .. });
+
+                if !is_binding_array_of_samplers && !is_storage_space {
                     let name = &self.names[&NameKey::GlobalVariable(handle)];
                     write!(self.out, "{name}")?;
                 }
-            },
+            }
             Expression::LocalVariable(handle) => {
                 write!(self.out, "{}", self.names[&func_ctx.name_key(handle)])?
             }
@@ -3680,6 +3817,52 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
         Ok(())
     }
 
+    /// Find the [`BindingArraySamplerInfo`] from an expression so that such an access
+    /// can be generated later.
+    fn sampler_binding_array_info_from_expression(
+        &mut self,
+        module: &Module,
+        func_ctx: &back::FunctionCtx<'_>,
+        base: Handle<crate::Expression>,
+        resolved: &TypeInner,
+    ) -> Option<BindingArraySamplerInfo> {
+        if let TypeInner::BindingArray {
+            base: base_ty_handle,
+            ..
+        } = *resolved
+        {
+            let base_ty = &module.types[base_ty_handle].inner;
+            if let TypeInner::Sampler { comparison, .. } = *base_ty {
+                let base = &func_ctx.expressions[base];
+
+                if let crate::Expression::GlobalVariable(handle) = *base {
+                    let variable = &module.global_variables[handle];
+
+                    let sampler_heap_name = match comparison {
+                        true => COMPARISON_SAMPLER_HEAP_VAR,
+                        false => SAMPLER_HEAP_VAR,
+                    };
+
+                    return Some(BindingArraySamplerInfo {
+                        sampler_heap_name,
+                        sampler_index_buffer_name: self
+                            .wrapped
+                            .sampler_index_buffers
+                            .get(&super::SamplerIndexBufferKey {
+                                group: variable.binding.unwrap().group,
+                            })
+                            .unwrap()
+                            .clone(),
+                        binding_array_base_index_name: self.names[&NameKey::GlobalVariable(handle)]
+                            .clone(),
+                    });
+                }
+            }
+        }
+
+        None
+    }
+
     fn write_named_expr(
         &mut self,
         module: &Module,

@@ -498,7 +498,7 @@ impl Options {
                 index: 0,
                 interpolation: None,
             }),
-            None => Err(EntryPointError::MissingBindTarget(res_binding.clone())),
+            None => Err(EntryPointError::MissingBindTarget(*res_binding)),
         }
     }
 
@@ -5323,8 +5323,7 @@ template <typename A>
                     None => false,
                 };
                 if !good {
-                    ep_error =
-                        Some(super::EntryPointError::MissingBindTarget(br.clone()));
+                    ep_error = Some(super::EntryPointError::MissingBindTarget(*br));
                     break;
                 }
             }

@@ -966,7 +966,7 @@ pub enum Binding {
 }
 
 /// Pipeline binding information for global resources.
-#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "serialize", derive(Serialize))]
 #[cfg_attr(feature = "deserialize", derive(Deserialize))]
 #[cfg_attr(feature = "arbitrary", derive(Arbitrary))]

@@ -778,7 +778,7 @@ impl super::Validator {
         }
 
         if let Some(ref bind) = var.binding {
-            if !self.ep_resource_bindings.insert(bind.clone()) {
+            if !self.ep_resource_bindings.insert(*bind) {
                 if self.flags.contains(super::ValidationFlags::BINDINGS) {
                     return Err(EntryPointError::BindingCollision(var_handle)
                         .with_span_handle(var_handle, &module.global_variables));
@@ -57,6 +57,9 @@
         (group: 0, binding: 2): (space: 1, register: 0),
     },
     fake_missing_bindings: false,
+    sampler_buffer_binding_map: {
+        (group: 0): (space: 2, register: 0),
+    },
     special_constants_binding: Some((space: 0, register: 1)),
     zero_initialize_workgroup_memory: true,
     restrict_indexing: true

@@ -12,8 +12,11 @@ Texture2DArray<float4> texture_array_2darray[5] : register(t0, space2);
 Texture2DMS<float4> texture_array_multisampled[5] : register(t0, space3);
 Texture2D<float> texture_array_depth[5] : register(t0, space4);
 RWTexture2D<float4> texture_array_storage[5] : register(u0, space5);
-SamplerState samp[5] : register(s0, space6);
-SamplerComparisonState samp_comp[5] : register(s0, space7);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const uint samp = 0;
+static const uint samp_comp = 0;
 cbuffer uni : register(b0, space8) { UniformIndex uni; }
 
 struct FragmentInput_main {
@@ -66,22 +69,22 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
     u2_ = (_e27 + NagaDimensions2D(texture_array_unbounded[uniform_index]));
     uint2 _e32 = u2_;
     u2_ = (_e32 + NagaDimensions2D(texture_array_unbounded[NonUniformResourceIndex(non_uniform_index)]));
-    float4 _e38 = texture_array_bounded[0].Gather(samp[0], uv);
+    float4 _e38 = texture_array_bounded[0].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
     float4 _e39 = v4_;
     v4_ = (_e39 + _e38);
-    float4 _e45 = texture_array_bounded[uniform_index].Gather(samp[uniform_index], uv);
+    float4 _e45 = texture_array_bounded[uniform_index].Gather(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
     float4 _e46 = v4_;
     v4_ = (_e46 + _e45);
-    float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(samp[NonUniformResourceIndex(non_uniform_index)], uv);
+    float4 _e52 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Gather(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
     float4 _e53 = v4_;
     v4_ = (_e53 + _e52);
-    float4 _e60 = texture_array_depth[0].GatherCmp(samp_comp[0], uv, 0.0);
+    float4 _e60 = texture_array_depth[0].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
     float4 _e61 = v4_;
     v4_ = (_e61 + _e60);
-    float4 _e68 = texture_array_depth[uniform_index].GatherCmp(samp_comp[uniform_index], uv, 0.0);
+    float4 _e68 = texture_array_depth[uniform_index].GatherCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
     float4 _e69 = v4_;
     v4_ = (_e69 + _e68);
-    float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+    float4 _e76 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].GatherCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
     float4 _e77 = v4_;
     v4_ = (_e77 + _e76);
     float4 _e82 = texture_array_unbounded[0].Load(int3(pix, 0));

@@ -111,58 +114,58 @@ float4 main(FragmentInput_main fragmentinput_main) : SV_Target0
     u1_ = (_e135 + NagaMSNumSamples2D(texture_array_multisampled[uniform_index]));
     uint _e140 = u1_;
     u1_ = (_e140 + NagaMSNumSamples2D(texture_array_multisampled[NonUniformResourceIndex(non_uniform_index)]));
-    float4 _e146 = texture_array_bounded[0].Sample(samp[0], uv);
+    float4 _e146 = texture_array_bounded[0].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv);
     float4 _e147 = v4_;
     v4_ = (_e147 + _e146);
-    float4 _e153 = texture_array_bounded[uniform_index].Sample(samp[uniform_index], uv);
+    float4 _e153 = texture_array_bounded[uniform_index].Sample(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv);
     float4 _e154 = v4_;
     v4_ = (_e154 + _e153);
-    float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(samp[NonUniformResourceIndex(non_uniform_index)], uv);
+    float4 _e160 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].Sample(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv);
     float4 _e161 = v4_;
     v4_ = (_e161 + _e160);
-    float4 _e168 = texture_array_bounded[0].SampleBias(samp[0], uv, 0.0);
+    float4 _e168 = texture_array_bounded[0].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
     float4 _e169 = v4_;
     v4_ = (_e169 + _e168);
-    float4 _e176 = texture_array_bounded[uniform_index].SampleBias(samp[uniform_index], uv, 0.0);
+    float4 _e176 = texture_array_bounded[uniform_index].SampleBias(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
     float4 _e177 = v4_;
     v4_ = (_e177 + _e176);
-    float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+    float4 _e184 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleBias(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
     float4 _e185 = v4_;
     v4_ = (_e185 + _e184);
-    float _e192 = texture_array_depth[0].SampleCmp(samp_comp[0], uv, 0.0);
+    float _e192 = texture_array_depth[0].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
     float _e193 = v1_;
     v1_ = (_e193 + _e192);
-    float _e200 = texture_array_depth[uniform_index].SampleCmp(samp_comp[uniform_index], uv, 0.0);
+    float _e200 = texture_array_depth[uniform_index].SampleCmp(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
     float _e201 = v1_;
     v1_ = (_e201 + _e200);
-    float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+    float _e208 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmp(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
     float _e209 = v1_;
     v1_ = (_e209 + _e208);
-    float _e216 = texture_array_depth[0].SampleCmpLevelZero(samp_comp[0], uv, 0.0);
+    float _e216 = texture_array_depth[0].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + 0]], uv, 0.0);
     float _e217 = v1_;
     v1_ = (_e217 + _e216);
-    float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(samp_comp[uniform_index], uv, 0.0);
+    float _e224 = texture_array_depth[uniform_index].SampleCmpLevelZero(nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[samp_comp + uniform_index]], uv, 0.0);
     float _e225 = v1_;
     v1_ = (_e225 + _e224);
-    float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(samp_comp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+    float _e232 = texture_array_depth[NonUniformResourceIndex(non_uniform_index)].SampleCmpLevelZero(nagaComparisonSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp_comp + non_uniform_index])], uv, 0.0);
     float _e233 = v1_;
     v1_ = (_e233 + _e232);
-    float4 _e239 = texture_array_bounded[0].SampleGrad(samp[0], uv, uv, uv);
+    float4 _e239 = texture_array_bounded[0].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, uv, uv);
     float4 _e240 = v4_;
     v4_ = (_e240 + _e239);
-    float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(samp[uniform_index], uv, uv, uv);
+    float4 _e246 = texture_array_bounded[uniform_index].SampleGrad(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, uv, uv);
     float4 _e247 = v4_;
     v4_ = (_e247 + _e246);
-    float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(samp[NonUniformResourceIndex(non_uniform_index)], uv, uv, uv);
+    float4 _e253 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleGrad(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, uv, uv);
     float4 _e254 = v4_;
     v4_ = (_e254 + _e253);
-    float4 _e261 = texture_array_bounded[0].SampleLevel(samp[0], uv, 0.0);
+    float4 _e261 = texture_array_bounded[0].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + 0]], uv, 0.0);
     float4 _e262 = v4_;
     v4_ = (_e262 + _e261);
-    float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(samp[uniform_index], uv, 0.0);
+    float4 _e269 = texture_array_bounded[uniform_index].SampleLevel(nagaSamplerHeap[nagaGroup0SamplerIndexArray[samp + uniform_index]], uv, 0.0);
     float4 _e270 = v4_;
     v4_ = (_e270 + _e269);
-    float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(samp[NonUniformResourceIndex(non_uniform_index)], uv, 0.0);
+    float4 _e277 = texture_array_bounded[NonUniformResourceIndex(non_uniform_index)].SampleLevel(nagaSamplerHeap[NonUniformResourceIndex(nagaGroup0SamplerIndexArray[samp + non_uniform_index])], uv, 0.0);
     float4 _e278 = v4_;
     v4_ = (_e278 + _e277);
     float4 _e282 = v4_;
@@ -15,8 +15,11 @@ TextureCube<float4> image_cube : register(t5);
 TextureCubeArray<float4> image_cube_array : register(t6);
 Texture3D<float4> image_3d : register(t7);
 Texture2DMS<float4> image_aa : register(t8);
-SamplerState sampler_reg : register(s0, space1);
-SamplerComparisonState sampler_cmp : register(s1, space1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space255);
+static const SamplerState sampler_reg = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
+static const SamplerComparisonState sampler_cmp = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
 Texture2D<float> image_2d_depth : register(t2, space1);
 Texture2DArray<float> image_2d_array_depth : register(t3, space1);
 TextureCube<float> image_cube_depth : register(t4, space1);

@@ -6,7 +6,10 @@ struct VertexOutput {
 static const float c_scale = 1.2;
 
 Texture2D<float4> u_texture : register(t0);
-SamplerState u_sampler : register(s1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerState u_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
 
 struct VertexOutput_vert_main {
     float2 uv_2 : LOC0;

@@ -28,7 +28,10 @@ cbuffer u_entity : register(b0, space1) { Entity u_entity; }
 ByteAddressBuffer s_lights : register(t1);
 cbuffer u_lights : register(b1) { Light u_lights[10]; }
 Texture2DArray<float> t_shadow : register(t2);
-SamplerComparisonState sampler_shadow : register(s3);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerComparisonState sampler_shadow = nagaComparisonSamplerHeap[nagaGroup0SamplerIndexArray[3]];
 
 struct VertexOutput_vs_main {
     float3 world_normal : LOC0;

@@ -17,7 +17,10 @@ struct Data {
 
 cbuffer r_data : register(b0) { Data r_data; }
 TextureCube<float4> r_texture : register(t0);
-SamplerState r_sampler : register(s0, space1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space2);
+static const SamplerState r_sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];
 
 struct VertexOutput_vs_main {
     float3 uv : LOC0;

@@ -1,5 +1,8 @@
 Texture2D<float4> Texture : register(t0);
-SamplerState Sampler : register(s1);
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s0, space1);
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space255);
+static const SamplerState Sampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[1]];
 
 float4 test(Texture2D<float4> Passed_Texture, SamplerState Passed_Sampler)
 {

@@ -27,6 +27,7 @@ webgl = ["wgpu/webgl"]
 [dependencies]
 anyhow.workspace = true
 arrayvec.workspace = true
+approx.workspace = true
 bitflags.workspace = true
 bytemuck.workspace = true
 cfg-if.workspace = true
@@ -83,6 +83,16 @@ pub fn fail_if<T>(
     }
 }
 
+/// Returns true if the provided callback fails validation.
+pub fn did_fail<T>(device: &wgpu::Device, callback: impl FnOnce() -> T) -> (bool, T) {
+    device.push_error_scope(wgpu::ErrorFilter::Validation);
+    let result = callback();
+    let validation_error = pollster::block_on(device.pop_error_scope());
+    let failed = validation_error.is_some();
+
+    (failed, result)
+}
+
 /// Adds the necissary main function for our gpu test harness.
 #[macro_export]
 macro_rules! gpu_test_main {
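A minimal usage sketch for the new helper, in the same shape as the sampler tests added below (the device and the default sampler descriptor here are placeholders, not taken from this commit):

    // `device` is assumed to be an existing `wgpu::Device`.
    let (failed, sampler) = wgpu_test::did_fail(&device, || {
        device.create_sampler(&wgpu::SamplerDescriptor::default())
    });
    if failed {
        // A validation error was raised inside the closure; the closure's return
        // value is still handed back so the caller can decide what to do with it.
        drop(sampler);
    }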
@@ -44,6 +44,7 @@ mod ray_tracing;
 mod render_pass_ownership;
 mod resource_descriptor_accessor;
 mod resource_error;
+mod samplers;
 mod scissor_tests;
 mod shader;
 mod shader_primitive_index;

tests/tests/samplers.rs (new file, 543 lines)
@@ -0,0 +1,543 @@
|
//! D3D12 samplers are fun and we're doing a decent amount of polyfilling with them.
|
||||||
|
//!
|
||||||
|
//! Do some tests to ensure things are working correctly and nothing gets mad.
|
||||||
|
|
||||||
|
use wgpu_test::{did_fail, gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext};
|
||||||
|
|
||||||
|
// A number large enough to likely cause sampler caches to run out of space
|
||||||
|
// on some devices.
|
||||||
|
const PROBABLY_PROBLEMATIC_SAMPLER_COUNT: u32 = 8 * 1024;
|
||||||
|
|
||||||
|
#[gpu_test]
|
||||||
|
static SAMPLER_DEDUPLICATION: GpuTestConfiguration =
|
||||||
|
GpuTestConfiguration::new().run_sync(sampler_deduplication);
|
||||||
|
|
||||||
|
// Create a large number of samplers from the same two descriptors.
|
||||||
|
//
|
||||||
|
// Sampler deduplication in the backend should ensure this doesn't cause any issues.
|
||||||
|
fn sampler_deduplication(ctx: TestingContext) {
|
||||||
|
// Create 2 different sampler descriptors
|
||||||
|
let desc1 = wgpu::SamplerDescriptor {
|
||||||
|
label: Some("sampler1"),
|
||||||
|
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||||||
|
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||||||
|
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||||||
|
mag_filter: wgpu::FilterMode::Nearest,
|
||||||
|
min_filter: wgpu::FilterMode::Nearest,
|
||||||
|
mipmap_filter: wgpu::FilterMode::Nearest,
|
||||||
|
lod_min_clamp: 0.0,
|
        lod_max_clamp: 100.0,
        compare: None,
        anisotropy_clamp: 1,
        border_color: None,
    };

    let desc2 = wgpu::SamplerDescriptor {
        label: Some("sampler2"),
        address_mode_u: wgpu::AddressMode::ClampToEdge,
        address_mode_v: wgpu::AddressMode::ClampToEdge,
        address_mode_w: wgpu::AddressMode::ClampToEdge,
        mag_filter: wgpu::FilterMode::Linear,
        min_filter: wgpu::FilterMode::Linear,
        mipmap_filter: wgpu::FilterMode::Linear,
        lod_min_clamp: 0.0,
        lod_max_clamp: 100.0,
        compare: None,
        anisotropy_clamp: 1,
        border_color: None,
    };

    // Now create a bunch of samplers with these descriptors
    let samplers = (0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT)
        .map(|i| {
            let desc = if i % 2 == 0 { &desc1 } else { &desc2 };
            valid(&ctx.device, || ctx.device.create_sampler(desc))
        })
        .collect::<Vec<_>>();

    drop(samplers);
}

#[gpu_test]
static SAMPLER_CREATION_FAILURE: GpuTestConfiguration =
    GpuTestConfiguration::new().run_sync(sampler_creation_failure);

/// We want to test that sampler creation properly fails when we hit internal sampler
/// cache limits. As we don't actually know what the limit is, we first create as many
/// samplers as we can until we get the first failure.
///
/// This failure being caught ensures that the error catching machinery on samplers
/// is working as expected.
///
/// We then clear all samplers and poll the device, which should leave the caches
/// completely empty.
///
/// We then try to create the same number of samplers to ensure the cache was entirely
/// cleared.
fn sampler_creation_failure(ctx: TestingContext) {
    let desc = wgpu::SamplerDescriptor {
        label: Some("sampler1"),
        address_mode_u: wgpu::AddressMode::ClampToEdge,
        address_mode_v: wgpu::AddressMode::ClampToEdge,
        address_mode_w: wgpu::AddressMode::ClampToEdge,
        mag_filter: wgpu::FilterMode::Nearest,
        min_filter: wgpu::FilterMode::Nearest,
        mipmap_filter: wgpu::FilterMode::Nearest,
        lod_min_clamp: 0.0,
        lod_max_clamp: 100.0,
        compare: None,
        anisotropy_clamp: 1,
        border_color: None,
    };

    let mut sampler_storage = Vec::with_capacity(PROBABLY_PROBLEMATIC_SAMPLER_COUNT as usize);

    for i in 0..PROBABLY_PROBLEMATIC_SAMPLER_COUNT {
        let (failed, sampler) = did_fail(&ctx.device, || {
            ctx.device.create_sampler(&wgpu::SamplerDescriptor {
                lod_min_clamp: i as f32 * 0.01,
                ..desc
            })
        });

        if failed {
            break;
        }

        sampler_storage.push(sampler);
    }

    let failed_count = sampler_storage.len();

    sampler_storage.clear();
    ctx.device.poll(wgpu::Maintain::Wait);

    for i in 0..failed_count {
        valid(&ctx.device, || {
            eprintln!("Trying to create sampler {}", i);
            let sampler = ctx.device.create_sampler(&wgpu::SamplerDescriptor {
                lod_min_clamp: i as f32 * 0.01,
                // Change the max clamp to ensure the sampler is using different cache slots from
                // the previous run.
                lod_max_clamp: 200.0,
                ..desc
            });
            sampler_storage.push(sampler);
        });
    }
}

const SINGLE_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(0) @binding(2) var sampler1: sampler;
@group(0) @binding(3) var sampler2: sampler;

@group(1) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;

const MULTI_GROUP_BINDINGS: &str = r#"
@group(0) @binding(0) var texture: texture_2d<f32>;
@group(0) @binding(1) var sampler0: sampler;
@group(1) @binding(0) var sampler1: sampler;
@group(2) @binding(0) var sampler2: sampler;

@group(3) @binding(0) var<storage, read_write> results: array<vec4f, 3>;
"#;

const SAMPLER_CODE: &str = r#"
@compute @workgroup_size(1, 1, 1)
fn cs_main() {
    // When sampling a 2x2 texture at the bottom left, we can change the address mode
    // on S/T to get different values. This allows us to make sure the right sampler
    // is being used.
    results[0] = textureSampleLevel(texture, sampler0, vec2f(0.0, 1.0), 0.0);
    results[1] = textureSampleLevel(texture, sampler1, vec2f(0.0, 1.0), 0.0);
    results[2] = textureSampleLevel(texture, sampler2, vec2f(0.0, 1.0), 0.0);
}
"#;

enum GroupType {
    Single,
    Multi,
}

#[gpu_test]
static SAMPLER_SINGLE_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .test_features_limits()
            // In OpenGL textures cannot be used with multiple samplers.
            .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
    )
    .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Single));

#[gpu_test]
static SAMPLER_MULTI_BIND_GROUP: GpuTestConfiguration = GpuTestConfiguration::new()
    .parameters(
        TestParameters::default()
            .test_features_limits()
            // In OpenGL textures cannot be used with multiple samplers.
            .skip(wgpu_test::FailureCase::backend(wgpu::Backends::GL)),
    )
    .run_sync(|ctx| sampler_bind_group(ctx, GroupType::Multi));

fn sampler_bind_group(ctx: TestingContext, group_type: GroupType) {
    let bindings = match group_type {
        GroupType::Single => SINGLE_GROUP_BINDINGS,
        GroupType::Multi => MULTI_GROUP_BINDINGS,
    };

    let full_shader = format!("{}\n{}", bindings, SAMPLER_CODE);

    let module = ctx
        .device
        .create_shader_module(wgpu::ShaderModuleDescriptor {
            source: wgpu::ShaderSource::Wgsl(full_shader.into()),
            label: None,
        });

    let mut bind_group_layouts = Vec::new();

    match group_type {
        GroupType::Single => {
            let bgl = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("combination_bgl"),
                    entries: &[
                        wgpu::BindGroupLayoutEntry {
                            binding: 0,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Texture {
                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                                view_dimension: wgpu::TextureViewDimension::D2,
                                multisampled: false,
                            },
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 1,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 2,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 3,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                    ],
                });

            bind_group_layouts.push(bgl);
        }
        GroupType::Multi => {
            let bgl0 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl0"),
                    entries: &[
                        wgpu::BindGroupLayoutEntry {
                            binding: 0,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Texture {
                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                                view_dimension: wgpu::TextureViewDimension::D2,
                                multisampled: false,
                            },
                            count: None,
                        },
                        wgpu::BindGroupLayoutEntry {
                            binding: 1,
                            visibility: wgpu::ShaderStages::COMPUTE,
                            ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                            count: None,
                        },
                    ],
                });

            let bgl1 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl1"),
                    entries: &[wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    }],
                });

            let bgl2 = ctx
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("multiple_bgl2"),
                    entries: &[wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    }],
                });

            bind_group_layouts.push(bgl0);
            bind_group_layouts.push(bgl1);
            bind_group_layouts.push(bgl2);
        }
    }

    let output_bgl = ctx
        .device
        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("output_bgl"),
            entries: &[wgpu::BindGroupLayoutEntry {
                binding: 0,
                visibility: wgpu::ShaderStages::COMPUTE,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    min_binding_size: None,
                },
                count: None,
            }],
        });

    let mut bgl_references: Vec<_> = bind_group_layouts.iter().collect();

    bgl_references.push(&output_bgl);

    let pipeline_layout = ctx
        .device
        .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("pipeline_layout"),
            bind_group_layouts: &bgl_references,
            push_constant_ranges: &[],
        });

    let input_image = ctx.device.create_texture(&wgpu::TextureDescriptor {
        label: Some("input_image"),
        size: wgpu::Extent3d {
            width: 2,
            height: 2,
            depth_or_array_layers: 1,
        },
        mip_level_count: 1,
        sample_count: 1,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba8Unorm,
        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
        view_formats: &[],
    });

    let input_image_view = input_image.create_view(&wgpu::TextureViewDescriptor::default());

    let image_data: [u8; 16] = [
        255, 0, 0, 255, /* */ 0, 255, 0, 255, //
        0, 0, 255, 255, /* */ 255, 255, 255, 255, //
    ];

    ctx.queue.write_texture(
        wgpu::TexelCopyTextureInfo {
            texture: &input_image,
            mip_level: 0,
            origin: wgpu::Origin3d::ZERO,
            aspect: wgpu::TextureAspect::All,
        },
        &image_data,
        wgpu::TexelCopyBufferLayout {
            offset: 0,
            bytes_per_row: Some(8),
            rows_per_image: None,
        },
        wgpu::Extent3d {
            width: 2,
            height: 2,
            depth_or_array_layers: 1,
        },
    );

    let address_modes = [
        (
            wgpu::AddressMode::ClampToEdge,
            wgpu::AddressMode::ClampToEdge,
        ),
        (wgpu::AddressMode::Repeat, wgpu::AddressMode::ClampToEdge),
        (wgpu::AddressMode::ClampToEdge, wgpu::AddressMode::Repeat),
    ];

    let samplers = address_modes.map(|(address_mode_u, address_mode_v)| {
        ctx.device.create_sampler(&wgpu::SamplerDescriptor {
            label: None,
            address_mode_u,
            address_mode_v,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::FilterMode::Nearest,
            lod_min_clamp: 0.0,
            lod_max_clamp: 100.0,
            compare: None,
            anisotropy_clamp: 1,
            border_color: None,
        })
    });

    let mut bind_groups = Vec::new();

    match group_type {
        GroupType::Single => {
            let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("combination_bg"),
                layout: &bind_group_layouts[0],
                entries: &[
                    wgpu::BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureView(&input_image_view),
                    },
                    wgpu::BindGroupEntry {
                        binding: 1,
                        resource: wgpu::BindingResource::Sampler(&samplers[0]),
                    },
                    wgpu::BindGroupEntry {
                        binding: 2,
                        resource: wgpu::BindingResource::Sampler(&samplers[1]),
                    },
                    wgpu::BindGroupEntry {
                        binding: 3,
                        resource: wgpu::BindingResource::Sampler(&samplers[2]),
                    },
                ],
            });

            bind_groups.push(bg);
        }
        GroupType::Multi => {
            let bg0 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg0"),
                layout: &bind_group_layouts[0],
                entries: &[
                    wgpu::BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureView(&input_image_view),
                    },
                    wgpu::BindGroupEntry {
                        binding: 1,
                        resource: wgpu::BindingResource::Sampler(&samplers[0]),
                    },
                ],
            });

            let bg1 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg1"),
                layout: &bind_group_layouts[1],
                entries: &[wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::Sampler(&samplers[1]),
                }],
            });

            let bg2 = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
                label: Some("multiple_bg2"),
                layout: &bind_group_layouts[2],
                entries: &[wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::Sampler(&samplers[2]),
                }],
            });

            bind_groups.push(bg0);
            bind_groups.push(bg1);
            bind_groups.push(bg2);
        }
    }

    let output_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("output_buffer"),
        size: 48,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    });

    let transfer_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("transfer_buffer"),
        size: 48,
        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
        mapped_at_creation: false,
    });

    let output_bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: Some("output_bg"),
        layout: &output_bgl,
        entries: &[wgpu::BindGroupEntry {
            binding: 0,
            resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
                buffer: &output_buffer,
                offset: 0,
                size: None,
            }),
        }],
    });

    let mut bg_references = bind_groups.iter().collect::<Vec<_>>();

    bg_references.push(&output_bg);

    let pipeline = ctx
        .device
        .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("pipeline"),
            layout: Some(&pipeline_layout),
            module: &module,
            entry_point: Some("cs_main"),
            cache: None,
            compilation_options: Default::default(),
        });

    let mut encoder = ctx
        .device
        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("encoder"),
        });

    {
        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: None,
            timestamp_writes: None,
        });
        cpass.set_pipeline(&pipeline);
        for (i, &bg) in bg_references.iter().enumerate() {
            cpass.set_bind_group(i as u32, bg, &[]);
        }
        cpass.dispatch_workgroups(1, 1, 1);
    }

    encoder.copy_buffer_to_buffer(&output_buffer, 0, &transfer_buffer, 0, 48);

    ctx.queue.submit([encoder.finish()]);
    let buffer_slice = transfer_buffer.slice(..);
    buffer_slice.map_async(wgpu::MapMode::Read, |_| {});

    ctx.device.poll(wgpu::Maintain::Wait);

    let buffer_data = buffer_slice.get_mapped_range();

    let f32_buffer: &[f32] = bytemuck::cast_slice(&buffer_data);

    let correct_values: [f32; 12] = [
        0.0, 0.0, 1.0, 1.0, //
        0.5, 0.5, 1.0, 1.0, //
        0.5, 0.0, 0.5, 1.0, //
    ];
    let iter = f32_buffer.iter().zip(correct_values.iter());
    for (&result, &value) in iter {
        approx::assert_relative_eq!(result, value, max_relative = 0.02);
    }
}

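As a side note on where `correct_values` in the test above comes from: the following standalone Rust sketch (not part of the test suite, and the helper names are made up) recomputes the three expected rows by bilinearly filtering the same 2x2 texture at uv = (0.0, 1.0) under the three address-mode combinations, assuming standard texel-center addressing.

```rust
fn wrap(i: i32, size: i32, repeat: bool) -> usize {
    if repeat {
        i.rem_euclid(size) as usize
    } else {
        i.clamp(0, size - 1) as usize
    }
}

fn sample(texels: &[[f32; 4]; 4], repeat_u: bool, repeat_v: bool, u: f32, v: f32) -> [f32; 4] {
    // Texel-space coordinates of the 2x2 bilinear footprint around (u, v).
    let (x, y) = (u * 2.0 - 0.5, v * 2.0 - 0.5);
    let (x0, y0) = (x.floor() as i32, y.floor() as i32);
    let (fx, fy) = (x - x0 as f32, y - y0 as f32);
    let mut out = [0.0; 4];
    for c in 0..4 {
        let t = |xi: i32, yi: i32| texels[wrap(yi, 2, repeat_v) * 2 + wrap(xi, 2, repeat_u)][c];
        let top = t(x0, y0) * (1.0 - fx) + t(x0 + 1, y0) * fx;
        let bottom = t(x0, y0 + 1) * (1.0 - fx) + t(x0 + 1, y0 + 1) * fx;
        out[c] = top * (1.0 - fy) + bottom * fy;
    }
    out
}

fn main() {
    // Same layout as `image_data`: red, green on the top row; blue, white on the bottom row.
    let texels = [
        [1.0, 0.0, 0.0, 1.0],
        [0.0, 1.0, 0.0, 1.0],
        [0.0, 0.0, 1.0, 1.0],
        [1.0, 1.0, 1.0, 1.0],
    ];
    // (repeat_u, repeat_v) per sampler: clamp/clamp, repeat/clamp, clamp/repeat.
    for (ru, rv) in [(false, false), (true, false), (false, true)] {
        println!("{:?}", sample(&texels, ru, rv, 0.0, 1.0));
    }
    // Prints [0,0,1,1], [0.5,0.5,1,1], [0.5,0,0.5,1]: the rows of `correct_values`.
}
```
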
@@ -1521,9 +1521,9 @@ impl Device {
         };
         for (_, var) in module.global_variables.iter() {
             match var.binding {
-                Some(ref br) if br.group >= self.limits.max_bind_groups => {
+                Some(br) if br.group >= self.limits.max_bind_groups => {
                     return Err(pipeline::CreateShaderModuleError::InvalidGroupIndex {
-                        bind: br.clone(),
+                        bind: br,
                         group: br.group,
                         limit: self.limits.max_bind_groups,
                     });
@@ -921,7 +921,7 @@ impl Interface {
         let mut resource_mapping = FastHashMap::default();
         for (var_handle, var) in module.global_variables.iter() {
             let bind = match var.binding {
-                Some(ref br) => br.clone(),
+                Some(br) => br,
                 _ => continue,
             };
             let naga_ty = &module.types[var.ty].inner;
@@ -1058,7 +1058,7 @@ impl Interface {
             BindingLayoutSource::Provided(layouts) => {
                 // update the required binding size for this buffer
                 if let ResourceType::Buffer { size } = res.ty {
-                    match shader_binding_sizes.entry(res.bind.clone()) {
+                    match shader_binding_sizes.entry(res.bind) {
                         Entry::Occupied(e) => {
                             *e.into_mut() = size.max(*e.get());
                         }
@@ -1118,7 +1118,7 @@ impl Interface {
            }
        };
        if let Err(error) = result {
-            return Err(StageError::Binding(res.bind.clone(), error));
+            return Err(StageError::Binding(res.bind, error));
        }
    }

@@ -1159,8 +1159,8 @@ impl Interface {

        if let Some(error) = error {
            return Err(StageError::Filtering {
-                texture: texture_bind.clone(),
-                sampler: sampler_bind.clone(),
+                texture: *texture_bind,
+                sampler: *sampler_bind,
                error,
            });
        }
@@ -77,6 +77,7 @@ dx12 = [
     "dep:libloading",
     "dep:range-alloc",
     "dep:windows-core",
+    "dep:ordered-float",
     "gpu-allocator/d3d12",
     "naga/hlsl-out-if-target-windows",
     "windows/Win32_Graphics_Direct3D_Fxc",
@@ -154,6 +154,11 @@ impl super::Adapter {
        }
        .unwrap();

+        if options.ResourceHeapTier == Direct3D12::D3D12_RESOURCE_HEAP_TIER_1 {
+            // We require Tier 2 for the ability to make samplers bindless in all cases.
+            return None;
+        }
+
        let _depth_bounds_test_supported = {
            let mut features2 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS2::default();
            unsafe {
@@ -195,6 +200,23 @@ impl super::Adapter {
            .is_ok()
        };

+        let mut max_sampler_descriptor_heap_size =
+            Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
+        {
+            let mut features19 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS19::default();
+            let res = unsafe {
+                device.CheckFeatureSupport(
+                    Direct3D12::D3D12_FEATURE_D3D12_OPTIONS19,
+                    <*mut _>::cast(&mut features19),
+                    size_of_val(&features19) as u32,
+                )
+            };
+
+            if res.is_ok() {
+                max_sampler_descriptor_heap_size = features19.MaxSamplerDescriptorHeapSize;
+            }
+        };
+
        let shader_model = if dxc_container.is_none() {
            naga::back::hlsl::ShaderModel::V5_1
        } else {
@@ -260,6 +282,7 @@ impl super::Adapter {
            // See https://github.com/gfx-rs/wgpu/issues/3552
            suballocation_supported: !info.name.contains("Iris(R) Xe"),
            shader_model,
+            max_sampler_descriptor_heap_size,
        };

        // Theoretically vram limited, but in practice 2^20 is the limit
@@ -85,7 +85,7 @@ impl super::CommandEncoder {
        unsafe {
            list.SetDescriptorHeaps(&[
                Some(self.shared.heap_views.raw.clone()),
-                Some(self.shared.heap_samplers.raw.clone()),
+                Some(self.shared.sampler_heap.heap().clone()),
            ])
        };
    }
@@ -241,6 +241,21 @@ impl super::CommandEncoder {
                    (Pk::Transfer, _) => (),
                }
            }
+            super::RootElement::SamplerHeap => match self.pass.kind {
+                Pk::Render => unsafe {
+                    list.SetGraphicsRootDescriptorTable(
+                        index,
+                        self.shared.sampler_heap.gpu_descriptor_table(),
+                    )
+                },
+                Pk::Compute => unsafe {
+                    list.SetComputeRootDescriptorTable(
+                        index,
+                        self.shared.sampler_heap.gpu_descriptor_table(),
+                    )
+                },
+                Pk::Transfer => (),
+            },
        }
    }
    }
@@ -254,6 +269,9 @@ impl super::CommandEncoder {
            other: 0,
        };
    }
+        if let Some(root_index) = layout.sampler_heap_root_index {
+            self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap;
+        }
        self.pass.layout = layout.clone();
        self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
    }
@@ -907,13 +925,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
            root_index += 1;
        }
-
-        // Bind Sampler descriptor tables.
-        if info.tables.contains(super::TableTypes::SAMPLERS) {
-            self.pass.root_elements[root_index] =
-                super::RootElement::Table(group.handle_samplers.unwrap().gpu);
-            root_index += 1;
-        }

        // Bind root descriptors
        for ((&kind, &gpu_base), &offset) in info
            .dynamic_buffers
@@ -284,6 +284,11 @@ impl CpuHeap {
    }

    pub(super) fn at(&self, index: u32) -> Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
+        debug_assert!(
+            index < self.total,
+            "Index ({index}) out of bounds {total}",
+            total = self.total
+        );
        Direct3D12::D3D12_CPU_DESCRIPTOR_HANDLE {
            ptr: self.start.ptr + (self.handle_size * index) as usize,
        }
@@ -1,4 +1,5 @@
 use std::{
+    borrow::Cow,
     ffi,
     mem::{self, size_of, size_of_val},
     num::NonZeroU32,
@@ -100,7 +101,6 @@ impl super::Device {

        // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1
        let capacity_views = limits.max_non_sampler_bindings as u64;
-        let capacity_samplers = 2_048;

        let shared = super::DeviceShared {
            zero_buffer,
@@ -141,11 +141,7 @@ impl super::Device {
                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                capacity_views,
            )?,
-            heap_samplers: descriptor::GeneralHeap::new(
-                &raw,
-                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-                capacity_samplers,
-            )?,
+            sampler_heap: super::sampler::SamplerHeap::new(&raw, &private_caps)?,
        };

        let mut rtv_pool =
@@ -188,10 +184,6 @@ impl super::Device {
                raw.clone(),
                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
            )),
-            sampler_pool: Mutex::new(descriptor::CpuPool::new(
-                raw,
-                Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-            )),
            library: Arc::clone(library),
            #[cfg(feature = "renderdoc")]
            render_doc: Default::default(),
@@ -678,8 +670,6 @@ impl crate::Device for super::Device {
        &self,
        desc: &crate::SamplerDescriptor,
    ) -> Result<super::Sampler, crate::DeviceError> {
-        let handle = self.sampler_pool.lock().alloc_handle()?;
-
        let reduction = match desc.compare {
            Some(_) => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
            None => Direct3D12::D3D12_FILTER_REDUCTION_TYPE_STANDARD,
@@ -697,34 +687,39 @@ impl crate::Device for super::Device {

        let border_color = conv::map_border_color(desc.border_color);

-        unsafe {
-            self.raw.CreateSampler(
-                &Direct3D12::D3D12_SAMPLER_DESC {
-                    Filter: filter,
-                    AddressU: conv::map_address_mode(desc.address_modes[0]),
-                    AddressV: conv::map_address_mode(desc.address_modes[1]),
-                    AddressW: conv::map_address_mode(desc.address_modes[2]),
-                    MipLODBias: 0f32,
-                    MaxAnisotropy: desc.anisotropy_clamp as u32,
-                    ComparisonFunc: conv::map_comparison(
-                        desc.compare.unwrap_or(wgt::CompareFunction::Always),
-                    ),
-                    BorderColor: border_color,
-                    MinLOD: desc.lod_clamp.start,
-                    MaxLOD: desc.lod_clamp.end,
-                },
-                handle.raw,
-            )
-        };
+        let raw_desc = Direct3D12::D3D12_SAMPLER_DESC {
+            Filter: filter,
+            AddressU: conv::map_address_mode(desc.address_modes[0]),
+            AddressV: conv::map_address_mode(desc.address_modes[1]),
+            AddressW: conv::map_address_mode(desc.address_modes[2]),
+            MipLODBias: 0f32,
+            MaxAnisotropy: desc.anisotropy_clamp as u32,
+            ComparisonFunc: conv::map_comparison(
+                desc.compare.unwrap_or(wgt::CompareFunction::Always),
+            ),
+            BorderColor: border_color,
+            MinLOD: desc.lod_clamp.start,
+            MaxLOD: desc.lod_clamp.end,
+        };
+
+        let index = self
+            .shared
+            .sampler_heap
+            .create_sampler(&self.raw, raw_desc)?;

        self.counters.samplers.add(1);

-        Ok(super::Sampler { handle })
+        Ok(super::Sampler {
+            index,
+            desc: raw_desc,
+        })
    }

    unsafe fn destroy_sampler(&self, sampler: super::Sampler) {
-        self.sampler_pool.lock().free_handle(sampler.handle);
+        self.shared
+            .sampler_heap
+            .destroy_sampler(sampler.desc, sampler.index);
        self.counters.samplers.sub(1);
    }
||||||
|
|
||||||
@ -763,12 +758,8 @@ impl crate::Device for super::Device {
|
|||||||
&self,
|
&self,
|
||||||
desc: &crate::BindGroupLayoutDescriptor,
|
desc: &crate::BindGroupLayoutDescriptor,
|
||||||
) -> Result<super::BindGroupLayout, crate::DeviceError> {
|
) -> Result<super::BindGroupLayout, crate::DeviceError> {
|
||||||
let (
|
let mut num_views = 0;
|
||||||
mut num_buffer_views,
|
let mut has_sampler_in_group = false;
|
||||||
mut num_samplers,
|
|
||||||
mut num_texture_views,
|
|
||||||
mut num_acceleration_structures,
|
|
||||||
) = (0, 0, 0, 0);
|
|
||||||
for entry in desc.entries.iter() {
|
for entry in desc.entries.iter() {
|
||||||
let count = entry.count.map_or(1, NonZeroU32::get);
|
let count = entry.count.map_or(1, NonZeroU32::get);
|
||||||
match entry.ty {
|
match entry.ty {
|
||||||
@ -776,18 +767,20 @@ impl crate::Device for super::Device {
|
|||||||
has_dynamic_offset: true,
|
has_dynamic_offset: true,
|
||||||
..
|
..
|
||||||
} => {}
|
} => {}
|
||||||
wgt::BindingType::Buffer { .. } => num_buffer_views += count,
|
wgt::BindingType::Buffer { .. }
|
||||||
wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => {
|
| wgt::BindingType::Texture { .. }
|
||||||
num_texture_views += count
|
| wgt::BindingType::StorageTexture { .. }
|
||||||
}
|
| wgt::BindingType::AccelerationStructure => num_views += count,
|
||||||
wgt::BindingType::Sampler { .. } => num_samplers += count,
|
wgt::BindingType::Sampler { .. } => has_sampler_in_group = true,
|
||||||
wgt::BindingType::AccelerationStructure => num_acceleration_structures += count,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if has_sampler_in_group {
|
||||||
|
num_views += 1;
|
||||||
|
}
|
||||||
|
|
||||||
self.counters.bind_group_layouts.add(1);
|
self.counters.bind_group_layouts.add(1);
|
||||||
|
|
||||||
let num_views = num_buffer_views + num_texture_views + num_acceleration_structures;
|
|
||||||
Ok(super::BindGroupLayout {
|
Ok(super::BindGroupLayout {
|
||||||
entries: desc.entries.to_vec(),
|
entries: desc.entries.to_vec(),
|
||||||
cpu_heap_views: if num_views != 0 {
|
cpu_heap_views: if num_views != 0 {
|
||||||
@ -800,17 +793,7 @@ impl crate::Device for super::Device {
|
|||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
},
|
},
|
||||||
cpu_heap_samplers: if num_samplers != 0 {
|
copy_counts: vec![1; num_views as usize],
|
||||||
let heap = descriptor::CpuHeap::new(
|
|
||||||
&self.raw,
|
|
||||||
Direct3D12::D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
|
||||||
num_samplers,
|
|
||||||
)?;
|
|
||||||
Some(heap)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
},
|
|
||||||
copy_counts: vec![1; num_views.max(num_samplers) as usize],
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -841,12 +824,15 @@ impl crate::Device for super::Device {
|
|||||||
// ...
|
// ...
|
||||||
// (bind group [0]) - Space=0
|
// (bind group [0]) - Space=0
|
||||||
// View descriptor table, if any
|
// View descriptor table, if any
|
||||||
// Sampler descriptor table, if any
|
// Sampler buffer descriptor table, if any
|
||||||
// Root descriptors (for dynamic offset buffers)
|
// Root descriptors (for dynamic offset buffers)
|
||||||
// (bind group [1]) - Space=0
|
// (bind group [1]) - Space=0
|
||||||
// ...
|
// ...
|
||||||
// (bind group [2]) - Space=0
|
// (bind group [2]) - Space=0
|
||||||
// Special constant buffer: Space=0
|
// Special constant buffer: Space=0
|
||||||
|
// Sampler descriptor tables: Space=0
|
||||||
|
// SamplerState Array: Space=0, Register=0-2047
|
||||||
|
// SamplerComparisonState Array: Space=0, Register=2048-4095
|
||||||
|
|
||||||
//TODO: put lower bind group indices further down the root signature. See:
|
//TODO: put lower bind group indices further down the root signature. See:
|
||||||
// https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model
|
// https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model
|
||||||
@ -854,12 +840,10 @@ impl crate::Device for super::Device {
|
|||||||
// on Vulkan-like layout compatibility rules.
|
// on Vulkan-like layout compatibility rules.
|
||||||
|
|
||||||
let mut binding_map = hlsl::BindingMap::default();
|
let mut binding_map = hlsl::BindingMap::default();
|
||||||
let (mut bind_cbv, mut bind_srv, mut bind_uav, mut bind_sampler) = (
|
let mut sampler_buffer_binding_map = hlsl::SamplerIndexBufferBindingMap::default();
|
||||||
hlsl::BindTarget::default(),
|
let mut bind_cbv = hlsl::BindTarget::default();
|
||||||
hlsl::BindTarget::default(),
|
let mut bind_srv = hlsl::BindTarget::default();
|
||||||
hlsl::BindTarget::default(),
|
let mut bind_uav = hlsl::BindTarget::default();
|
||||||
hlsl::BindTarget::default(),
|
|
||||||
);
|
|
||||||
let mut parameters = Vec::new();
|
let mut parameters = Vec::new();
|
||||||
let mut push_constants_target = None;
|
let mut push_constants_target = None;
|
||||||
let mut root_constant_info = None;
|
let mut root_constant_info = None;
|
||||||
@ -886,7 +870,7 @@ impl crate::Device for super::Device {
|
|||||||
},
|
},
|
||||||
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
|
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
|
||||||
});
|
});
|
||||||
let binding = bind_cbv.clone();
|
let binding = bind_cbv;
|
||||||
bind_cbv.register += 1;
|
bind_cbv.register += 1;
|
||||||
root_constant_info = Some(super::RootConstantInfo {
|
root_constant_info = Some(super::RootConstantInfo {
|
||||||
root_index: parameter_index as u32,
|
root_index: parameter_index as u32,
|
||||||
@ -900,19 +884,34 @@ impl crate::Device for super::Device {
|
|||||||
// Collect the whole number of bindings we will create upfront.
|
// Collect the whole number of bindings we will create upfront.
|
||||||
// It allows us to preallocate enough storage to avoid reallocation,
|
// It allows us to preallocate enough storage to avoid reallocation,
|
||||||
// which could cause invalid pointers.
|
// which could cause invalid pointers.
|
||||||
let total_non_dynamic_entries = desc
|
let mut total_non_dynamic_entries = 0_usize;
|
||||||
.bind_group_layouts
|
let mut sampler_in_any_bind_group = false;
|
||||||
.iter()
|
for bgl in desc.bind_group_layouts {
|
||||||
.flat_map(|bgl| {
|
let mut sampler_in_bind_group = false;
|
||||||
bgl.entries.iter().map(|entry| match entry.ty {
|
|
||||||
|
for entry in &bgl.entries {
|
||||||
|
match entry.ty {
|
||||||
wgt::BindingType::Buffer {
|
wgt::BindingType::Buffer {
|
||||||
has_dynamic_offset: true,
|
has_dynamic_offset: true,
|
||||||
..
|
..
|
||||||
} => 0,
|
} => {}
|
||||||
_ => 1,
|
wgt::BindingType::Sampler(_) => sampler_in_bind_group = true,
|
||||||
})
|
_ => total_non_dynamic_entries += 1,
|
||||||
})
|
}
|
||||||
.sum();
|
}
|
||||||
|
|
||||||
|
if sampler_in_bind_group {
|
||||||
|
// One for the sampler buffer
|
||||||
|
total_non_dynamic_entries += 1;
|
||||||
|
sampler_in_any_bind_group = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if sampler_in_any_bind_group {
|
||||||
|
// Two for the sampler arrays themselves
|
||||||
|
total_non_dynamic_entries += 2;
|
||||||
|
}
|
||||||
|
|
||||||
let mut ranges = Vec::with_capacity(total_non_dynamic_entries);
|
let mut ranges = Vec::with_capacity(total_non_dynamic_entries);
|
||||||
|
|
||||||
let mut bind_group_infos =
|
let mut bind_group_infos =
|
||||||
@ -926,10 +925,11 @@ impl crate::Device for super::Device {
|
|||||||
|
|
||||||
let mut visibility_view_static = wgt::ShaderStages::empty();
|
let mut visibility_view_static = wgt::ShaderStages::empty();
|
||||||
let mut visibility_view_dynamic = wgt::ShaderStages::empty();
|
let mut visibility_view_dynamic = wgt::ShaderStages::empty();
|
||||||
let mut visibility_sampler = wgt::ShaderStages::empty();
|
|
||||||
for entry in bgl.entries.iter() {
|
for entry in bgl.entries.iter() {
|
||||||
match entry.ty {
|
match entry.ty {
|
||||||
wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility,
|
wgt::BindingType::Sampler { .. } => {
|
||||||
|
visibility_view_static |= wgt::ShaderStages::all()
|
||||||
|
}
|
||||||
wgt::BindingType::Buffer {
|
wgt::BindingType::Buffer {
|
||||||
has_dynamic_offset: true,
|
has_dynamic_offset: true,
|
||||||
..
|
..
|
||||||
@ -939,7 +939,7 @@ impl crate::Device for super::Device {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SRV/CBV/UAV descriptor tables
|
// SRV/CBV/UAV descriptor tables
|
||||||
let mut range_base = ranges.len();
|
let range_base = ranges.len();
|
||||||
for entry in bgl.entries.iter() {
|
for entry in bgl.entries.iter() {
|
||||||
let range_ty = match entry.ty {
|
let range_ty = match entry.ty {
|
||||||
wgt::BindingType::Buffer {
|
wgt::BindingType::Buffer {
|
||||||
@ -963,7 +963,7 @@ impl crate::Device for super::Device {
|
|||||||
},
|
},
|
||||||
hlsl::BindTarget {
|
hlsl::BindTarget {
|
||||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
binding_array_size: entry.count.map(NonZeroU32::get),
|
||||||
..bt.clone()
|
..*bt
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||||
@ -976,6 +976,44 @@ impl crate::Device for super::Device {
|
|||||||
});
|
});
|
||||||
bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
|
bt.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut sampler_index_within_bind_group = 0;
|
||||||
|
for entry in bgl.entries.iter() {
|
||||||
|
if let wgt::BindingType::Sampler(_) = entry.ty {
|
||||||
|
binding_map.insert(
|
||||||
|
naga::ResourceBinding {
|
||||||
|
group: index as u32,
|
||||||
|
binding: entry.binding,
|
||||||
|
},
|
||||||
|
hlsl::BindTarget {
|
||||||
|
// Naga does not use the space field for samplers
|
||||||
|
space: 255,
|
||||||
|
register: sampler_index_within_bind_group,
|
||||||
|
binding_array_size: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
sampler_index_within_bind_group += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if sampler_index_within_bind_group != 0 {
|
||||||
|
sampler_buffer_binding_map.insert(
|
||||||
|
hlsl::SamplerIndexBufferKey {
|
||||||
|
group: index as u32,
|
||||||
|
},
|
||||||
|
bind_srv,
|
||||||
|
);
|
||||||
|
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||||
|
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
|
||||||
|
NumDescriptors: 1,
|
||||||
|
BaseShaderRegister: bind_srv.register,
|
||||||
|
RegisterSpace: bind_srv.space as u32,
|
||||||
|
OffsetInDescriptorsFromTableStart:
|
||||||
|
Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
||||||
|
});
|
||||||
|
bind_srv.register += 1;
|
||||||
|
}
|
||||||
|
|
||||||
if ranges.len() > range_base {
|
if ranges.len() > range_base {
|
||||||
let range = &ranges[range_base..];
|
let range = &ranges[range_base..];
|
||||||
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
||||||
@ -991,50 +1029,6 @@ impl crate::Device for super::Device {
|
|||||||
info.tables |= super::TableTypes::SRV_CBV_UAV;
|
info.tables |= super::TableTypes::SRV_CBV_UAV;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sampler descriptor tables
|
|
||||||
range_base = ranges.len();
|
|
||||||
for entry in bgl.entries.iter() {
|
|
||||||
let range_ty = match entry.ty {
|
|
||||||
wgt::BindingType::Sampler { .. } => {
|
|
||||||
Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER
|
|
||||||
}
|
|
||||||
_ => continue,
|
|
||||||
};
|
|
||||||
binding_map.insert(
|
|
||||||
naga::ResourceBinding {
|
|
||||||
group: index as u32,
|
|
||||||
binding: entry.binding,
|
|
||||||
},
|
|
||||||
hlsl::BindTarget {
|
|
||||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
|
||||||
..bind_sampler.clone()
|
|
||||||
},
|
|
||||||
);
|
|
||||||
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
|
||||||
RangeType: range_ty,
|
|
||||||
NumDescriptors: entry.count.map_or(1, |count| count.get()),
|
|
||||||
BaseShaderRegister: bind_sampler.register,
|
|
||||||
RegisterSpace: bind_sampler.space as u32,
|
|
||||||
OffsetInDescriptorsFromTableStart:
|
|
||||||
Direct3D12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
|
||||||
});
|
|
||||||
bind_sampler.register += entry.count.map(NonZeroU32::get).unwrap_or(1);
|
|
||||||
}
|
|
||||||
if ranges.len() > range_base {
|
|
||||||
let range = &ranges[range_base..];
|
|
||||||
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
|
||||||
ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
|
||||||
Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
|
|
||||||
DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
|
||||||
NumDescriptorRanges: range.len() as u32,
|
|
||||||
pDescriptorRanges: range.as_ptr(),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
ShaderVisibility: conv::map_visibility(visibility_sampler),
|
|
||||||
});
|
|
||||||
info.tables |= super::TableTypes::SAMPLERS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Root (dynamic) descriptor tables
|
// Root (dynamic) descriptor tables
|
||||||
let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic);
|
let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic);
|
||||||
for entry in bgl.entries.iter() {
|
for entry in bgl.entries.iter() {
|
||||||
@ -1072,7 +1066,7 @@ impl crate::Device for super::Device {
|
|||||||
},
|
},
|
||||||
hlsl::BindTarget {
|
hlsl::BindTarget {
|
||||||
binding_array_size: entry.count.map(NonZeroU32::get),
|
binding_array_size: entry.count.map(NonZeroU32::get),
|
||||||
..bt.clone()
|
..*bt
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
info.dynamic_buffers.push(kind);
|
info.dynamic_buffers.push(kind);
|
||||||
@ -1094,6 +1088,62 @@ impl crate::Device for super::Device {
|
|||||||
bind_group_infos.push(info);
|
bind_group_infos.push(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let sampler_heap_target = hlsl::SamplerHeapBindTargets {
|
||||||
|
standard_samplers: hlsl::BindTarget {
|
||||||
|
space: 0,
|
||||||
|
register: 0,
|
||||||
|
binding_array_size: None,
|
||||||
|
},
|
||||||
|
comparison_samplers: hlsl::BindTarget {
|
||||||
|
space: 0,
|
||||||
|
register: 2048,
|
||||||
|
binding_array_size: None,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut sampler_heap_root_index = None;
|
||||||
|
if sampler_in_any_bind_group {
|
||||||
|
// Sampler descriptor tables
|
||||||
|
//
|
||||||
|
// We bind two sampler ranges pointing to the same descriptor heap, using two different register ranges.
|
||||||
|
//
|
||||||
|
// We bind them as normal samplers in registers 0-2047 and comparison samplers in registers 2048-4095.
|
||||||
|
// Tier 2 hardware guarantees that the type of sampler only needs to match if the sampler is actually
|
||||||
|
// accessed in the shader. As such, we can bind the same array of samplers to both registers.
|
||||||
|
//
|
||||||
|
// We do this because HLSL does not allow you to alias registers at all.
|
||||||
|
let range_base = ranges.len();
|
||||||
|
// Standard samplers, registers 0-2047
|
||||||
|
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||||
|
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
|
||||||
|
NumDescriptors: 2048,
|
||||||
|
BaseShaderRegister: 0,
|
||||||
|
RegisterSpace: 0,
|
||||||
|
OffsetInDescriptorsFromTableStart: 0,
|
||||||
|
});
|
||||||
|
// Comparison samplers, registers 2048-4095
|
||||||
|
ranges.push(Direct3D12::D3D12_DESCRIPTOR_RANGE {
|
||||||
|
RangeType: Direct3D12::D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
|
||||||
|
NumDescriptors: 2048,
|
||||||
|
BaseShaderRegister: 2048,
|
||||||
|
RegisterSpace: 0,
|
||||||
|
OffsetInDescriptorsFromTableStart: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
let range = &ranges[range_base..];
|
||||||
|
sampler_heap_root_index = Some(parameters.len() as super::RootIndex);
|
||||||
|
parameters.push(Direct3D12::D3D12_ROOT_PARAMETER {
|
||||||
|
ParameterType: Direct3D12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
||||||
|
Anonymous: Direct3D12::D3D12_ROOT_PARAMETER_0 {
|
||||||
|
DescriptorTable: Direct3D12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
||||||
|
NumDescriptorRanges: range.len() as u32,
|
||||||
|
pDescriptorRanges: range.as_ptr(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure that we didn't reallocate!
|
// Ensure that we didn't reallocate!
|
||||||
debug_assert_eq!(ranges.len(), total_non_dynamic_entries);
|
debug_assert_eq!(ranges.len(), total_non_dynamic_entries);
|
||||||
|
|
||||||
@ -1113,7 +1163,7 @@ impl crate::Device for super::Device {
|
|||||||
},
|
},
|
||||||
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only,
|
ShaderVisibility: Direct3D12::D3D12_SHADER_VISIBILITY_ALL, // really needed for VS and CS only,
|
||||||
});
|
});
|
||||||
let binding = bind_cbv.clone();
|
let binding = bind_cbv;
|
||||||
bind_cbv.register += 1;
|
bind_cbv.register += 1;
|
||||||
(Some(parameter_index as u32), Some(binding))
|
(Some(parameter_index as u32), Some(binding))
|
||||||
} else {
|
} else {
|
||||||
@ -1231,6 +1281,7 @@ impl crate::Device for super::Device {
|
|||||||
total_root_elements: parameters.len() as super::RootIndex,
|
total_root_elements: parameters.len() as super::RootIndex,
|
||||||
special_constants,
|
special_constants,
|
||||||
root_constant_info,
|
root_constant_info,
|
||||||
|
sampler_heap_root_index,
|
||||||
},
|
},
|
||||||
bind_group_infos,
|
bind_group_infos,
|
||||||
naga_options: hlsl::Options {
|
naga_options: hlsl::Options {
|
||||||
@ -1241,6 +1292,8 @@ impl crate::Device for super::Device {
|
|||||||
push_constants_target,
|
push_constants_target,
|
||||||
zero_initialize_workgroup_memory: true,
|
zero_initialize_workgroup_memory: true,
|
||||||
restrict_indexing: true,
|
restrict_indexing: true,
|
||||||
|
sampler_heap_target,
|
||||||
|
sampler_buffer_binding_map,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -1267,14 +1320,6 @@ impl crate::Device for super::Device {
|
|||||||
if let Some(ref mut inner) = cpu_views {
|
if let Some(ref mut inner) = cpu_views {
|
||||||
inner.stage.clear();
|
inner.stage.clear();
|
||||||
}
|
}
|
||||||
let mut cpu_samplers = desc
|
|
||||||
.layout
|
|
||||||
.cpu_heap_samplers
|
|
||||||
.as_ref()
|
|
||||||
.map(|cpu_heap| cpu_heap.inner.lock());
|
|
||||||
if let Some(ref mut inner) = cpu_samplers {
|
|
||||||
inner.stage.clear();
|
|
||||||
}
|
|
||||||
let mut dynamic_buffers = Vec::new();
|
let mut dynamic_buffers = Vec::new();
|
||||||
|
|
||||||
let layout_and_entry_iter = desc.entries.iter().map(|entry| {
|
let layout_and_entry_iter = desc.entries.iter().map(|entry| {
|
||||||
@ -1286,6 +1331,8 @@ impl crate::Device for super::Device {
|
|||||||
.expect("internal error: no layout entry found with binding slot");
|
.expect("internal error: no layout entry found with binding slot");
|
||||||
(layout, entry)
|
(layout, entry)
|
||||||
});
|
});
|
||||||
|
let mut sampler_indexes: Vec<super::sampler::SamplerIndex> = Vec::new();
|
||||||
|
|
||||||
for (layout, entry) in layout_and_entry_iter {
|
for (layout, entry) in layout_and_entry_iter {
|
||||||
match layout.ty {
|
match layout.ty {
|
||||||
wgt::BindingType::Buffer {
|
wgt::BindingType::Buffer {
|
||||||
@ -1390,8 +1437,8 @@ impl crate::Device for super::Device {
|
|||||||
wgt::BindingType::Sampler { .. } => {
|
wgt::BindingType::Sampler { .. } => {
|
||||||
let start = entry.resource_index as usize;
|
let start = entry.resource_index as usize;
|
||||||
let end = start + entry.count as usize;
|
let end = start + entry.count as usize;
|
||||||
for data in &desc.samplers[start..end] {
|
for &data in &desc.samplers[start..end] {
|
||||||
cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw);
|
sampler_indexes.push(data.index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wgt::BindingType::AccelerationStructure => {
|
wgt::BindingType::AccelerationStructure => {
|
||||||
@ -1424,6 +1471,92 @@ impl crate::Device for super::Device {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let sampler_index_buffer = if !sampler_indexes.is_empty() {
|
||||||
|
let buffer_size = (sampler_indexes.len() * size_of::<u32>()) as u64;
|
||||||
|
|
||||||
|
let label = if let Some(label) = desc.label {
|
||||||
|
Cow::Owned(format!("{} (Internal Sampler Index Buffer)", label))
|
||||||
|
} else {
|
||||||
|
Cow::Borrowed("Internal Sampler Index Buffer")
|
||||||
|
};
|
||||||
|
|
||||||
|
let buffer_desc = crate::BufferDescriptor {
|
||||||
|
label: None,
|
||||||
|
size: buffer_size,
|
||||||
|
usage: crate::BufferUses::STORAGE_READ_ONLY | crate::BufferUses::MAP_WRITE,
|
||||||
|
// D3D12 backend doesn't care about the memory flags
|
||||||
|
memory_flags: crate::MemoryFlags::empty(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let raw_buffer_desc = Direct3D12::D3D12_RESOURCE_DESC {
|
||||||
|
Dimension: Direct3D12::D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||||
|
Alignment: 0,
|
||||||
|
Width: buffer_size,
|
||||||
|
Height: 1,
|
||||||
|
DepthOrArraySize: 1,
|
||||||
|
MipLevels: 1,
|
||||||
|
Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
|
||||||
|
SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC {
|
||||||
|
Count: 1,
|
||||||
|
Quality: 0,
|
||||||
|
},
|
||||||
|
Layout: Direct3D12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||||
|
Flags: Direct3D12::D3D12_RESOURCE_FLAG_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
let (buffer, allocation) =
|
||||||
|
super::suballocation::create_buffer_resource(self, &buffer_desc, raw_buffer_desc)?;
|
||||||
|
|
||||||
|
unsafe { buffer.SetName(&windows::core::HSTRING::from(&*label)) }
|
||||||
|
.into_device_result("SetName")?;
|
||||||
|
|
||||||
|
let mut mapping = ptr::null_mut::<ffi::c_void>();
|
||||||
|
unsafe { buffer.Map(0, None, Some(&mut mapping)) }.into_device_result("Map")?;
|
||||||
|
|
||||||
|
assert!(!mapping.is_null());
|
||||||
|
assert_eq!(mapping as usize % 4, 0);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
ptr::copy_nonoverlapping(
|
||||||
|
sampler_indexes.as_ptr(),
|
||||||
|
mapping.cast(),
|
||||||
|
sampler_indexes.len(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
// The unmapping is not needed, as all memory is coherent in d3d12, but lets be nice to our address space.
|
||||||
|
unsafe { buffer.Unmap(0, None) };
|
||||||
|
|
||||||
|
let srv_desc = Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC {
|
||||||
|
Format: Dxgi::Common::DXGI_FORMAT_UNKNOWN,
|
||||||
|
ViewDimension: Direct3D12::D3D12_SRV_DIMENSION_BUFFER,
|
||||||
|
Anonymous: Direct3D12::D3D12_SHADER_RESOURCE_VIEW_DESC_0 {
|
||||||
|
Buffer: Direct3D12::D3D12_BUFFER_SRV {
|
||||||
|
FirstElement: 0,
|
||||||
|
NumElements: sampler_indexes.len() as u32,
|
||||||
|
StructureByteStride: 4,
|
||||||
|
Flags: Direct3D12::D3D12_BUFFER_SRV_FLAG_NONE,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Shader4ComponentMapping: Direct3D12::D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
|
||||||
|
};
|
||||||
|
|
||||||
|
let inner = cpu_views.as_mut().unwrap();
|
||||||
|
let cpu_index = inner.stage.len() as u32;
|
||||||
|
let srv = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
self.raw
|
||||||
|
.CreateShaderResourceView(&buffer, Some(&srv_desc), srv)
|
||||||
|
};
|
||||||
|
|
||||||
|
cpu_views.as_mut().unwrap().stage.push(srv);
|
||||||
|
|
||||||
|
Some(super::SamplerIndexBuffer { buffer, allocation })
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let handle_views = match cpu_views {
|
let handle_views = match cpu_views {
|
||||||
Some(inner) => {
|
Some(inner) => {
|
||||||
let dual = unsafe {
|
let dual = unsafe {
|
||||||
@ -1438,26 +1571,12 @@ impl crate::Device for super::Device {
|
|||||||
}
|
}
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
let handle_samplers = match cpu_samplers {
|
|
||||||
Some(inner) => {
|
|
||||||
let dual = unsafe {
|
|
||||||
descriptor::upload(
|
|
||||||
&self.raw,
|
|
||||||
&inner,
|
|
||||||
&self.shared.heap_samplers,
|
|
||||||
&desc.layout.copy_counts,
|
|
||||||
)
|
|
||||||
}?;
|
|
||||||
Some(dual)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
self.counters.bind_groups.add(1);
|
self.counters.bind_groups.add(1);
|
||||||
|
|
||||||
Ok(super::BindGroup {
|
Ok(super::BindGroup {
|
||||||
handle_views,
|
handle_views,
|
||||||
handle_samplers,
|
sampler_index_buffer,
|
||||||
dynamic_buffers,
|
dynamic_buffers,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -1466,8 +1585,14 @@ impl crate::Device for super::Device {
|
|||||||
if let Some(dual) = group.handle_views {
|
if let Some(dual) = group.handle_views {
|
||||||
self.shared.heap_views.free_slice(dual);
|
self.shared.heap_views.free_slice(dual);
|
||||||
}
|
}
|
||||||
if let Some(dual) = group.handle_samplers {
|
|
||||||
self.shared.heap_samplers.free_slice(dual);
|
if let Some(sampler_buffer) = group.sampler_index_buffer {
|
||||||
|
// Make sure the buffer is dropped before the allocation
|
||||||
|
drop(sampler_buffer.buffer);
|
||||||
|
|
||||||
|
if let Some(allocation) = sampler_buffer.allocation {
|
||||||
|
super::suballocation::free_buffer_allocation(self, allocation, &self.mem_allocator);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.counters.bind_groups.sub(1);
|
self.counters.bind_groups.sub(1);
|
||||||
|
|||||||
@@ -13,13 +13,52 @@ and destination states match, and they are for storage sync.
 
 For now, all resources are created with "committed" memory.
 
+## Sampler Descriptor Management
+
+At most one descriptor heap of each type can be bound at once. This
+means that the descriptors from all bind groups need to be present
+in the same heap, and they need to be contiguous within that heap.
+This is not a problem for the SRV/CBV/UAV heap, as it can be sized into
+the millions of entries. However, the sampler heap is limited to 2048 entries.
+
+In order to work around this limitation, we refer to samplers indirectly by index.
+The entire sampler heap is bound at once, and a buffer containing all sampler indexes
+for that bind group is bound. The shader then uses the index to look up the sampler
+in the heap. To help visualize this, the generated HLSL looks like this:
+
+```wgsl
+@group(0) @binding(2) var myLinearSampler: sampler;
+@group(1) @binding(1) var myAnisoSampler: sampler;
+@group(1) @binding(4) var myCompSampler: sampler;
+```
+
+```cpp
+// These bindings alias the same descriptors. Depending on the type, the shader will use the correct one.
+SamplerState nagaSamplerHeap[2048]: register(s0, space0);
+SamplerComparisonState nagaComparisonSamplerHeap[2048]: register(s2048, space1);
+
+StructuredBuffer<uint> nagaGroup0SamplerIndexArray : register(t0, space0);
+StructuredBuffer<uint> nagaGroup1SamplerIndexArray : register(t1, space0);
+
+// Indexes into group 0 index array
+static const SamplerState myLinearSampler = nagaSamplerHeap[nagaGroup0SamplerIndexArray[0]];
+
+// Indexes into group 1 index array
+static const SamplerState myAnisoSampler = nagaSamplerHeap[nagaGroup1SamplerIndexArray[0]];
+static const SamplerComparisonState myCompSampler = nagaComparisonSamplerHeap[nagaGroup1SamplerIndexArray[1]];
+```
+
+Without this transform we would need a separate set of sampler descriptors for each unique combination of samplers
+in a bind group. This results in a lot of duplication and makes it easy to hit the 2048 limit. With the transform,
+the limit is merely 2048 unique samplers in existence, which is much more reasonable.
+
 ## Resource binding
 
 See [`Device::create_pipeline_layout`] documentation for the structure
 of the root signature corresponding to WebGPU pipeline layout.
 
 Binding groups is mostly straightforward, with one big caveat:
-all bindings have to be reset whenever the pipeline layout changes.
+all bindings have to be reset whenever the root signature changes.
 This is the rule of D3D12, and we can do nothing to help it.
 
 We detect this change at both [`crate::CommandEncoder::set_bind_group`]
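
To make the CPU side of this indirection concrete, here is a minimal sketch of what a group's sampler index buffer has to contain. The helper name and signature are illustrative only and do not appear in wgpu-hal; the heap indices would come from the refcounted sampler cache introduced later in this change.

```rust
// Illustrative sketch only: `heap_indices_in_group_order` holds one global heap
// index per sampler in the bind group, in the order the backend assigns their
// array slots.
fn sampler_index_buffer_contents(heap_indices_in_group_order: &[u32]) -> Vec<u32> {
    // Slot `k` is read by the generated `nagaGroupNSamplerIndexArray[k]`, so the
    // buffer is simply the heap indices laid out in that per-group slot order.
    heap_indices_in_group_order.to_vec()
}
```

Under this scheme, swapping a sampler in a bind group only changes a `u32` in this buffer; the descriptor heap layout itself never has to move.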
@@ -39,6 +78,7 @@ mod conv;
 mod descriptor;
 mod device;
 mod instance;
+mod sampler;
 mod shader_compilation;
 mod suballocation;
 mod types;
@@ -518,6 +558,7 @@ struct PrivateCapabilities {
     casting_fully_typed_format_supported: bool,
     suballocation_supported: bool,
     shader_model: naga::back::hlsl::ShaderModel,
+    max_sampler_descriptor_heap_size: u32,
 }
 
 #[derive(Default)]
@@ -575,7 +616,7 @@ struct DeviceShared {
     zero_buffer: Direct3D12::ID3D12Resource,
     cmd_signatures: CommandSignatures,
     heap_views: descriptor::GeneralHeap,
-    heap_samplers: descriptor::GeneralHeap,
+    sampler_heap: sampler::SamplerHeap,
 }
 
 unsafe impl Send for DeviceShared {}
@@ -591,7 +632,6 @@ pub struct Device {
     rtv_pool: Mutex<descriptor::CpuPool>,
     dsv_pool: Mutex<descriptor::CpuPool>,
     srv_uav_pool: Mutex<descriptor::CpuPool>,
-    sampler_pool: Mutex<descriptor::CpuPool>,
     // library
     library: Arc<D3D12Lib>,
     #[cfg(feature = "renderdoc")]
@@ -645,7 +685,7 @@ struct PassResolve {
     format: Dxgi::Common::DXGI_FORMAT,
 }
 
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 enum RootElement {
     Empty,
     Constant,
@@ -664,6 +704,8 @@ enum RootElement {
         kind: BufferViewKind,
         address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE,
     },
+    /// Descriptor table referring to the entire sampler heap.
+    SamplerHeap,
 }
 
 #[derive(Clone, Copy)]
@@ -700,6 +742,7 @@ impl PassState {
                 total_root_elements: 0,
                 special_constants: None,
                 root_constant_info: None,
+                sampler_heap_root_index: None,
             },
             root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
             constant_data: [0; MAX_ROOT_ELEMENTS],
@@ -853,7 +896,8 @@ unsafe impl Sync for TextureView {}
 
 #[derive(Debug)]
 pub struct Sampler {
-    handle: descriptor::Handle,
+    index: sampler::SamplerIndex,
+    desc: Direct3D12::D3D12_SAMPLER_DESC,
 }
 
 impl crate::DynSampler for Sampler {}
@@ -893,7 +937,6 @@ pub struct BindGroupLayout {
     /// Sorted list of entries.
     entries: Vec<wgt::BindGroupLayoutEntry>,
     cpu_heap_views: Option<descriptor::CpuHeap>,
-    cpu_heap_samplers: Option<descriptor::CpuHeap>,
     copy_counts: Vec<u32>, // all 1's
 }
 
@@ -906,10 +949,16 @@ enum BufferViewKind {
     UnorderedAccess,
 }
 
+#[derive(Debug)]
+struct SamplerIndexBuffer {
+    buffer: Direct3D12::ID3D12Resource,
+    allocation: Option<suballocation::AllocationWrapper>,
+}
+
 #[derive(Debug)]
 pub struct BindGroup {
     handle_views: Option<descriptor::DualHandle>,
-    handle_samplers: Option<descriptor::DualHandle>,
+    sampler_index_buffer: Option<SamplerIndexBuffer>,
     dynamic_buffers: Vec<Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE>,
 }
 
@@ -945,6 +994,7 @@ struct PipelineLayoutShared {
     total_root_elements: RootIndex,
     special_constants: Option<PipelineLayoutSpecialConstants>,
    root_constant_info: Option<RootConstantInfo>,
+    sampler_heap_root_index: Option<RootIndex>,
 }
 
 unsafe impl Send for PipelineLayoutShared {}
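
As a rough sketch of how the new `sampler_heap_root_index` plumbing would be used at encode time (illustrative only, not the actual wgpu-hal call site): binding samplers reduces to setting a single descriptor table that spans the whole sampler heap whenever the root signature changes.

```rust
use windows::Win32::Graphics::Direct3D12::{
    ID3D12GraphicsCommandList, D3D12_GPU_DESCRIPTOR_HANDLE,
};

/// Illustrative only: rebinds the global sampler heap table after a
/// root-signature change. `root_index` would come from
/// `PipelineLayoutShared::sampler_heap_root_index` and the handle from
/// `SamplerHeap::gpu_descriptor_table()`.
unsafe fn rebind_sampler_heap(
    list: &ID3D12GraphicsCommandList,
    root_index: Option<u32>,
    sampler_heap_table: D3D12_GPU_DESCRIPTOR_HANDLE,
) {
    if let Some(index) = root_index {
        // One table covers the entire heap, so this happens once per root
        // signature rather than once per bind group.
        unsafe { list.SetGraphicsRootDescriptorTable(index, sampler_heap_table) };
    }
}
```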
251  wgpu-hal/src/dx12/sampler.rs  Normal file
@@ -0,0 +1,251 @@
+//! Sampler management for DX12.
+//!
+//! Nearly identical to the Vulkan sampler cache, with added descriptor heap management.
+
+use std::collections::{hash_map::Entry, HashMap};
+
+use ordered_float::OrderedFloat;
+use parking_lot::Mutex;
+use windows::Win32::Graphics::Direct3D12::*;
+
+use crate::dx12::HResult;
+
+/// The index of a sampler in the global sampler heap.
+///
+/// This is a type-safe, transparent wrapper around a u32.
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub(crate) struct SamplerIndex(u32);
+
+/// [`D3D12_SAMPLER_DESC`] is not hashable, so we wrap it in a newtype that is.
+///
+/// We use [`OrderedFloat`] to allow for floating point values to be compared and
+/// hashed in a defined way.
+#[derive(Debug, Copy, Clone)]
+struct HashableSamplerDesc(D3D12_SAMPLER_DESC);
+
+impl PartialEq for HashableSamplerDesc {
+    fn eq(&self, other: &Self) -> bool {
+        self.0.Filter == other.0.Filter
+            && self.0.AddressU == other.0.AddressU
+            && self.0.AddressV == other.0.AddressV
+            && self.0.AddressW == other.0.AddressW
+            && OrderedFloat(self.0.MipLODBias) == OrderedFloat(other.0.MipLODBias)
+            && self.0.MaxAnisotropy == other.0.MaxAnisotropy
+            && self.0.ComparisonFunc == other.0.ComparisonFunc
+            && self.0.BorderColor.map(OrderedFloat) == other.0.BorderColor.map(OrderedFloat)
+            && OrderedFloat(self.0.MinLOD) == OrderedFloat(other.0.MinLOD)
+            && OrderedFloat(self.0.MaxLOD) == OrderedFloat(other.0.MaxLOD)
+    }
+}
+
+impl Eq for HashableSamplerDesc {}
+
+impl std::hash::Hash for HashableSamplerDesc {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.0.Filter.0.hash(state);
+        self.0.AddressU.0.hash(state);
+        self.0.AddressV.0.hash(state);
+        self.0.AddressW.0.hash(state);
+        OrderedFloat(self.0.MipLODBias).hash(state);
+        self.0.MaxAnisotropy.hash(state);
+        self.0.ComparisonFunc.0.hash(state);
+        self.0.BorderColor.map(OrderedFloat).hash(state);
+        OrderedFloat(self.0.MinLOD).hash(state);
+        OrderedFloat(self.0.MaxLOD).hash(state);
+    }
+}
+
+/// Entry in the sampler cache.
+struct CacheEntry {
+    index: SamplerIndex,
+    ref_count: u32,
+}
+
+/// Container for the mutable management state of the sampler heap.
+///
+/// We have this separated, using interior mutability, to allow for the outside world
+/// to access the heap directly without needing to take the lock.
+pub(crate) struct SamplerHeapState {
+    /// Mapping from the sampler description to the index within the heap and the refcount.
+    mapping: HashMap<HashableSamplerDesc, CacheEntry>,
+    /// List of free sampler indices.
+    freelist: Vec<SamplerIndex>,
+}
+
+/// Global sampler heap for the device.
+///
+/// As D3D12 only allows 2048 samplers to be in a single heap, we need to cache
+/// samplers aggressively and refer to them in shaders by index.
+pub(crate) struct SamplerHeap {
+    /// Mutable management state of the sampler heap.
+    state: Mutex<SamplerHeapState>,
+
+    /// The heap itself.
+    heap: ID3D12DescriptorHeap,
+    /// The CPU-side handle to the first descriptor in the heap.
+    ///
+    /// Both the CPU and GPU handles point to the same descriptor, just in
+    /// different contexts.
+    heap_cpu_start_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
+    /// The GPU-side handle to the first descriptor in the heap.
+    ///
+    /// Both the CPU and GPU handles point to the same descriptor, just in
+    /// different contexts.
+    heap_gpu_start_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
+
+    /// This is the device-specific size of sampler descriptors.
+    descriptor_stride: u32,
+}
+
+impl SamplerHeap {
+    pub fn new(
+        device: &ID3D12Device,
+        private_caps: &super::PrivateCapabilities,
+    ) -> Result<Self, crate::DeviceError> {
+        profiling::scope!("SamplerHeap::new");
+
+        // WARP can report this as 2M or more. We clamp it to 64k to be safe.
+        const SAMPLER_HEAP_SIZE_CLAMP: u32 = 64 * 1024;
+
+        let max_unique_samplers = private_caps
+            .max_sampler_descriptor_heap_size
+            .min(SAMPLER_HEAP_SIZE_CLAMP);
+
+        let desc = D3D12_DESCRIPTOR_HEAP_DESC {
+            Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+            NumDescriptors: max_unique_samplers,
+            Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
+            NodeMask: 0,
+        };
+        let heap = unsafe { device.CreateDescriptorHeap::<ID3D12DescriptorHeap>(&desc) }
+            .into_device_result("Failed to create global GPU-Visible Sampler Descriptor Heap")?;
+
+        let heap_cpu_start_handle = unsafe { heap.GetCPUDescriptorHandleForHeapStart() };
+        let heap_gpu_start_handle = unsafe { heap.GetGPUDescriptorHandleForHeapStart() };
+
+        let descriptor_stride =
+            unsafe { device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) };
+
+        Ok(Self {
+            state: Mutex::new(SamplerHeapState {
+                mapping: HashMap::new(),
+                // Reverse so that samplers get allocated starting from zero.
+                freelist: (0..max_unique_samplers).map(SamplerIndex).rev().collect(),
+            }),
+            heap,
+            heap_cpu_start_handle,
+            heap_gpu_start_handle,
+            descriptor_stride,
+        })
+    }
+
+    /// Returns a reference to the raw descriptor heap.
+    pub fn heap(&self) -> &ID3D12DescriptorHeap {
+        &self.heap
+    }
+
+    /// Returns the handle to be bound to the descriptor table.
+    pub fn gpu_descriptor_table(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE {
+        self.heap_gpu_start_handle
+    }
+
+    /// Add a sampler with the given description to the heap.
+    ///
+    /// If the sampler already exists, the refcount is incremented and the existing index is returned.
+    ///
+    /// If the sampler does not exist, a new sampler is created and the index is returned.
+    ///
+    /// If the heap is full, an error is returned.
+    pub fn create_sampler(
+        &self,
+        device: &ID3D12Device,
+        desc: D3D12_SAMPLER_DESC,
+    ) -> Result<SamplerIndex, crate::DeviceError> {
+        profiling::scope!("SamplerHeap::create_sampler");
+
+        let hashable_desc = HashableSamplerDesc(desc);
+
+        // Eagerly dereference the lock to allow split borrows.
+        let state = &mut *self.state.lock();
+
+        // Look up the sampler in the mapping.
+        match state.mapping.entry(hashable_desc) {
+            Entry::Occupied(occupied_entry) => {
+                // We have found a match, so increment the refcount and return the index.
+                let entry = occupied_entry.into_mut();
+                entry.ref_count += 1;
+                Ok(entry.index)
+            }
+            Entry::Vacant(vacant_entry) => {
+                // We need to create a new sampler.
+
+                // Try to get a new index from the freelist.
+                let Some(index) = state.freelist.pop() else {
+                    // If the freelist is empty, we have hit the maximum number of samplers.
+                    log::error!("There is no more room in the global sampler heap for more unique samplers. Your device supports a maximum of {} unique samplers.", state.mapping.len());
+                    return Err(crate::DeviceError::OutOfMemory);
+                };
+
+                // Compute the CPU side handle for the new sampler.
+                let handle = D3D12_CPU_DESCRIPTOR_HANDLE {
+                    ptr: self.heap_cpu_start_handle.ptr
+                        + self.descriptor_stride as usize * index.0 as usize,
+                };
+
+                unsafe {
+                    device.CreateSampler(&desc, handle);
+                }
+
+                // Insert the new sampler into the mapping.
+                vacant_entry.insert(CacheEntry {
+                    index,
+                    ref_count: 1,
+                });
+
+                Ok(index)
+            }
+        }
+    }
+
+    /// Decrement the refcount of the sampler with the given description.
+    ///
+    /// If the refcount reaches zero, the sampler is destroyed and the index is returned to the freelist.
+    ///
+    /// The provided index is checked against the index of the sampler with the given description, ensuring
+    /// that there isn't a clerical error from the caller.
+    pub fn destroy_sampler(&self, desc: D3D12_SAMPLER_DESC, provided_index: SamplerIndex) {
+        profiling::scope!("SamplerHeap::destroy_sampler");
+
+        // Eagerly dereference the lock to allow split borrows.
+        let state = &mut *self.state.lock();
+
+        // Get the index of the sampler to destroy.
+        let Entry::Occupied(mut hash_map_entry) = state.mapping.entry(HashableSamplerDesc(desc))
+        else {
+            log::error!(
+                "Tried to destroy a sampler that doesn't exist. Sampler description: {:#?}",
+                desc
+            );
+            return;
+        };
+        let cache_entry = hash_map_entry.get_mut();
+
+        // Ensure that the provided index matches the index of the sampler to destroy.
+        assert_eq!(
+            cache_entry.index, provided_index,
+            "Mismatched sampler index, this is an implementation bug"
+        );
+
+        // Decrement the refcount of the sampler.
+        cache_entry.ref_count -= 1;
+
+        // If we are the last reference, remove the sampler from the mapping and return the index to the freelist.
+        //
+        // As samplers only exist as descriptors in the heap, nothing more needs to be done to destroy the sampler.
+        if cache_entry.ref_count == 0 {
+            state.freelist.push(cache_entry.index);
+            hash_map_entry.remove();
+        }
+    }
+}
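
A quick usage sketch of the cache semantics above (hypothetical caller inside this module, assuming a live device and a sampler description in scope; not part of the actual change):

```rust
// Hypothetical caller, for illustration only.
fn sampler_cache_roundtrip(
    heap: &SamplerHeap,
    device: &ID3D12Device,
    desc: D3D12_SAMPLER_DESC,
) -> Result<(), crate::DeviceError> {
    // Two identical descriptions share one heap slot; the second call only
    // bumps the refcount.
    let a = heap.create_sampler(device, desc)?;
    let b = heap.create_sampler(device, desc)?;
    assert_eq!(a, b);

    // Each create needs a matching destroy. The first drop keeps the slot
    // alive; the second returns the index to the freelist.
    heap.destroy_sampler(desc, a);
    heap.destroy_sampler(desc, b);
    Ok(())
}
```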
@@ -244,7 +244,7 @@ impl super::Device {
             }
             naga::AddressSpace::Uniform | naga::AddressSpace::Storage { .. } => {
                 let br = match var.binding {
-                    Some(ref br) => br.clone(),
+                    Some(br) => br,
                     None => continue,
                 };
                 let storage_access_store = match var.space {