LoadOp::DontCare (#8549)

This commit is contained in:
Connor Fitzgerald 2025-12-07 22:41:38 -05:00 committed by GitHub
parent b9821cacd1
commit e3149bac9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 269 additions and 37 deletions

View File

@ -61,6 +61,23 @@ By @R-Cramer4 in [#8230](https://github.com/gfx-rs/wgpu/pull/8230)
[kek]: https://web.archive.org/web/20250923122958/https://knowyourmeme.com/memes/kek
#### New `LoadOp::DontCare`
In the case where a renderpass unconditionally writes to all pixels in the rendertarget,
`Load` can cause unnecessary memory traffic, and `Clear` can spend time unnecessarily
clearing the rendertargets. `DontCare` is a new `LoadOp` which will leave the contents
of the rendertarget undefined. Because this could lead to undefined behavior, this API
requires the user to pass a token obtained through an `unsafe` call in order to use it.
While you can use this unconditionally, on platforms where `DontCare` is not available,
it will internally use a different load op.
```rust
load: LoadOp::DontCare(unsafe { wgpu::LoadOpDontCare::enabled() })
```
By @cwfitzgerald in [#8549](https://github.com/gfx-rs/wgpu/pull/8549)
#### `MipmapFilterMode` is split from `FilterMode`
This is a breaking change that aligns wgpu with spec.

View File

@ -41,6 +41,7 @@ mod multiview;
mod occlusion_query;
mod oob_indexing;
mod oom;
mod pass_ops;
mod pipeline;
mod pipeline_cache;
mod planar_texture;
@ -104,6 +105,7 @@ fn all_tests() -> Vec<wgpu_test::GpuTestInitializer> {
occlusion_query::all_tests(&mut tests);
oob_indexing::all_tests(&mut tests);
oom::all_tests(&mut tests);
pass_ops::all_tests(&mut tests);
pipeline_cache::all_tests(&mut tests);
pipeline::all_tests(&mut tests);
planar_texture::all_tests(&mut tests);

View File

@ -0,0 +1,111 @@
use wgpu_test::{
gpu_test, image::ReadbackBuffers, GpuTestConfiguration, GpuTestInitializer, TestParameters,
TestingContext,
};
/// Appends every GPU test defined in this module to `vec`.
///
/// Currently the only test registered here is `DONT_CARE`.
pub fn all_tests(vec: &mut Vec<GpuTestInitializer>) {
vec.push(DONT_CARE);
}
// GPU test exercising `LoadOp::DontCare`: runs `run_test` asynchronously with the
// default `TestParameters`.
#[gpu_test]
static DONT_CARE: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default())
.run_async(run_test);
/// Renders a full-screen triangle into a 1x1 `Rgba8Unorm` texture whose color
/// attachment uses `LoadOp::DontCare`, then reads the texture back and asserts
/// that the single pixel holds the value written by the fragment shader.
async fn run_test(ctx: TestingContext) {
// The three vertices form one oversized triangle that covers the whole
// [-1, 1] x [-1, 1] clip-space square, so every pixel of the target is drawn.
// The fragment shader writes 127/255 to all four channels.
let shader_src = "
const triangles = array<vec2f, 3>(vec2f(-1.0, -1.0), vec2f(3.0, -1.0), vec2f(-1.0, 3.0));
@vertex
fn vs_main(@builtin(vertex_index) vertex_index: u32) -> @builtin(position) vec4f {
return vec4f(triangles[vertex_index], 0.0, 1.0);
}
@fragment
fn fs_main() -> @location(0) vec4f {
return vec4f(127.0 / 255.0);
}
";
let shader = ctx
.device
.create_shader_module(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(shader_src.into()),
});
// Pipeline with no vertex buffers (positions come from `vertex_index`), no
// depth/stencil, and a single `Rgba8Unorm` color target with blending disabled
// and all channels writable, so the draw fully replaces the target's contents.
let pipeline_desc = wgpu::RenderPipelineDescriptor {
label: None,
layout: None,
vertex: wgpu::VertexState {
buffers: &[],
module: &shader,
entry_point: Some("vs_main"),
compilation_options: Default::default(),
},
primitive: wgpu::PrimitiveState::default(),
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: Some("fs_main"),
compilation_options: Default::default(),
targets: &[Some(wgpu::ColorTargetState {
format: wgpu::TextureFormat::Rgba8Unorm,
blend: None,
write_mask: wgpu::ColorWrites::ALL,
})],
}),
multiview_mask: None,
cache: None,
};
let pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
// 1x1 render target; `COPY_SRC` is needed so it can be copied out for readback.
let out_texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
view_formats: &[],
});
// Test-harness helper used below to copy the texture out and compare its bytes.
let readbacks = ReadbackBuffers::new(&ctx.device, &out_texture);
let mut encoder = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &out_texture.create_view(&wgpu::TextureViewDescriptor::default()),
depth_slice: None,
resolve_target: None,
ops: wgpu::Operations {
// SAFETY: the draw recorded below covers every pixel of the attachment
// (full-screen triangle, blending disabled, all channels written) before
// the attachment is stored, so no undefined contents are ever observed.
load: wgpu::LoadOp::DontCare(unsafe { wgpu::LoadOpDontCare::enabled() }),
store: wgpu::StoreOp::Store,
},
})],
..Default::default()
});
rpass.set_pipeline(&pipeline);
// Three vertices, one instance: the single full-screen triangle.
rpass.draw(0..3, 0..1);
// The pass must be dropped before `encoder` can be used again for the copy.
drop(rpass);
readbacks.copy_from(&ctx.device, &mut encoder, &out_texture);
ctx.queue.submit([encoder.finish()]);
// Assert that DONT_CARE load op was fully overridden by the draw.
// 127/255 stored as 8-bit unorm is the byte 127 in each of the four channels.
readbacks
.assert_buffer_contents(&ctx, &[127, 127, 127, 127])
.await;
}

View File

@ -500,7 +500,7 @@ fn clear_texture_via_render_passes(
},
depth_slice: None,
resolve_target: None,
ops: hal::AttachmentOps::STORE,
ops: hal::AttachmentOps::STORE | hal::AttachmentOps::LOAD_CLEAR,
clear_value: wgt::Color::TRANSPARENT,
})];
(&color_attachments_tmp[..], None)
@ -517,8 +517,8 @@ fn clear_texture_via_render_passes(
),
usage: wgt::TextureUses::DEPTH_STENCIL_WRITE,
},
depth_ops: hal::AttachmentOps::STORE,
stencil_ops: hal::AttachmentOps::STORE,
depth_ops: hal::AttachmentOps::STORE | hal::AttachmentOps::LOAD_CLEAR,
stencil_ops: hal::AttachmentOps::STORE | hal::AttachmentOps::LOAD_CLEAR,
clear_value: (0.0, 0),
}),
)

View File

@ -65,14 +65,15 @@ pub use wgt::{LoadOp, StoreOp};
fn load_hal_ops<V>(load: LoadOp<V>) -> hal::AttachmentOps {
match load {
LoadOp::Load => hal::AttachmentOps::LOAD,
LoadOp::Clear(_) => hal::AttachmentOps::empty(),
LoadOp::Clear(_) => hal::AttachmentOps::LOAD_CLEAR,
LoadOp::DontCare(_) => hal::AttachmentOps::LOAD_DONT_CARE,
}
}
fn store_hal_ops(store: StoreOp) -> hal::AttachmentOps {
match store {
StoreOp::Store => hal::AttachmentOps::STORE,
StoreOp::Discard => hal::AttachmentOps::empty(),
StoreOp::Discard => hal::AttachmentOps::STORE_DISCARD,
}
}
@ -115,6 +116,7 @@ impl<V: Copy + Default> PassChannel<Option<V>> {
Ok(ResolvedPassChannel::Operational(wgt::Operations {
load: match self.load_op.ok_or(AttachmentError::NoLoad)? {
LoadOp::Clear(clear_value) => LoadOp::Clear(handle_clear(clear_value)?),
LoadOp::DontCare(token) => LoadOp::DontCare(token),
LoadOp::Load => LoadOp::Load,
},
store: self.store_op.ok_or(AttachmentError::NoStore)?,
@ -204,7 +206,7 @@ impl ArcRenderPassColorAttachment {
fn clear_value(&self) -> Color {
match self.load_op {
LoadOp::Clear(clear_value) => clear_value,
LoadOp::Load => Color::default(),
LoadOp::DontCare(_) | LoadOp::Load => Color::default(),
}
}
}
@ -1552,13 +1554,13 @@ impl RenderPassInfo {
if let Some((aspect, view)) = self.divergent_discarded_depth_stencil_aspect {
let (depth_ops, stencil_ops) = if aspect == wgt::TextureAspect::DepthOnly {
(
hal::AttachmentOps::STORE, // clear depth
hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil
hal::AttachmentOps::LOAD_CLEAR | hal::AttachmentOps::STORE, // clear depth
hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil
)
} else {
(
hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil
hal::AttachmentOps::STORE, // clear depth
hal::AttachmentOps::LOAD_CLEAR | hal::AttachmentOps::STORE, // clear depth
)
};
let desc = hal::RenderPassDescriptor::<'_, _, dyn hal::DynTextureView> {

View File

@ -718,7 +718,7 @@ impl<A: hal::Api> Example<A> {
},
depth_slice: None,
resolve_target: None,
ops: hal::AttachmentOps::STORE,
ops: hal::AttachmentOps::STORE | hal::AttachmentOps::LOAD_CLEAR,
clear_value: wgpu_types::Color {
r: 0.1,
g: 0.2,

View File

@ -325,7 +325,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
},
depth_slice: None,
resolve_target: None,
ops: hal::AttachmentOps::STORE,
ops: hal::AttachmentOps::STORE | hal::AttachmentOps::LOAD_CLEAR,
clear_value: wgpu_types::Color::BLUE,
})],
depth_stencil_attachment: None,

View File

@ -964,7 +964,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
self.pass.resolves.clear();
for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) {
if let Some(cat) = cat.as_ref() {
if !cat.ops.contains(crate::AttachmentOps::LOAD) {
if cat.ops.contains(crate::AttachmentOps::LOAD_CLEAR) {
let value = [
cat.clear_value.r as f32,
cat.clear_value.g as f32,
@ -989,12 +989,12 @@ impl crate::CommandEncoder for super::CommandEncoder {
if let Some(ref ds) = desc.depth_stencil_attachment {
let mut flags = Direct3D12::D3D12_CLEAR_FLAGS::default();
let aspects = ds.target.view.aspects;
if !ds.depth_ops.contains(crate::AttachmentOps::LOAD)
if ds.depth_ops.contains(crate::AttachmentOps::LOAD_CLEAR)
&& aspects.contains(crate::FormatAspects::DEPTH)
{
flags |= Direct3D12::D3D12_CLEAR_FLAG_DEPTH;
}
if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD)
if ds.stencil_ops.contains(crate::AttachmentOps::LOAD_CLEAR)
&& aspects.contains(crate::FormatAspects::STENCIL)
{
flags |= Direct3D12::D3D12_CLEAR_FLAG_STENCIL;

View File

@ -567,7 +567,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
.resolve_attachments
.push((attachment, rat.view.clone()));
}
if !cat.ops.contains(crate::AttachmentOps::STORE) {
if cat.ops.contains(crate::AttachmentOps::STORE_DISCARD) {
self.state.invalidate_attachments.push(attachment);
}
}
@ -585,14 +585,16 @@ impl crate::CommandEncoder for super::CommandEncoder {
depth_slice: None,
});
if aspects.contains(crate::FormatAspects::DEPTH)
&& !dsat.depth_ops.contains(crate::AttachmentOps::STORE)
&& dsat.depth_ops.contains(crate::AttachmentOps::STORE_DISCARD)
{
self.state
.invalidate_attachments
.push(glow::DEPTH_ATTACHMENT);
}
if aspects.contains(crate::FormatAspects::STENCIL)
&& !dsat.stencil_ops.contains(crate::AttachmentOps::STORE)
&& dsat
.stencil_ops
.contains(crate::AttachmentOps::STORE_DISCARD)
{
self.state
.invalidate_attachments
@ -628,7 +630,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
.filter_map(|at| at.as_ref())
.enumerate()
{
if !cat.ops.contains(crate::AttachmentOps::LOAD) {
if cat.ops.contains(crate::AttachmentOps::LOAD_CLEAR) {
let c = &cat.clear_value;
self.cmd_buffer.commands.push(
match cat.target.view.format.sample_type(None, None).unwrap() {
@ -652,8 +654,8 @@ impl crate::CommandEncoder for super::CommandEncoder {
}
if let Some(ref dsat) = desc.depth_stencil_attachment {
let clear_depth = !dsat.depth_ops.contains(crate::AttachmentOps::LOAD);
let clear_stencil = !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD);
let clear_depth = dsat.depth_ops.contains(crate::AttachmentOps::LOAD_CLEAR);
let clear_stencil = dsat.stencil_ops.contains(crate::AttachmentOps::LOAD_CLEAR);
if clear_depth && clear_stencil {
self.cmd_buffer.commands.push(C::ClearDepthAndStencil(

View File

@ -1751,13 +1751,22 @@ bitflags!(
}
);
//TODO: it's not intuitive for the backends to consider `LOAD` being optional.
bitflags!(
/// Attachment load and store operations.
///
/// There must be at least one flag from the LOAD group and one from the STORE group set.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct AttachmentOps: u8 {
/// Load the existing contents of the attachment.
const LOAD = 1 << 0;
const STORE = 1 << 1;
/// Clear the attachment to a specified value.
const LOAD_CLEAR = 1 << 1;
/// The contents of the attachment are undefined.
const LOAD_DONT_CARE = 1 << 2;
/// Store the contents of the attachment.
const STORE = 1 << 3;
/// The contents of the attachment are undefined after the pass.
const STORE_DISCARD = 1 << 4;
}
);

View File

@ -670,9 +670,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
}
let load_action = if at.ops.contains(crate::AttachmentOps::LOAD) {
MTLLoadAction::Load
} else {
} else if at.ops.contains(crate::AttachmentOps::LOAD_DONT_CARE) {
MTLLoadAction::DontCare
} else if at.ops.contains(crate::AttachmentOps::LOAD_CLEAR) {
at_descriptor.set_clear_color(conv::map_clear_color(&at.clear_value));
MTLLoadAction::Clear
} else {
unreachable!()
};
let store_action = conv::map_store_action(
at.ops.contains(crate::AttachmentOps::STORE),
@ -690,9 +694,13 @@ impl crate::CommandEncoder for super::CommandEncoder {
let load_action = if at.depth_ops.contains(crate::AttachmentOps::LOAD) {
MTLLoadAction::Load
} else {
} else if at.depth_ops.contains(crate::AttachmentOps::LOAD_DONT_CARE) {
MTLLoadAction::DontCare
} else if at.depth_ops.contains(crate::AttachmentOps::LOAD_CLEAR) {
at_descriptor.set_clear_depth(at.clear_value.0 as f64);
MTLLoadAction::Clear
} else {
unreachable!();
};
let store_action = if at.depth_ops.contains(crate::AttachmentOps::STORE) {
MTLStoreAction::Store
@ -713,9 +721,16 @@ impl crate::CommandEncoder for super::CommandEncoder {
let load_action = if at.stencil_ops.contains(crate::AttachmentOps::LOAD) {
MTLLoadAction::Load
} else {
} else if at
.stencil_ops
.contains(crate::AttachmentOps::LOAD_DONT_CARE)
{
MTLLoadAction::DontCare
} else if at.stencil_ops.contains(crate::AttachmentOps::LOAD_CLEAR) {
at_descriptor.set_clear_stencil(at.clear_value.1);
MTLLoadAction::Clear
} else {
unreachable!()
};
let store_action = if at.stencil_ops.contains(crate::AttachmentOps::STORE) {
MTLStoreAction::Store

View File

@ -813,10 +813,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
});
let color = super::ColorAttachmentKey {
base: cat.target.make_attachment_key(cat.ops),
resolve: cat
.resolve_target
.as_ref()
.map(|target| target.make_attachment_key(crate::AttachmentOps::STORE)),
resolve: cat.resolve_target.as_ref().map(|target| {
target.make_attachment_key(
crate::AttachmentOps::LOAD_CLEAR | crate::AttachmentOps::STORE,
)
}),
};
rp_key.colors.push(Some(color));

View File

@ -452,13 +452,19 @@ pub fn map_attachment_ops(
) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) {
let load_op = if op.contains(crate::AttachmentOps::LOAD) {
vk::AttachmentLoadOp::LOAD
} else {
} else if op.contains(crate::AttachmentOps::LOAD_DONT_CARE) {
vk::AttachmentLoadOp::DONT_CARE
} else if op.contains(crate::AttachmentOps::LOAD_CLEAR) {
vk::AttachmentLoadOp::CLEAR
} else {
unreachable!()
};
let store_op = if op.contains(crate::AttachmentOps::STORE) {
vk::AttachmentStoreOp::STORE
} else {
} else if op.contains(crate::AttachmentOps::STORE_DISCARD) {
vk::AttachmentStoreOp::DONT_CARE
} else {
unreachable!()
};
(load_op, store_op)
}

View File

@ -5,7 +5,7 @@ use bytemuck::{Pod, Zeroable};
#[cfg(any(feature = "serde", test))]
use serde::{Deserialize, Serialize};
use crate::link_to_wgpu_docs;
use crate::{link_to_wgpu_docs, LoadOpDontCare};
#[cfg(doc)]
use crate::{Features, TextureFormat};
@ -697,6 +697,24 @@ pub enum LoadOp<V> {
Clear(V) = 0,
/// Loads the existing value for this attachment into the render pass.
Load = 1,
/// The render target has undefined contents at the start of the render pass.
/// This may lead to undefined behavior if you read from any of the
/// render target pixels without first writing to them.
///
/// Blending also becomes undefined behavior if the source
/// pixels are undefined.
///
/// This is the fastest option on all GPUs if you always overwrite all pixels
/// in the render target after this load operation.
///
/// Backends that don't support `DontCare` internally will pick a different (unspecified)
/// load op instead.
///
/// # Safety
///
/// - All pixels in the render target must be written to before
/// any read or a [`StoreOp::Store`] occurs.
DontCare(#[cfg_attr(feature = "serde", serde(skip))] LoadOpDontCare) = 2,
}
impl<V> LoadOp<V> {
@ -704,7 +722,9 @@ impl<V> LoadOp<V> {
pub fn eq_variant<T>(&self, other: LoadOp<T>) -> bool {
matches!(
(self, other),
(LoadOp::Clear(_), LoadOp::Clear(_)) | (LoadOp::Load, LoadOp::Load)
(LoadOp::Clear(_), LoadOp::Clear(_))
| (LoadOp::Load, LoadOp::Load)
| (LoadOp::DontCare(_), LoadOp::DontCare(_))
)
}
}

View File

@ -43,3 +43,32 @@ impl ExperimentalFeatures {
self.enabled
}
}
/// Proof-of-consent token that must be supplied to build a
/// [`LoadOp::DontCare`](crate::LoadOp::DontCare).
///
/// The intended way to obtain one is the `unsafe` constructor
/// [`LoadOpDontCare::enabled`].
///
/// NOTE(review): the derived `Default` impl also yields a token without any
/// `unsafe`. Presumably it is required because the `LoadOp::DontCare` payload is
/// marked `serde(skip)` — confirm before treating the token as a hard barrier.
#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq)]
pub struct LoadOpDontCare {
    // The field is private so code outside this module cannot write a struct
    // literal and has to go through a constructor such as `enabled()`.
    _private: (),
}

impl LoadOpDontCare {
    /// Creates the token that opts in to
    /// [`LoadOp::DontCare`](crate::LoadOp::DontCare).
    ///
    /// With that load op the render target has undefined contents when the
    /// render pass begins. Reading any render target pixel before it has been
    /// written may lead to undefined behavior, and so does blending when the
    /// source pixels are undefined.
    ///
    /// Every pixel in the render target must therefore be written before any
    /// blending or a [`StoreOp::Store`](crate::StoreOp::Store) occurs.
    ///
    /// # Safety
    ///
    /// - By calling this you acknowledge that using `LoadOp::DontCare` may lead
    ///   to undefined behavior if the conditions above are not met.
    pub const unsafe fn enabled() -> Self {
        LoadOpDontCare { _private: () }
    }
}

View File

@ -3072,6 +3072,13 @@ impl dispatch::CommandEncoderInterface for WebCommandEncoder {
clear_value = Some(wasm_bindgen::JsValue::from(map_color(color)));
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::DontCare(_token) => {
// WebGPU can't safely have a LoadOp::DontCare, so we clear to black
// which is ideal for most GPUs.
clear_value =
Some(wasm_bindgen::JsValue::from(map_color(crate::Color::BLACK)));
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::Load => webgpu_sys::GpuLoadOp::Load,
};
@ -3113,6 +3120,11 @@ impl dispatch::CommandEncoderInterface for WebCommandEncoder {
mapped_depth_stencil_attachment.set_depth_clear_value(v);
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::DontCare(_token) => {
// WebGPU can't safely have a LoadOp::DontCare, so we clear to 1.0
mapped_depth_stencil_attachment.set_depth_clear_value(1.0);
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::Load => webgpu_sys::GpuLoadOp::Load,
};
mapped_depth_stencil_attachment.set_depth_load_op(load_op);
@ -3125,6 +3137,11 @@ impl dispatch::CommandEncoderInterface for WebCommandEncoder {
mapped_depth_stencil_attachment.set_stencil_clear_value(v);
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::DontCare(_token) => {
// WebGPU can't safely have a LoadOp::DontCare, so we clear to 0
mapped_depth_stencil_attachment.set_stencil_clear_value(0);
webgpu_sys::GpuLoadOp::Clear
}
crate::LoadOp::Load => webgpu_sys::GpuLoadOp::Load,
};
mapped_depth_stencil_attachment.set_stencil_load_op(load_op);

View File

@ -424,8 +424,9 @@ fn map_texture_tagged_copy_view(
}
fn map_load_op<V: Copy>(load: &LoadOp<V>) -> LoadOp<Option<V>> {
match load {
LoadOp::Clear(clear_value) => LoadOp::Clear(Some(*clear_value)),
match *load {
LoadOp::Clear(clear_value) => LoadOp::Clear(Some(clear_value)),
LoadOp::DontCare(token) => LoadOp::DontCare(token),
LoadOp::Load => LoadOp::Load,
}
}

View File

@ -139,7 +139,7 @@ pub use wgt::{
ExperimentalFeatures, Extent3d, ExternalTextureFormat, ExternalTextureTransferFunction, Face,
Features, FeaturesWGPU, FeaturesWebGPU, FilterMode, FrontFace, GlBackendOptions,
GlFenceBehavior, Gles3MinorVersion, HalCounters, ImageSubresourceRange, ImmediateRange,
IndexFormat, InstanceDescriptor, InstanceFlags, InternalCounters, Limits,
IndexFormat, InstanceDescriptor, InstanceFlags, InternalCounters, Limits, LoadOpDontCare,
MemoryBudgetThresholds, MemoryHints, MipmapFilterMode, MultisampleState, NoopBackendOptions,
Origin2d, Origin3d, PipelineStatisticsTypes, PollError, PollStatus, PolygonMode,
PowerPreference, PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState,