Fix performance regression in bind group processing (#8519)

This commit is contained in:
Andy Leiserson 2025-11-13 21:10:42 -08:00 committed by GitHub
parent 91c1ce5af3
commit 71820eef20
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 133 additions and 75 deletions

View File

@ -431,7 +431,7 @@ impl Binder {
pub(super) fn entries(
&self,
range: Range<usize>,
) -> impl ExactSizeIterator<Item = (usize, &'_ EntryPayload)> + '_ {
) -> impl ExactSizeIterator<Item = (usize, &'_ EntryPayload)> + Clone + '_ {
let payloads = &self.payloads[range.clone()];
zip(range, payloads)
}

View File

@ -8,8 +8,10 @@ use alloc::{borrow::Cow, boxed::Box, sync::Arc, vec::Vec};
use core::{convert::Infallible, fmt, str};
use crate::{
api_log, binding_model::BindError, command::pass::flush_bindings_helper,
resource::RawResourceAccess,
api_log,
binding_model::BindError,
command::pass::flush_bindings_helper,
resource::{RawResourceAccess, Trackable},
};
use crate::{
binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError},
@ -30,7 +32,7 @@ use crate::{
resource::{
self, Buffer, InvalidResourceError, Labeled, MissingBufferUsageError, ParentDevice,
},
track::{ResourceUsageCompatibilityError, Tracker, TrackerIndex},
track::{ResourceUsageCompatibilityError, Tracker},
Label,
};
use crate::{command::InnerCommandEncoder, resource::DestroyedResourceError};
@ -306,45 +308,65 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> {
///
/// # Indirect buffer handling
///
/// For indirect dispatches without validation, pass both `indirect_buffer`
/// and `indirect_buffer_index_if_not_validating`. The indirect buffer will
/// be added to the usage scope and the tracker.
/// The `indirect_buffer` argument should be passed for any indirect
/// dispatch (with or without validation). It will be checked for
/// conflicting usages according to WebGPU rules. For the purpose of
/// these rules, the fact that we have actually processed the buffer in
/// the validation pass is an implementation detail.
///
/// For indirect dispatches with validation, pass only `indirect_buffer`.
/// The indirect buffer will be added to the usage scope to detect usage
/// conflicts. The indirect buffer does not need to be added to the tracker;
/// the indirect validation code handles transitions manually.
/// The `track_indirect_buffer` argument should be set when doing indirect
/// dispatch *without* validation. In this case, the indirect buffer will
/// be added to the tracker in order to generate any necessary transitions
/// for that usage.
///
/// When doing indirect dispatch *with* validation, the indirect buffer is
/// processed by the validation pass and is not used by the actual dispatch.
/// The indirect validation code handles transitions for the validation
/// pass.
fn flush_bindings(
&mut self,
indirect_buffer: Option<&Arc<Buffer>>,
indirect_buffer_index_if_not_validating: Option<TrackerIndex>,
track_indirect_buffer: bool,
) -> Result<(), ComputePassErrorInner> {
let mut scope = self.pass.base.device.new_usage_scope();
for bind_group in self.pass.binder.list_active() {
unsafe { scope.merge_bind_group(&bind_group.used)? };
unsafe { self.pass.scope.merge_bind_group(&bind_group.used)? };
}
// When indirect validation is turned on, our actual use of the buffer
// is `STORAGE_READ_ONLY`, but for usage scope validation, we still want
// to treat it as indirect so we can detect the conflicts prescribed by
// WebGPU. The usage scope we construct here never leaves this function
// (and is not used to populate a tracker), so it's fine to do this.
// Add the indirect buffer. Because usage scopes are per-dispatch, this
// is the only place where INDIRECT usage could be added, and it is safe
// for us to remove it below.
if let Some(buffer) = indirect_buffer {
scope
self.pass
.scope
.buffers
.merge_single(buffer, wgt::BufferUses::INDIRECT)?;
}
// Add the state of the indirect buffer, if needed (see above).
self.intermediate_trackers
.buffers
.set_multiple(&mut scope.buffers, indirect_buffer_index_if_not_validating);
// For compute, usage scopes are associated with each dispatch and not
// with the pass as a whole. However, because the cost of creating and
// dropping `UsageScope`s is significant (even with the pool), we
// add and then remove usage from a single usage scope.
flush_bindings_helper(&mut self.pass, |bind_group| {
for bind_group in self.pass.binder.list_active() {
self.intermediate_trackers
.set_from_bind_group(&mut scope, &bind_group.used)
})?;
.set_and_remove_from_usage_scope_sparse(&mut self.pass.scope, &bind_group.used);
}
if track_indirect_buffer {
self.intermediate_trackers
.buffers
.set_and_remove_from_usage_scope_sparse(
&mut self.pass.scope.buffers,
indirect_buffer.map(|buf| buf.tracker_index()),
);
} else if let Some(buffer) = indirect_buffer {
self.pass
.scope
.buffers
.remove_usage(buffer, wgt::BufferUses::INDIRECT);
}
flush_bindings_helper(&mut self.pass)?;
CommandEncoder::drain_barriers(
self.pass.base.raw_encoder,
@ -827,7 +849,7 @@ fn dispatch(state: &mut State, groups: [u32; 3]) -> Result<(), ComputePassErrorI
state.is_ready()?;
state.flush_bindings(None, None)?;
state.flush_bindings(None, false)?;
let groups_size_limit = state
.pass
@ -1028,7 +1050,7 @@ fn dispatch_indirect(
}]);
}
state.flush_bindings(Some(&buffer), None)?;
state.flush_bindings(Some(&buffer), false)?;
unsafe {
state
.pass
@ -1037,8 +1059,7 @@ fn dispatch_indirect(
.dispatch_indirect(params.dst_buffer, 0);
}
} else {
use crate::resource::Trackable;
state.flush_bindings(Some(&buffer), Some(buffer.tracker_index()))?;
state.flush_bindings(Some(&buffer), true)?;
let buf_raw = buffer.try_raw(state.pass.base.snatch_guard)?;
unsafe {

View File

@ -132,17 +132,20 @@ where
Ok(())
}
/// Helper for `flush_bindings` implementing the portions that are the same for
/// compute and render passes.
pub(super) fn flush_bindings_helper<F>(
state: &mut PassState,
mut f: F,
) -> Result<(), DestroyedResourceError>
where
F: FnMut(&Arc<BindGroup>),
{
for bind_group in state.binder.list_active() {
f(bind_group);
/// Implementation of `flush_bindings` for both compute and render passes.
///
/// See the compute pass version of `State::flush_bindings` for an explanation
/// of some differences in handling the two types of passes.
pub(super) fn flush_bindings_helper(state: &mut PassState) -> Result<(), DestroyedResourceError> {
let range = state.binder.take_rebind_range();
if range.is_empty() {
return Ok(());
}
let entries = state.binder.entries(range);
for (_, entry) in entries.clone() {
let bind_group = entry.group.as_ref().unwrap();
state.base.buffer_memory_init_actions.extend(
bind_group.used_buffer_ranges.iter().filter_map(|action| {
@ -171,25 +174,20 @@ where
state.base.as_actions.extend(used_resource);
}
let range = state.binder.take_rebind_range();
let entries = state.binder.entries(range);
match state.binder.pipeline_layout.as_ref() {
Some(pipeline_layout) if entries.len() != 0 => {
for (i, e) in entries {
if let Some(group) = e.group.as_ref() {
let raw_bg = group.try_raw(state.base.snatch_guard)?;
unsafe {
state.base.raw_encoder.set_bind_group(
pipeline_layout.raw(),
i as u32,
Some(raw_bg),
&e.dynamic_offsets,
);
}
if let Some(pipeline_layout) = state.binder.pipeline_layout.as_ref() {
for (i, e) in entries {
if let Some(group) = e.group.as_ref() {
let raw_bg = group.try_raw(state.base.snatch_guard)?;
unsafe {
state.base.raw_encoder.set_bind_group(
pipeline_layout.raw(),
i as u32,
Some(raw_bg),
&e.dynamic_offsets,
);
}
}
}
_ => {}
}
Ok(())

View File

@ -601,7 +601,7 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> {
/// See the compute pass version for an explanation of some ways that
/// `flush_bindings` differs between the two types of passes.
fn flush_bindings(&mut self) -> Result<(), RenderPassErrorInner> {
flush_bindings_helper(&mut self.pass, |_| {})?;
flush_bindings_helper(&mut self.pass)?;
Ok(())
}

View File

@ -1801,9 +1801,13 @@ fn validate_command_buffer(
}
}
}
// WebGPU requires that we check every bind group referenced during
// encoding, even ones that may have been replaced before being used.
// TODO(<https://github.com/gfx-rs/wgpu/issues/8510>): Optimize this.
{
profiling::scope!("bind groups");
for bind_group in &cmd_buf_data.trackers.bind_groups {
// This checks the bind group and all resources it references.
bind_group.try_raw(snatch_guard)?;
}
}

View File

@ -261,6 +261,22 @@ impl BufferUsageScope {
)
}
}
/// Removes the indicated usage from the scope.
///
/// Note that multiple uses of the same type get merged. It is only
/// safe to remove a usage if you are certain you aren't going to
/// erase another usage you don't know about.
pub fn remove_usage(&mut self, buffer: &Buffer, usage: BufferUses) {
let index = buffer.tracker_index().as_usize();
// If the buffer is not tracked in this scope there is no recorded
// state to clear, so this is a no-op.
if self.metadata.contains(index) {
// SAFETY: If the buffer is part of this usage scope, then the index
// is in range.
unsafe {
// Clear only the requested usage bits, leaving any other recorded
// usages for this buffer intact.
*self.state.get_unchecked_mut(index) &= !usage;
}
}
}
}
/// Stores all buffer state within a command buffer.
@ -303,7 +319,7 @@ impl BufferTracker {
}
/// Extend the vectors to let the given index be valid.
pub fn allow_index(&mut self, index: usize) {
fn allow_index(&mut self, index: usize) {
if index >= self.start.len() {
self.set_size(index + 1);
}
@ -447,7 +463,7 @@ impl BufferTracker {
///
/// If a resource identified by `index_source` is not found in the usage
/// scope.
pub fn set_multiple(
pub fn set_and_remove_from_usage_scope_sparse(
&mut self,
scope: &mut BufferUsageScope,
index_source: impl IntoIterator<Item = TrackerIndex>,
@ -461,8 +477,9 @@ impl BufferTracker {
let index = index.as_usize();
scope.tracker_assert_in_bounds(index);
unsafe {
assert!(scope.metadata.contains_unchecked(index));
if unsafe { !scope.metadata.contains_unchecked(index) } {
continue;
}
// SAFETY: we checked that the index is in bounds for the scope, and
@ -479,6 +496,8 @@ impl BufferTracker {
},
)
};
unsafe { scope.metadata.remove(index) };
}
}

View File

@ -130,6 +130,14 @@ impl<T: Clone> ResourceMetadata<T> {
};
iterate_bitvec_indices(&self.owned)
}
/// Remove the resource with the given index from the set.
///
/// # Safety
///
/// NOTE(review): presumably `index` must be in bounds for the backing
/// `resources` storage (it is dereferenced via `get_unchecked_mut`) —
/// confirm the caller contract before relying on this.
pub(super) unsafe fn remove(&mut self, index: usize) {
// SAFETY: caller guarantees `index` is a valid slot in `resources`.
unsafe {
*self.resources.get_unchecked_mut(index) = None;
}
// Mark the slot as unoccupied in the ownership bitvec.
self.owned.set(index, false);
}
}
/// A source of resource metadata.

View File

@ -661,7 +661,8 @@ impl Tracker {
}
/// Iterates through all resources in the given bind group and adopts
/// the state given for those resources in the UsageScope.
/// the state given for those resources in the UsageScope. It also
/// removes all touched resources from the usage scope.
///
/// If a transition is needed to get the resources into the needed
/// state, those transitions are stored within the tracker. A
@ -669,8 +670,10 @@ impl Tracker {
/// [`TextureTracker::drain_transitions`] is needed to get those transitions.
///
/// This is a really funky method used by Compute Passes to generate
/// barriers for each dispatch. We use the bind group as a source of which
/// IDs to look at.
/// barriers after a call to dispatch without needing to iterate
over all elements in the usage scope. We use each
bind group as a source of which IDs to look at. The bind groups
/// must have first been added to the usage scope.
///
/// Only stateful things are merged in here, all other resources are owned
/// indirectly by the bind group.
@ -678,12 +681,16 @@ impl Tracker {
/// # Panics
///
/// If a resource in the `bind_group` is not found in the usage scope.
pub fn set_from_bind_group(&mut self, scope: &mut UsageScope, bind_group: &BindGroupStates) {
self.buffers.set_multiple(
pub fn set_and_remove_from_usage_scope_sparse(
&mut self,
scope: &mut UsageScope,
bind_group: &BindGroupStates,
) {
self.buffers.set_and_remove_from_usage_scope_sparse(
&mut scope.buffers,
bind_group.buffers.used_tracker_indices(),
);
self.textures
.set_multiple(&mut scope.textures, &bind_group.views);
.set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.views);
}
}

View File

@ -615,7 +615,7 @@ impl TextureTracker {
/// # Panics
///
/// If a resource in `bind_group_state` is not found in the usage scope.
pub fn set_multiple(
pub fn set_and_remove_from_usage_scope_sparse(
&mut self,
scope: &mut TextureUsageScope,
bind_group_state: &TextureViewBindGroupState,
@ -627,12 +627,11 @@ impl TextureTracker {
for (view, _) in bind_group_state.views.iter() {
let index = view.parent.tracker_index().as_usize();
scope.tracker_assert_in_bounds(index);
unsafe {
assert!(scope.metadata.contains_unchecked(index));
}
if unsafe { !scope.metadata.contains_unchecked(index) } {
continue;
}
let texture_selector = &view.parent.full_range;
// SAFETY: we checked that the index is in bounds for the scope, and
// called `set_size` to ensure it is valid for `self`.
@ -651,6 +650,8 @@ impl TextureTracker {
&mut self.temp,
)
};
unsafe { scope.metadata.remove(index) };
}
}
}