Move subgroup size info to AdapterInfo (#8609)

Co-authored-by: JMS55 <47158642+JMS55@users.noreply.github.com>
This commit is contained in:
Connor Fitzgerald 2025-12-04 12:01:56 -05:00 committed by GitHub
parent 57a6cc1514
commit 9a41de4e96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 122 additions and 89 deletions

View File

@ -140,6 +140,23 @@ event happens. Our new log policy is as follows:
By @cwfitzgerald in [#8579](https://github.com/gfx-rs/wgpu/pull/8579).
#### `subgroup_{min,max}_size` renamed and moved from `Limits` -> `AdapterInfo`
To bring our code in line with the WebGPU spec, we have moved information about subgroup size
from limits to adapter info. Limits was not the correct place for this anyway, and we had some
code special casing those limits.
Additionally we have renamed the fields to match the spec.
```diff
- let min = limits.min_subgroup_size;
+ let min = info.subgroup_min_size;
- let max = limits.max_subgroup_size;
+ let max = info.subgroup_max_size;
```
By @cwfitzgerald in [#8609](https://github.com/gfx-rs/wgpu/pull/8609).
### New Features
- Added support for transient textures on Vulkan and Metal. By @opstic in [#8247](https://github.com/gfx-rs/wgpu/pull/8247)

View File

@ -80,13 +80,8 @@ impl GPUAdapter {
fn info(&self, scope: &mut v8::HandleScope) -> v8::Global<v8::Object> {
self.info.get(scope, |_| {
let info = self.instance.adapter_get_info(self.id);
let limits = self.instance.adapter_limits(self.id);
GPUAdapterInfo {
info,
subgroup_min_size: limits.min_subgroup_size,
subgroup_max_size: limits.max_subgroup_size,
}
GPUAdapterInfo { info }
})
}
@ -429,8 +424,6 @@ impl GPUSupportedFeatures {
pub struct GPUAdapterInfo {
pub info: wgpu_types::AdapterInfo,
pub subgroup_min_size: u32,
pub subgroup_max_size: u32,
}
impl GarbageCollected for GPUAdapterInfo {
@ -473,12 +466,12 @@ impl GPUAdapterInfo {
#[getter]
fn subgroup_min_size(&self) -> u32 {
self.subgroup_min_size
self.info.subgroup_min_size
}
#[getter]
fn subgroup_max_size(&self) -> u32 {
self.subgroup_max_size
self.info.subgroup_max_size
}
#[getter]

View File

@ -119,13 +119,8 @@ impl GPUDevice {
) -> v8::Global<v8::Object> {
self.adapter_info.get(scope, |_| {
let info = self.instance.adapter_get_info(self.adapter);
let limits = self.instance.adapter_limits(self.adapter);
GPUAdapterInfo {
info,
subgroup_min_size: limits.min_subgroup_size,
subgroup_max_size: limits.max_subgroup_size,
}
GPUAdapterInfo { info }
})
}

View File

@ -323,6 +323,8 @@ mod tests {
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::Vulkan,
subgroup_min_size: 32,
subgroup_max_size: 32,
transient_saves_memory: true,
};

View File

@ -116,6 +116,15 @@ impl super::Adapter {
}
.unwrap();
let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
<*mut _>::cast(&mut features1),
size_of_val(&features1) as u32,
)
};
let driver_version = unsafe { adapter.CheckInterfaceSupport(&Dxgi::IDXGIDevice::IID) }
.ok()
.map(|i| {
@ -156,6 +165,8 @@ impl super::Adapter {
driver_version.0, driver_version.1, driver_version.2, driver_version.3
),
driver_info: String::new(),
subgroup_min_size: features1.WaveLaneCountMin,
subgroup_max_size: features1.WaveLaneCountMax,
transient_saves_memory: false,
};
@ -483,15 +494,6 @@ impl super::Adapter {
};
features.set(wgt::Features::TEXTURE_FORMAT_P010, p010_format_supported);
let mut features1 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS1::default();
let hr = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS1,
<*mut _>::cast(&mut features1),
size_of_val(&features1) as u32,
)
};
features.set(
wgt::Features::SHADER_INT64,
shader_model >= naga::back::hlsl::ShaderModel::V6_0
@ -694,8 +696,6 @@ impl super::Adapter {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
min_subgroup_size: 4, // Not using `features1.WaveLaneCountMin` as it is unreliable
max_subgroup_size: 128,
// The immediates are part of the root signature which
// has a limit of 64 DWORDS (256 bytes), but other resources
// also share the root signature:

View File

@ -190,6 +190,8 @@ impl super::Adapter {
device_pci_bus_id: String::new(),
driver_info: version,
backend: wgt::Backend::Gl,
subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
transient_saves_memory: false,
}
}
@ -751,8 +753,6 @@ impl super::Adapter {
} else {
!0
},
min_subgroup_size: 0,
max_subgroup_size: 0,
max_immediate_size: super::MAX_IMMEDIATES as u32 * 4,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,

View File

@ -1100,8 +1100,6 @@ impl super::PrivateCapabilities {
max_vertex_buffers: self.max_vertex_buffers,
max_vertex_attributes: 31,
max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
min_subgroup_size: 4,
max_subgroup_size: 64,
max_immediate_size: 0x1000,
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,

View File

@ -160,6 +160,12 @@ impl crate::Instance for Instance {
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::Metal,
// These are hardcoded based on typical values for Metal devices
//
// See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
// for more information.
subgroup_min_size: 4,
subgroup_max_size: 64,
transient_saves_memory: shared.private_caps.supports_memoryless_storage,
},
features: shared.private_caps.features(),

View File

@ -141,6 +141,8 @@ pub fn adapter_info() -> wgt::AdapterInfo {
driver: String::from("wgpu"),
driver_info: String::new(),
backend: wgt::Backend::Noop,
subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
transient_saves_memory: false,
}
}
@ -189,8 +191,6 @@ pub const CAPABILITIES: crate::Capabilities = {
max_compute_workgroup_size_y: ALLOC_MAX_U32,
max_compute_workgroup_size_z: ALLOC_MAX_U32,
max_compute_workgroups_per_dimension: ALLOC_MAX_U32,
min_subgroup_size: 1,
max_subgroup_size: ALLOC_MAX_U32,
max_immediate_size: ALLOC_MAX_U32,
max_non_sampler_bindings: ALLOC_MAX_U32,

View File

@ -1331,14 +1331,6 @@ impl PhysicalDeviceProperties {
.min(crate::MAX_VERTEX_BUFFERS as u32),
max_vertex_attributes: limits.max_vertex_input_attributes,
max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
min_subgroup_size: self
.subgroup_size_control
.map(|subgroup_size| subgroup_size.min_subgroup_size)
.unwrap_or(0),
max_subgroup_size: self
.subgroup_size_control
.map(|subgroup_size| subgroup_size.max_subgroup_size)
.unwrap_or(0),
max_immediate_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
@ -1776,6 +1768,14 @@ impl super::Instance {
.to_owned()
},
backend: wgt::Backend::Vulkan,
subgroup_min_size: phd_capabilities
.subgroup_size_control
.map(|subgroup_size| subgroup_size.min_subgroup_size)
.unwrap_or(wgt::MINIMUM_SUBGROUP_MIN_SIZE),
subgroup_max_size: phd_capabilities
.subgroup_size_control
.map(|subgroup_size| subgroup_size.max_subgroup_size)
.unwrap_or(wgt::MAXIMUM_SUBGROUP_MAX_SIZE),
transient_saves_memory: supports_lazily_allocated,
};
let (available_features, mut downlevel_flags) =

View File

@ -1,6 +1,7 @@
use std::io;
use bitflags::Flags;
use wgpu::AdapterInfo;
use crate::{
report::{AdapterReport, GpuReport},
@ -89,21 +90,38 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
// Adapter Info //
//////////////////
let AdapterInfo {
name,
vendor,
device,
device_type,
device_pci_bus_id,
driver,
driver_info,
backend,
subgroup_min_size,
subgroup_max_size,
transient_saves_memory,
} = info;
if matches!(verbosity, PrintingVerbosity::NameOnly) {
writeln!(output, "Adapter {idx}: {} ({:?})", info.name, info.backend)?;
return Ok(());
}
writeln!(output, "Adapter {idx}:")?;
writeln!(output, "\t Backend: {:?}", info.backend)?;
writeln!(output, "\t Name: {}", info.name)?;
writeln!(output, "\t VendorID: {:#X?}", info.vendor)?;
writeln!(output, "\t DeviceID: {:#X?}", info.device)?;
writeln!(output, "\t DevicePCIBusId: {}", print_empty_string(&info.device_pci_bus_id))?;
writeln!(output, "\t Type: {:?}", info.device_type)?;
writeln!(output, "\t Driver: {}", print_empty_string(&info.driver))?;
writeln!(output, "\t DriverInfo: {}", print_empty_string(&info.driver_info))?;
writeln!(output, "\tWebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;
writeln!(output, "\t Backend: {backend:?}")?;
writeln!(output, "\t Name: {name}")?;
writeln!(output, "\t Vendor ID: {vendor:#X?}")?;
writeln!(output, "\t Device ID: {device:#X?}")?;
writeln!(output, "\t Device PCI Bus ID: {}", print_empty_string(device_pci_bus_id))?;
writeln!(output, "\t Type: {device_type:?}")?;
writeln!(output, "\t Driver: {}", print_empty_string(driver))?;
writeln!(output, "\t Driver Info: {}", print_empty_string(driver_info))?;
writeln!(output, "\t Subgroup Min Size: {subgroup_min_size}")?;
writeln!(output, "\t Subgroup Max Size: {subgroup_max_size}")?;
writeln!(output, "\tTransient Saves Memory: {transient_saves_memory}")?;
writeln!(output, "\t WebGPU Compliant: {:?}", downlevel.is_webgpu_compliant())?;
if matches!(verbosity, PrintingVerbosity::Information) {
return Ok(());
@ -157,8 +175,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
max_compute_workgroup_size_y,
max_compute_workgroup_size_z,
max_compute_workgroups_per_dimension,
min_subgroup_size,
max_subgroup_size,
max_immediate_size,
max_non_sampler_bindings,
@ -195,8 +211,6 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
writeln!(output, "\t\t Max Vertex Buffers: {max_vertex_buffers}")?;
writeln!(output, "\t\t Max Vertex Attributes: {max_vertex_attributes}")?;
writeln!(output, "\t\t Max Vertex Buffer Array Stride: {max_vertex_buffer_array_stride}")?;
writeln!(output, "\t\t Min Subgroup Size: {min_subgroup_size}")?;
writeln!(output, "\t\t Max Subgroup Size: {max_subgroup_size}")?;
writeln!(output, "\t\t Max Immediate data Size: {max_immediate_size}")?;
writeln!(output, "\t\t Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?;
writeln!(output, "\t\t Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?;

View File

@ -132,6 +132,32 @@ pub struct AdapterInfo {
pub driver_info: String,
/// Backend used for device
pub backend: Backend,
/// Minimum possible size of a subgroup on this adapter. Will
/// never be lower than [`crate::MINIMUM_SUBGROUP_MIN_SIZE`].
///
/// This will vary from device to device. Typical values are listed below.
///
/// - NVIDIA: 32
/// - AMD GCN/Vega: 64
/// - AMD RDNA+: 32
/// - Intel: 8 or 16
/// - Qualcomm: 64
/// - WARP: 4
/// - lavapipe: 8
pub subgroup_min_size: u32,
/// Maximum possible size of a subgroup on this adapter. Will
/// never be higher than [`crate::MAXIMUM_SUBGROUP_MAX_SIZE`].
///
/// This will vary from device to device. Typical values are listed below:
///
/// - NVIDIA: 32
/// - AMD GCN/Vega: 64
/// - AMD RDNA+: 64
/// - Intel: 16 or 32
/// - Qualcomm: 128
/// - WARP: 4 or 128
/// - lavapipe: 8
pub subgroup_max_size: u32,
/// If true, adding [`TextureUsages::TRANSIENT`] to a texture will decrease memory usage.
pub transient_saves_memory: bool,
}

View File

@ -207,6 +207,17 @@ pub const QUERY_SET_MAX_QUERIES: u32 = 4096;
#[doc = link_to_wgpu_docs!(["query"]: "struct.QuerySet.html")]
pub const QUERY_SIZE: u32 = 8;
/// The minimum allowed value for [`AdapterInfo::subgroup_min_size`].
///
/// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
/// where you can always use these values on all devices
pub const MINIMUM_SUBGROUP_MIN_SIZE: u32 = 4;
/// The maximum allowed value for [`AdapterInfo::subgroup_max_size`].
///
/// See <https://gpuweb.github.io/gpuweb/#gpuadapterinfo>
/// where you can always use these values on all devices.
pub const MAXIMUM_SUBGROUP_MAX_SIZE: u32 = 128;
/// Passed to `Device::poll` to control how and if it should block.
#[derive(Clone, Debug)]
pub enum PollType<T> {

View File

@ -56,9 +56,6 @@ macro_rules! with_limits {
$macro_name!(max_compute_workgroup_size_z, Ordering::Less);
$macro_name!(max_compute_workgroups_per_dimension, Ordering::Less);
$macro_name!(min_subgroup_size, Ordering::Greater);
$macro_name!(max_subgroup_size, Ordering::Less);
$macro_name!(max_immediate_size, Ordering::Less);
$macro_name!(max_non_sampler_bindings, Ordering::Less);
@ -214,10 +211,6 @@ pub struct Limits {
/// Defaults to 65535. Higher is "better".
pub max_compute_workgroups_per_dimension: u32,
/// Minimal number of invocations in a subgroup. Lower is "better".
pub min_subgroup_size: u32,
/// Maximal number of invocations in a subgroup. Higher is "better".
pub max_subgroup_size: u32,
/// Amount of storage available for immediates in bytes. Defaults to 0. Higher is "better".
/// Requesting more than 0 during device creation requires [`Features::IMMEDIATES`] to be enabled.
///
@ -316,8 +309,6 @@ impl Limits {
/// max_compute_workgroup_size_y: 256,
/// max_compute_workgroup_size_z: 64,
/// max_compute_workgroups_per_dimension: 65535,
/// min_subgroup_size: 0,
/// max_subgroup_size: 0,
/// max_immediate_size: 0,
/// max_non_sampler_bindings: 1_000_000,
/// max_task_workgroup_total_count: 0,
@ -369,8 +360,6 @@ impl Limits {
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
max_compute_workgroups_per_dimension: 65535,
min_subgroup_size: 0,
max_subgroup_size: 0,
max_immediate_size: 0,
max_non_sampler_bindings: 1_000_000,
@ -414,8 +403,6 @@ impl Limits {
/// max_vertex_buffers: 8,
/// max_vertex_attributes: 16,
/// max_vertex_buffer_array_stride: 2048,
/// min_subgroup_size: 0,
/// max_subgroup_size: 0,
/// max_immediate_size: 0,
/// min_uniform_buffer_offset_alignment: 256,
/// min_storage_buffer_offset_alignment: 256,
@ -491,8 +478,6 @@ impl Limits {
/// max_vertex_buffers: 8,
/// max_vertex_attributes: 16,
/// max_vertex_buffer_array_stride: 255, // +
/// min_subgroup_size: 0,
/// max_subgroup_size: 0,
/// max_immediate_size: 0,
/// min_uniform_buffer_offset_alignment: 256,
/// min_storage_buffer_offset_alignment: 256,
@ -536,8 +521,6 @@ impl Limits {
max_compute_workgroup_size_y: 0,
max_compute_workgroup_size_z: 0,
max_compute_workgroups_per_dimension: 0,
min_subgroup_size: 0,
max_subgroup_size: 0,
// Value supported by Intel Celeron B830 on Windows (OpenGL 3.1)
max_inter_stage_shader_components: 31,
@ -647,12 +630,7 @@ impl Limits {
macro_rules! check_with_fail_fn {
($name:ident, $ordering:expr) => {
let invalid_ord = $ordering.reverse();
// In the case of `min_subgroup_size`, requesting a value of
// zero means "I'm not going to use subgroups", so we have to
// special case that. If any of our minimum limits could
// meaningfully go all the way to zero, that would conflict with
// this.
if self.$name != 0 && self.$name.cmp(&allowed.$name) == invalid_ord {
if self.$name.cmp(&allowed.$name) == invalid_ord {
fail_fn(stringify!($name), self.$name as u64, allowed.$name as u64);
if fatal {
return;
@ -661,13 +639,6 @@ impl Limits {
};
}
if self.min_subgroup_size > self.max_subgroup_size {
fail_fn(
"max_subgroup_size",
self.min_subgroup_size as u64,
allowed.min_subgroup_size as u64,
);
}
with_limits!(check_with_fail_fn);
}

View File

@ -818,9 +818,6 @@ fn map_wgt_limits(limits: webgpu_sys::GpuSupportedLimits) -> wgt::Limits {
max_compute_workgroup_size_y: limits.max_compute_workgroup_size_y(),
max_compute_workgroup_size_z: limits.max_compute_workgroup_size_z(),
max_compute_workgroups_per_dimension: limits.max_compute_workgroups_per_dimension(),
// The following are not part of WebGPU
min_subgroup_size: wgt::Limits::default().min_subgroup_size,
max_subgroup_size: wgt::Limits::default().max_subgroup_size,
max_immediate_size: wgt::Limits::default().max_immediate_size,
max_non_sampler_bindings: wgt::Limits::default().max_non_sampler_bindings,
max_inter_stage_shader_components: wgt::Limits::default().max_inter_stage_shader_components,
@ -1716,6 +1713,8 @@ impl dispatch::AdapterInterface for WebAdapter {
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::BrowserWebGpu,
subgroup_min_size: wgt::MINIMUM_SUBGROUP_MIN_SIZE,
subgroup_max_size: wgt::MAXIMUM_SUBGROUP_MAX_SIZE,
transient_saves_memory: false,
}
}

View File

@ -150,8 +150,9 @@ pub use wgt::{
TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType,
TextureTransition, TextureUsages, TextureUses, TextureViewDimension, Trace, VertexAttribute,
VertexFormat, VertexStepMode, WasmNotSend, WasmNotSendSync, WasmNotSync, COPY_BUFFER_ALIGNMENT,
COPY_BYTES_PER_ROW_ALIGNMENT, IMMEDIATES_ALIGNMENT, MAP_ALIGNMENT,
QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE, VERTEX_ALIGNMENT,
COPY_BYTES_PER_ROW_ALIGNMENT, IMMEDIATES_ALIGNMENT, MAP_ALIGNMENT, MAXIMUM_SUBGROUP_MAX_SIZE,
MINIMUM_SUBGROUP_MIN_SIZE, QUERY_RESOLVE_BUFFER_ALIGNMENT, QUERY_SET_MAX_QUERIES, QUERY_SIZE,
VERTEX_ALIGNMENT,
};
#[expect(deprecated)]