mirror of
https://github.com/gfx-rs/wgpu.git
synced 2025-12-08 21:26:17 +00:00
Speed Up Benchmarks in Test (#7129)
This commit is contained in:
parent
2f50426b35
commit
03a01df3cb
@ -7,7 +7,17 @@ use criterion::{criterion_group, Criterion, Throughput};
|
||||
use nanorand::{Rng, WyRand};
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::DeviceState;
|
||||
use crate::{is_test, DeviceState};
|
||||
|
||||
// Creating 50_000 textures takes a considerable amount of time with syncval enabled.
|
||||
//
|
||||
// We greatly reduce the number of textures for the test case to keep the runtime
|
||||
// reasonable for testing.
|
||||
// Number of textures created up front for a real benchmark run.
const MAX_TEXTURE_COUNT_BENCHMARK: u32 = 50_000;
|
||||
// Bind group sizes measured in a real benchmark run.
const TEXTURE_COUNTS_BENCHMARK: &[u32] = &[5, 50, 500, 5_000, 50_000];
|
||||
|
||||
// Number of textures created when running as a test (see `is_test`).
const MAX_TEXTURE_COUNT_TEST: u32 = 5;
|
||||
// Bind group sizes exercised when running as a test.
const TEXTURE_COUNTS_TEST: &[u32] = &[5];
|
||||
|
||||
struct BindGroupState {
|
||||
device_state: DeviceState,
|
||||
@ -19,7 +29,11 @@ impl BindGroupState {
|
||||
fn new() -> Self {
|
||||
let device_state = DeviceState::new();
|
||||
|
||||
const TEXTURE_COUNT: u32 = 50_000;
|
||||
let texture_count = if is_test() {
|
||||
MAX_TEXTURE_COUNT_TEST
|
||||
} else {
|
||||
MAX_TEXTURE_COUNT_BENCHMARK
|
||||
};
|
||||
|
||||
// Performance gets considerably worse if the resources are shuffled.
|
||||
//
|
||||
@ -27,8 +41,8 @@ impl BindGroupState {
|
||||
// well defined usage order.
|
||||
let mut random = WyRand::new_seed(0x8BADF00D);
|
||||
|
||||
let mut texture_views = Vec::with_capacity(TEXTURE_COUNT as usize);
|
||||
for i in 0..TEXTURE_COUNT {
|
||||
let mut texture_views = Vec::with_capacity(texture_count as usize);
|
||||
for i in 0..texture_count {
|
||||
let texture = device_state
|
||||
.device
|
||||
.create_texture(&wgpu::TextureDescriptor {
|
||||
@ -64,7 +78,13 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
|
||||
let mut group = ctx.benchmark_group("Bind Group Creation");
|
||||
|
||||
for count in [5, 50, 500, 5_000, 50_000] {
|
||||
let count_list = if is_test() {
|
||||
TEXTURE_COUNTS_TEST
|
||||
} else {
|
||||
TEXTURE_COUNTS_BENCHMARK
|
||||
};
|
||||
|
||||
for &count in count_list {
|
||||
group.throughput(Throughput::Elements(count as u64));
|
||||
group.bench_with_input(
|
||||
format!("{} Element Bind Group", count),
|
||||
|
||||
@ -8,12 +8,12 @@ use nanorand::{Rng, WyRand};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::DeviceState;
|
||||
use crate::{is_test, DeviceState};
|
||||
|
||||
fn dispatch_count() -> usize {
|
||||
// When testing we only want to run a very lightweight version of the benchmark
|
||||
// to ensure that it does not break.
|
||||
if std::env::var("NEXTEST").is_ok() {
|
||||
if is_test() {
|
||||
8
|
||||
} else {
|
||||
10_000
|
||||
@ -28,13 +28,21 @@ fn dispatch_count() -> usize {
|
||||
// Returns the dispatch count for the bindless case: 8 when testing, 1_000 otherwise.
fn dispatch_count_bindless() -> usize {
|
||||
// When testing we only want to run a very lightweight version of the benchmark
|
||||
// to ensure that it does not break.
|
||||
// NOTE(review): both the inline NEXTEST check and the `is_test()` call appear
// in this view; presumably only the `is_test()` line is meant to remain — confirm.
if std::env::var("NEXTEST").is_ok() {
|
||||
if is_test() {
|
||||
8
|
||||
} else {
|
||||
1_000
|
||||
}
|
||||
}
|
||||
|
||||
fn thread_count_list() -> &'static [usize] {
|
||||
if is_test() {
|
||||
&[2]
|
||||
} else {
|
||||
&[2, 4, 8]
|
||||
}
|
||||
}
|
||||
|
||||
// Must match the number of textures in the computepass.wgsl shader
|
||||
const TEXTURES_PER_DISPATCH: usize = 2;
|
||||
const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
|
||||
@ -437,7 +445,7 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
group.throughput(Throughput::Elements(dispatch_count as _));
|
||||
|
||||
for time_submit in [false, true] {
|
||||
for cpasses in [1, 2, 4, 8] {
|
||||
for &cpasses in thread_count_list() {
|
||||
let dispatch_per_pass = dispatch_count / cpasses;
|
||||
|
||||
let label = if time_submit {
|
||||
@ -493,7 +501,7 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
|
||||
group.throughput(Throughput::Elements(dispatch_count as _));
|
||||
|
||||
for threads in [2, 4, 8] {
|
||||
for &threads in thread_count_list() {
|
||||
let dispatch_per_pass = dispatch_count / threads;
|
||||
group.bench_function(
|
||||
format!("{threads} threads x {dispatch_per_pass} dispatch"),
|
||||
|
||||
@ -8,18 +8,26 @@ use nanorand::{Rng, WyRand};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::DeviceState;
|
||||
use crate::{is_test, DeviceState};
|
||||
|
||||
// Returns the total number of draws for the benchmark: 8 when testing, 10_000 otherwise.
// Callers divide this by the pass/thread count to get the draws per pass.
fn draw_count() -> usize {
|
||||
// When testing we only want to run a very lightweight version of the benchmark
|
||||
// to ensure that it does not break.
|
||||
// NOTE(review): both the inline NEXTEST check and the `is_test()` call appear
// in this view; presumably only the `is_test()` line is meant to remain — confirm.
if std::env::var("NEXTEST").is_ok() {
|
||||
if is_test() {
|
||||
8
|
||||
} else {
|
||||
10_000
|
||||
}
|
||||
}
|
||||
|
||||
fn thread_count_list() -> &'static [usize] {
|
||||
if is_test() {
|
||||
&[2]
|
||||
} else {
|
||||
&[1, 2, 4, 8]
|
||||
}
|
||||
}
|
||||
|
||||
// Must match the number of textures in the renderpass.wgsl shader
|
||||
const TEXTURES_PER_DRAW: usize = 7;
|
||||
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
|
||||
@ -438,7 +446,7 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
group.throughput(Throughput::Elements(draw_count as _));
|
||||
|
||||
for time_submit in [false, true] {
|
||||
for rpasses in [1, 2, 4, 8] {
|
||||
for &rpasses in thread_count_list() {
|
||||
let draws_per_pass = draw_count / rpasses;
|
||||
|
||||
let label = if time_submit {
|
||||
@ -499,7 +507,7 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
|
||||
group.throughput(Throughput::Elements(draw_count as _));
|
||||
|
||||
for threads in [2, 4, 8] {
|
||||
for &threads in thread_count_list() {
|
||||
let draws_per_pass = draw_count / threads;
|
||||
group.bench_function(format!("{threads} threads x {draws_per_pass} draws"), |b| {
|
||||
LazyLock::force(&state);
|
||||
|
||||
@ -4,7 +4,15 @@ use criterion::{criterion_group, Criterion, Throughput};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::DeviceState;
|
||||
use crate::{is_test, DeviceState};
|
||||
|
||||
fn thread_count_list() -> &'static [usize] {
|
||||
if is_test() {
|
||||
&[2]
|
||||
} else {
|
||||
&[1, 2, 4, 8]
|
||||
}
|
||||
}
|
||||
|
||||
fn run_bench(ctx: &mut Criterion) {
|
||||
let state = LazyLock::new(DeviceState::new);
|
||||
@ -14,7 +22,7 @@ fn run_bench(ctx: &mut Criterion) {
|
||||
let mut group = ctx.benchmark_group("Resource Creation: Large Buffer");
|
||||
group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _));
|
||||
|
||||
for threads in [1, 2, 4, 8] {
|
||||
for &threads in thread_count_list() {
|
||||
let resources_per_thread = RESOURCES_TO_CREATE / threads;
|
||||
group.bench_function(
|
||||
format!("{threads} threads x {resources_per_thread} resource"),
|
||||
|
||||
@ -7,6 +7,10 @@ mod renderpass;
|
||||
mod resource_creation;
|
||||
mod shader;
|
||||
|
||||
/// Reports whether the benchmarks are being run as tests rather than as real
/// measurements.
///
/// Returns `true` when the `NEXTEST` environment variable is set to a valid
/// Unicode value (presumably by the cargo-nextest runner — confirm), and
/// `false` otherwise.
fn is_test() -> bool {
    const NEXTEST_ENV_VAR: &str = "NEXTEST";

    let lookup = std::env::var(NEXTEST_ENV_VAR);
    lookup.is_ok()
}
|
||||
|
||||
struct DeviceState {
|
||||
adapter_info: wgpu::AdapterInfo,
|
||||
device: wgpu::Device,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user