Speed Up Benchmarks in Test (#7129)

This commit is contained in:
Connor Fitzgerald 2025-02-13 18:48:13 -05:00 committed by GitHub
parent 2f50426b35
commit 03a01df3cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 64 additions and 16 deletions

View File

@ -7,7 +7,17 @@ use criterion::{criterion_group, Criterion, Throughput};
use nanorand::{Rng, WyRand};
use std::sync::LazyLock;
use crate::DeviceState;
use crate::{is_test, DeviceState};
// With syncval enabled, creating 50_000 textures takes a considerable amount of time.
//
// Test runs therefore use a drastically smaller texture count so that their
// runtime stays reasonable; only real benchmark runs use the full-size set.

/// Bind group sizes exercised by a real benchmark run.
const TEXTURE_COUNTS_BENCHMARK: &[u32] = &[5, 50, 500, 5_000, 50_000];
/// Number of textures created up front for a real benchmark run.
const MAX_TEXTURE_COUNT_BENCHMARK: u32 = 50_000;

/// Bind group sizes exercised by a test run.
const TEXTURE_COUNTS_TEST: &[u32] = &[5];
/// Number of textures created up front for a test run.
const MAX_TEXTURE_COUNT_TEST: u32 = 5;
struct BindGroupState {
device_state: DeviceState,
@ -19,7 +29,11 @@ impl BindGroupState {
fn new() -> Self {
let device_state = DeviceState::new();
const TEXTURE_COUNT: u32 = 50_000;
let texture_count = if is_test() {
MAX_TEXTURE_COUNT_TEST
} else {
MAX_TEXTURE_COUNT_BENCHMARK
};
// Performance gets considerably worse if the resources are shuffled.
//
@ -27,8 +41,8 @@ impl BindGroupState {
// well defined usage order.
let mut random = WyRand::new_seed(0x8BADF00D);
let mut texture_views = Vec::with_capacity(TEXTURE_COUNT as usize);
for i in 0..TEXTURE_COUNT {
let mut texture_views = Vec::with_capacity(texture_count as usize);
for i in 0..texture_count {
let texture = device_state
.device
.create_texture(&wgpu::TextureDescriptor {
@ -64,7 +78,13 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Bind Group Creation");
for count in [5, 50, 500, 5_000, 50_000] {
let count_list = if is_test() {
TEXTURE_COUNTS_TEST
} else {
TEXTURE_COUNTS_BENCHMARK
};
for &count in count_list {
group.throughput(Throughput::Elements(count as u64));
group.bench_with_input(
format!("{} Element Bind Group", count),

View File

@ -8,12 +8,12 @@ use nanorand::{Rng, WyRand};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;
use crate::DeviceState;
use crate::{is_test, DeviceState};
fn dispatch_count() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
10_000
@ -28,13 +28,21 @@ fn dispatch_count() -> usize {
/// Returns the number of dispatches used by the bindless variant of the benchmark:
/// 8 when the test check succeeds, 1_000 otherwise.
fn dispatch_count_bindless() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
1_000
}
}
/// Returns the pass/thread counts that the benchmark groups iterate over.
///
/// A test run only exercises a single count (`2`) to keep the runtime short.
/// A benchmark run covers `1, 2, 4, 8`: the list includes `1` so that the loop
/// which previously iterated `[1, 2, 4, 8]` directly keeps its single-pass
/// configuration, and so that this helper matches the equivalent helpers in
/// the other benchmark files.
fn thread_count_list() -> &'static [usize] {
    if is_test() {
        &[2]
    } else {
        &[1, 2, 4, 8]
    }
}
// Must match the number of textures in the computepass.wgsl shader
const TEXTURES_PER_DISPATCH: usize = 2;
const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
@ -437,7 +445,7 @@ fn run_bench(ctx: &mut Criterion) {
group.throughput(Throughput::Elements(dispatch_count as _));
for time_submit in [false, true] {
for cpasses in [1, 2, 4, 8] {
for &cpasses in thread_count_list() {
let dispatch_per_pass = dispatch_count / cpasses;
let label = if time_submit {
@ -493,7 +501,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
group.throughput(Throughput::Elements(dispatch_count as _));
for threads in [2, 4, 8] {
for &threads in thread_count_list() {
let dispatch_per_pass = dispatch_count / threads;
group.bench_function(
format!("{threads} threads x {dispatch_per_pass} dispatch"),

View File

@ -8,18 +8,26 @@ use nanorand::{Rng, WyRand};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;
use crate::DeviceState;
use crate::{is_test, DeviceState};
/// Returns the number of draws used by the benchmark: 8 when the test check
/// succeeds, 10_000 otherwise.
fn draw_count() -> usize {
// When testing we only want to run a very lightweight version of the benchmark
// to ensure that it does not break.
if std::env::var("NEXTEST").is_ok() {
if is_test() {
8
} else {
10_000
}
}
/// Returns the pass/thread counts that the benchmark groups iterate over.
///
/// Test runs get a single count to stay lightweight; benchmark runs get the
/// full `1, 2, 4, 8` sweep.
fn thread_count_list() -> &'static [usize] {
    const TEST_COUNTS: &[usize] = &[2];
    const BENCHMARK_COUNTS: &[usize] = &[1, 2, 4, 8];

    if is_test() {
        return TEST_COUNTS;
    }
    BENCHMARK_COUNTS
}
// Must match the number of textures in the renderpass.wgsl shader
const TEXTURES_PER_DRAW: usize = 7;
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
@ -438,7 +446,7 @@ fn run_bench(ctx: &mut Criterion) {
group.throughput(Throughput::Elements(draw_count as _));
for time_submit in [false, true] {
for rpasses in [1, 2, 4, 8] {
for &rpasses in thread_count_list() {
let draws_per_pass = draw_count / rpasses;
let label = if time_submit {
@ -499,7 +507,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
group.throughput(Throughput::Elements(draw_count as _));
for threads in [2, 4, 8] {
for &threads in thread_count_list() {
let draws_per_pass = draw_count / threads;
group.bench_function(format!("{threads} threads x {draws_per_pass} draws"), |b| {
LazyLock::force(&state);

View File

@ -4,7 +4,15 @@ use criterion::{criterion_group, Criterion, Throughput};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::sync::LazyLock;
use crate::DeviceState;
use crate::{is_test, DeviceState};
/// Returns the thread counts that the resource creation benchmark iterates over.
///
/// A test run uses only `2` threads; a benchmark run sweeps `1, 2, 4, 8`.
fn thread_count_list() -> &'static [usize] {
    let counts: &'static [usize] = if is_test() { &[2] } else { &[1, 2, 4, 8] };
    counts
}
fn run_bench(ctx: &mut Criterion) {
let state = LazyLock::new(DeviceState::new);
@ -14,7 +22,7 @@ fn run_bench(ctx: &mut Criterion) {
let mut group = ctx.benchmark_group("Resource Creation: Large Buffer");
group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _));
for threads in [1, 2, 4, 8] {
for &threads in thread_count_list() {
let resources_per_thread = RESOURCES_TO_CREATE / threads;
group.bench_function(
format!("{threads} threads x {resources_per_thread} resource"),

View File

@ -7,6 +7,10 @@ mod renderpass;
mod resource_creation;
mod shader;
/// Returns `true` when the benchmarks are being run as tests rather than as
/// real benchmarks.
///
/// Detection is based purely on whether the `NEXTEST` environment variable is
/// present (presumably set by the cargo-nextest runner — confirm against its
/// documentation); the variable's value is never inspected.
fn is_test() -> bool {
    // `var_os` is a pure presence check: unlike `var`, it does not report a
    // variable that is set to a non-Unicode value as missing, and it avoids
    // validating/allocating a `String` that would be discarded anyway.
    std::env::var_os("NEXTEST").is_some()
}
struct DeviceState {
adapter_info: wgpu::AdapterInfo,
device: wgpu::Device,