diff --git a/tests/gpu-tests/draw_indirect.rs b/tests/gpu-tests/draw_indirect.rs
index bf4ed52f4..5b9384edd 100644
--- a/tests/gpu-tests/draw_indirect.rs
+++ b/tests/gpu-tests/draw_indirect.rs
@@ -540,3 +540,240 @@ make_failing_test!(
     INSTANCED_INDEXED_DRAW_OOB_INSTANCE_COUNT,
     get_instanced_indexed_draw_test_data(0, 6, 0, 5)
 );
+
+#[gpu_test]
+static INDIRECT_BUFFER_OFFSETS: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(wgpu::DownlevelFlags::INDIRECT_EXECUTION)
+            .features(wgpu::Features::INDIRECT_FIRST_INSTANCE)
+            .limits(wgpu::Limits::downlevel_defaults()),
+    )
+    .run_async(indirect_buffer_offsets);
+
+/// Tests that indirect draw calls work properly with offsets that straddle 16 byte boundaries
+/// (size of DrawIndirectArgs).
+///
+/// Three overlapping argument sets are packed into a single six-word indirect buffer and read at
+/// byte offsets 0, 4 and 8. The render target is then copied back to the CPU: the first half of
+/// the readback must be fully written and the second half must still be zero.
+async fn indirect_buffer_offsets(ctx: TestingContext) {
+    // Edge length of the square render target, in texels.
+    const TARGET_SIZE: u32 = 256;
+    // Single-channel 8-bit target: one byte per texel, so a readback row is `TARGET_SIZE` bytes.
+    const TARGET_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::R8Unorm;
+
+    // The first 2 draws are successful, the third one is not.
+    // (The 3rd argument set has an instance_count of 0, so it cannot cover any texel.)
+    let indirect_args_offsets = [0, 4, 8];
+
+    let indirect_args = [
+        //    1st draw       | 2nd draw       | 3rd draw
+        9, // vertex_count   |                |
+        9, // instance_count | vertex_count   |
+        1, // first_vertex   | instance_count | vertex_count
+        0, // first_instance | first_vertex   | instance_count
+        9, //                | first_instance | first_vertex
+        10, //               |                | first_instance
+    ];
+
+    // 1st draw (first_vertex: 1): ◤ ◢ ◢
+    // 2nd draw (first_vertex: 0): ◤ ◣ ◢
+    let vertex_buffer_content = [
+        -0.5, 0.5, // Top left
+        // Triangle 1
+        -0.5, -0.5, // Bottom left
+        0.5, 0.5, // Top right
+        -0.5, 0.5, // Top left
+        // Triangle 2
+        -0.5, -0.5, // Bottom left
+        0.5, -0.5, // Bottom right
+        0.5, 0.5, // Top right
+        // Triangle 3 (same as Triangle 2)
+        -0.5, -0.5, // Bottom left
+        0.5, -0.5, // Bottom right
+        0.5, 0.5, // Top right
+    ];
+    #[rustfmt::skip]
+    let instance_buffer_content = [
+        // Move quad to top left (for 1st draw):
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        -0.5, 0.5,
+        // Move quad to top right (for 2nd draw):
+        0.5, 0.5,
+    ];
+
+    // The explicit `f32` source type pins the otherwise-unsuffixed literals to 32-bit floats,
+    // matching the `Float32x2` attributes declared in the pipeline below.
+    let vertex_buffer = ctx.device.create_buffer_init(&BufferInitDescriptor {
+        label: None,
+        contents: bytemuck::cast_slice::<f32, u8>(&vertex_buffer_content),
+        usage: wgpu::BufferUsages::VERTEX,
+    });
+    let instance_buffer = ctx.device.create_buffer_init(&BufferInitDescriptor {
+        label: None,
+        contents: bytemuck::cast_slice::<f32, u8>(&instance_buffer_content),
+        usage: wgpu::BufferUsages::VERTEX,
+    });
+
+    let shader_src = "
+        @vertex
+        fn vs_main(@location(0) position: vec2f, @location(1) position_offset: vec2f) -> @builtin(position) vec4f {
+            return vec4f(position + position_offset, 0.0, 1.0);
+        }
+
+        @fragment
+        fn fs_main() -> @location(0) vec4f {
+            return vec4f(1.0);
+        }
+    ";
+
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: None,
+            source: wgpu::ShaderSource::Wgsl(shader_src.into()),
+        });
+
+    let pipeline_desc = wgpu::RenderPipelineDescriptor {
+        label: None,
+        layout: None,
+        vertex: wgpu::VertexState {
+            buffers: &[
+                // Slot 0: per-vertex position.
+                wgpu::VertexBufferLayout {
+                    array_stride: 8,
+                    step_mode: wgpu::VertexStepMode::Vertex,
+                    attributes: &vertex_attr_array![0 => Float32x2],
+                },
+                // Slot 1: per-instance offset added to the position in `vs_main`.
+                wgpu::VertexBufferLayout {
+                    array_stride: 8,
+                    step_mode: wgpu::VertexStepMode::Instance,
+                    attributes: &vertex_attr_array![1 => Float32x2],
+                },
+            ],
+            module: &shader,
+            entry_point: Some("vs_main"),
+            compilation_options: Default::default(),
+        },
+        primitive: wgpu::PrimitiveState::default(),
+        depth_stencil: None,
+        multisample: wgpu::MultisampleState::default(),
+        fragment: Some(wgpu::FragmentState {
+            module: &shader,
+            entry_point: Some("fs_main"),
+            compilation_options: Default::default(),
+            targets: &[Some(wgpu::ColorTargetState {
+                format: TARGET_FORMAT,
+                blend: None,
+                write_mask: wgpu::ColorWrites::ALL,
+            })],
+        }),
+        multiview: None,
+        cache: None,
+    };
+    let pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
+
+    let target_extent = wgpu::Extent3d {
+        width: TARGET_SIZE,
+        height: TARGET_SIZE,
+        depth_or_array_layers: 1,
+    };
+    let out_texture = ctx.device.create_texture(&wgpu::TextureDescriptor {
+        label: None,
+        size: target_extent,
+        mip_level_count: 1,
+        sample_count: 1,
+        dimension: wgpu::TextureDimension::D2,
+        format: TARGET_FORMAT,
+        usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
+        view_formats: &[],
+    });
+    let out_texture_view = out_texture.create_view(&wgpu::TextureViewDescriptor::default());
+
+    let readback_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+        label: None,
+        size: u64::from(TARGET_SIZE) * u64::from(TARGET_SIZE),
+        usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+        mapped_at_creation: false,
+    });
+
+    let indirect_buffer = ctx.device.create_buffer_init(&BufferInitDescriptor {
+        label: None,
+        contents: bytemuck::cast_slice::<u32, u8>(&indirect_args),
+        usage: wgpu::BufferUsages::INDIRECT,
+    });
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+    // Scope the pass so it is dropped (ending the pass) before the encoder is used again.
+    {
+        let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: None,
+            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                ops: wgpu::Operations::default(),
+                resolve_target: None,
+                view: &out_texture_view,
+            })],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+        });
+
+        rpass.set_pipeline(&pipeline);
+        rpass.set_vertex_buffer(0, vertex_buffer.slice(..));
+        rpass.set_vertex_buffer(1, instance_buffer.slice(..));
+        for offset in indirect_args_offsets {
+            rpass.draw_indirect(&indirect_buffer, offset);
+        }
+    }
+
+    encoder.copy_texture_to_buffer(
+        wgpu::TexelCopyTextureInfo {
+            texture: &out_texture,
+            mip_level: 0,
+            origin: wgpu::Origin3d::ZERO,
+            aspect: wgpu::TextureAspect::All,
+        },
+        wgpu::TexelCopyBufferInfo {
+            buffer: &readback_buffer,
+            layout: wgpu::TexelCopyBufferLayout {
+                offset: 0,
+                // One byte per texel, so a tightly packed row is `TARGET_SIZE` bytes.
+                bytes_per_row: Some(TARGET_SIZE),
+                rows_per_image: None,
+            },
+        },
+        target_extent,
+    );
+
+    ctx.queue.submit([encoder.finish()]);
+
+    let slice = readback_buffer.slice(..);
+    slice.map_async(wgpu::MapMode::Read, |result| {
+        result.expect("failed to map the readback buffer")
+    });
+
+    ctx.async_poll(wgpu::PollType::wait()).await.unwrap();
+
+    let data = slice.get_mapped_range();
+    let (drawn_half, untouched_half) = data.split_at(data.len() / 2);
+    assert!(
+        drawn_half.iter().all(|&texel| texel == u8::MAX),
+        "the first half of the readback should be fully covered by the first two draws"
+    );
+    assert!(
+        untouched_half.iter().all(|&texel| texel == 0),
+        "the second half of the readback should not be written by any draw"
+    );
+}