Improve alignment

This commit is contained in:
Maximilian Ammann 2022-01-08 16:31:25 +01:00
parent 59ebdc4f93
commit 1136bf5edb
5 changed files with 151 additions and 93 deletions

View File

@ -1,5 +1,6 @@
use std::collections::VecDeque;
use std::io::Cursor;
use std::ops::Range;
use std::sync::{Arc, Condvar, Mutex};
use log::{error, info};
@ -11,13 +12,13 @@ use vector_tile::tile::Tile;
use crate::io::static_database;
use crate::render::shader_ffi::GpuVertexUniform;
use crate::tesselation::{IndexDataType, Tesselated};
use crate::tesselation::{IndexDataType, OverAlignedVertexBuffer, Tesselated};
#[derive(Clone)]
pub struct TesselatedTile {
pub id: u32,
pub coords: TileCoords,
pub geometry: VertexBuffers<GpuVertexUniform, IndexDataType>,
pub over_aligned: OverAlignedVertexBuffer<GpuVertexUniform, IndexDataType>,
}
#[derive(Clone)]
@ -61,14 +62,11 @@ impl Cache {
);
let tile = parse_tile_bytes(file.contents()).expect("failed to load tile");
let mut geometry: VertexBuffers<GpuVertexUniform, IndexDataType> =
VertexBuffers::new();
tile.tesselate_stroke(&mut geometry);
let buffer = tile.tesselate_stroke();
self.responses.push(TesselatedTile {
id: current_id,
coords,
geometry,
over_aligned: buffer.into(),
});
current_id += 1;
info!("tile ready: {:?}", &coords);

View File

@ -7,10 +7,11 @@ use std::mem::size_of;
use std::ops::Range;
use lyon::tessellation::VertexBuffers;
use wgpu::BufferAddress;
use crate::coords::TileCoords;
use crate::render::shader_ffi::GpuVertexUniform;
use crate::tesselation::IndexDataType;
use crate::tesselation::{IndexDataType, OverAlignedVertexBuffer};
/// Buffer and its size: wraps a backing buffer `B` together with its size as a
/// `wgpu::BufferAddress` (presumably the size in bytes — confirm against callers).
pub struct BackingBufferDescriptor<B>(pub B, pub wgpu::BufferAddress);
@ -63,46 +64,80 @@ impl<Q: Queue<B>, B, V: bytemuck::Pod, I: bytemuck::Pod> BufferPool<Q, B, V, I>
&self.indices.inner
}
/// The VertexBuffers can contain padding elements. Not everything from a VertexBuffers is useable.
/// The function returns the `bytes` and `aligned_bytes`. See [`OverAlignedVertexBuffer`].
/// Computes buffer extents for an over-aligned upload.
///
/// `elements` is the total number of elements stored in the buffer (padding
/// included); `usable_elements` is the count of meaningful elements.
///
/// Returns `(bytes, aligned_bytes)`: the total byte size of the buffer, and
/// the usable byte size rounded up to `wgpu::COPY_BUFFER_ALIGNMENT`.
fn align(
    stride: wgpu::BufferAddress,
    elements: wgpu::BufferAddress,
    usable_elements: wgpu::BufferAddress,
) -> (BufferAddress, BufferAddress) {
    let bytes = elements * stride;
    // `usable_elements * stride` already has type BufferAddress; the previous
    // `as wgpu::BufferAddress` cast was redundant.
    let usable_bytes = usable_elements * stride;
    let align = wgpu::COPY_BUFFER_ALIGNMENT;
    // Round up to the next multiple of `align` (no-op when already aligned).
    let padding = (align - usable_bytes % align) % align;
    (bytes, usable_bytes + padding)
}
/// Allocates `buffer` and uploads it to the GPU
pub fn allocate_geometry(
&mut self,
queue: &Q,
id: u32,
coords: TileCoords,
geometry: &VertexBuffers<V, I>,
over_aligned: &OverAlignedVertexBuffer<V, I>,
) {
let vertices_stride = size_of::<V>();
let new_vertices = (geometry.vertices.len() * vertices_stride) as wgpu::BufferAddress;
let indices_stride = size_of::<I>();
let new_indices = (geometry.indices.len() * indices_stride) as wgpu::BufferAddress;
let vertices_stride = size_of::<V>() as wgpu::BufferAddress;
let indices_stride = size_of::<I>() as wgpu::BufferAddress;
let (vertices_bytes, aligned_vertices_bytes) = Self::align(
vertices_stride,
over_aligned.buffer.vertices.len() as BufferAddress,
over_aligned.buffer.vertices.len() as BufferAddress,
);
let (indices_bytes, aligned_indices_bytes) = Self::align(
indices_stride,
over_aligned.buffer.indices.len() as BufferAddress,
over_aligned.usable_indices as BufferAddress,
);
let maybe_entry = IndexEntry {
id,
coords,
indices_stride: indices_stride as u64,
vertices: self.vertices.make_room(new_vertices, &mut self.index, true),
indices: self.indices.make_room(new_indices, &mut self.index, false),
indices_stride: indices_stride as wgpu::BufferAddress,
buffer_vertices: self
.vertices
.make_room(vertices_bytes, &mut self.index, true),
buffer_indices: self
.indices
.make_room(indices_bytes, &mut self.index, false),
usable_indices: over_aligned.usable_indices as u32,
};
assert_eq!(
maybe_entry.vertices.end - &maybe_entry.vertices.start,
new_vertices
maybe_entry.buffer_vertices.end - &maybe_entry.buffer_vertices.start,
vertices_bytes
);
assert_eq!(
maybe_entry.indices.end - &maybe_entry.indices.start,
new_indices
maybe_entry.buffer_indices.end - &maybe_entry.buffer_indices.start,
indices_bytes
);
// write_buffer() is the preferred method for WASM: https://toji.github.io/webgpu-best-practices/buffer-uploads.html#when-in-doubt-writebuffer
queue.write_buffer(
&self.vertices.inner,
maybe_entry.vertices.start,
bytemuck::cast_slice(&geometry.vertices),
maybe_entry.buffer_vertices.start,
&bytemuck::cast_slice(&over_aligned.buffer.vertices)
[0..aligned_vertices_bytes as usize],
);
queue.write_buffer(
&self.indices.inner,
maybe_entry.indices.start,
bytemuck::cast_slice(&geometry.indices),
maybe_entry.buffer_indices.start,
&bytemuck::cast_slice(&over_aligned.buffer.indices)[0..aligned_indices_bytes as usize],
);
self.index.push_back(maybe_entry);
}
@ -162,16 +197,16 @@ impl<B> BackingBuffer<B> {
) -> Range<wgpu::BufferAddress> {
let start = index.front().map(|first| {
if vertices {
first.vertices.start
first.buffer_vertices.start
} else {
first.indices.start
first.buffer_indices.start
}
});
let end = index.back().map(|first| {
if vertices {
first.vertices.end
first.buffer_vertices.end
} else {
first.indices.end
first.buffer_indices.end
}
});
@ -207,22 +242,27 @@ impl<B> BackingBuffer<B> {
pub struct IndexEntry {
pub id: u32,
pub coords: TileCoords,
indices_stride: u64,
vertices: Range<wgpu::BufferAddress>,
indices: Range<wgpu::BufferAddress>,
indices_stride: wgpu::BufferAddress,
// Range of bytes within the backing buffer for vertices
buffer_vertices: Range<wgpu::BufferAddress>,
// Range of bytes within the backing buffer for indices
buffer_indices: Range<wgpu::BufferAddress>,
// Amount of actually usable indices. Each index has the size/format `IndexDataType`.
// Can be lower than size(buffer_indices) / indices_stride because of alignment.
usable_indices: u32,
}
impl IndexEntry {
pub fn indices_range(&self) -> Range<u32> {
0..((self.indices.end - self.indices.start) / self.indices_stride) as u32
0..self.usable_indices
}
pub fn indices_buffer_range(&self) -> Range<wgpu::BufferAddress> {
self.indices.clone()
self.buffer_indices.clone()
}
pub fn vertices_buffer_range(&self) -> Range<wgpu::BufferAddress> {
self.vertices.clone()
self.buffer_vertices.clone()
}
}
@ -274,10 +314,12 @@ mod tests {
let mut data48bytes = VertexBuffers::new();
data48bytes.vertices.append(&mut create_48byte());
data48bytes.indices.append(&mut vec![1, 2, 3, 4]);
let data48bytes_range = 0..2;
let mut data24bytes = VertexBuffers::new();
data24bytes.vertices.append(&mut create_24byte());
data24bytes.indices.append(&mut vec![1, 2, 3, 4]);
let data24bytes_range = 0..1;
for i in 0..2 {
pool.allocate_geometry(&queue, 0, (0, 0, 0).into(), &data48bytes);

View File

@ -40,8 +40,8 @@ impl Default for SceneParams {
}
const INDEX_FORMAT: wgpu::IndexFormat = wgpu::IndexFormat::Uint16; // Must match IndexDataType
const VERTEX_BUFFER_SIZE: BufferAddress = 1024 * 1024 * 16;
const INDICES_BUFFER_SIZE: BufferAddress = 1024 * 1024 * 16;
const VERTEX_BUFFER_SIZE: BufferAddress = 1024 * 1024 * 8;
const INDICES_BUFFER_SIZE: BufferAddress = 1024 * 1024 * 8;
const TILE_META_COUNT: BufferAddress = 512;
const TILE_MASK_INSTANCE_COUNT: BufferAddress = 512;
@ -371,8 +371,12 @@ impl State {
let world_coords = tile.coords.into_world_tile();
self.tile_mask_pattern.update_bounds(&world_coords);
self.buffer_pool
.allocate_geometry(&self.queue, tile.id, tile.coords, &tile.geometry);
self.buffer_pool.allocate_geometry(
&self.queue,
tile.id,
tile.coords,
&tile.over_aligned,
);
self.queue.write_buffer(
&self.tiles_uniform_buffer,

View File

@ -1,32 +1,25 @@
mod misc;
pub mod tile;
use bytemuck::Pod;
use std::ops::{Add, Range};
use crate::render::shader_ffi::GpuVertexUniform;
use lyon::tessellation::{
FillVertex, FillVertexConstructor, StrokeVertex, StrokeVertexConstructor, VertexBuffers,
};
use std::ops::Range;
use wgpu::BufferAddress;
use crate::render::shader_ffi::GpuVertexUniform;
pub mod tile;
const DEFAULT_TOLERANCE: f32 = 0.02;
pub type IndexDataType = u16; // Must match INDEX_FORMAT
pub trait Tesselated<OutputIndex: std::ops::Add> {
fn tesselate_stroke(
&self,
buffer: &mut VertexBuffers<GpuVertexUniform, OutputIndex>,
) -> Range<IndexDataType>;
fn tesselate_fill(
&self,
buffer: &mut VertexBuffers<GpuVertexUniform, OutputIndex>,
) -> Range<IndexDataType>;
pub trait Tesselated<I: Add> {
fn tesselate_stroke(&self) -> VertexBuffers<GpuVertexUniform, I>;
fn tesselate_fill(&self) -> VertexBuffers<GpuVertexUniform, I>;
fn empty_range(
&self,
buffer: &mut VertexBuffers<GpuVertexUniform, OutputIndex>,
) -> Range<IndexDataType> {
let initial_indices_count = buffer.indices.len() as IndexDataType;
initial_indices_count..initial_indices_count
fn empty_range(&self) -> VertexBuffers<GpuVertexUniform, I> {
VertexBuffers::new()
}
}
@ -47,17 +40,53 @@ impl StrokeVertexConstructor<GpuVertexUniform> for VertexConstructor {
}
}
trait Align<V: bytemuck::Pod, I: bytemuck::Pod> {
fn align_indices(&mut self);
/// A [`VertexBuffers`] whose contents may be "over-aligned": padding index
/// elements can be appended at the end so that the index data's byte length is
/// a multiple of `wgpu::COPY_BUFFER_ALIGNMENT`.
#[derive(Clone)]
pub struct OverAlignedVertexBuffer<V, I> {
    // The vertex/index data; `indices` may carry trailing padding elements.
    pub buffer: VertexBuffers<V, I>,
    // Number of indices that are real geometry — entries past this count are
    // alignment padding and should be ignored when drawing.
    pub usable_indices: u32,
}
impl<V: bytemuck::Pod, I: bytemuck::Pod> Align<V, I> for VertexBuffers<V, I> {
fn align_indices(&mut self) {
let alignment = wgpu::COPY_BUFFER_ALIGNMENT as usize / std::mem::size_of::<I>();
let padding = self.indices.len() % alignment;
if padding > 0 {
self.indices
.extend(std::iter::repeat(I::zeroed()).take(alignment - padding));
impl<V: Pod, I: Pod> From<VertexBuffers<V, I>> for OverAlignedVertexBuffer<V, I> {
fn from(mut buffer: VertexBuffers<V, I>) -> Self {
let usable_indices = buffer.indices.len() as u32;
buffer.align_vertices();
buffer.align_indices();
Self {
buffer,
usable_indices,
}
}
}
/// Alignment helpers for [`VertexBuffers`]: make (or verify) the vertex and
/// index data's byte lengths multiples of `wgpu::COPY_BUFFER_ALIGNMENT` so the
/// data can be uploaded with `write_buffer`.
trait Align<V: Pod, I: Pod> {
    // Ensures the vertex data's byte length meets the alignment requirement.
    fn align_vertices(&mut self);
    // Pads the index data with zeroed elements until its byte length is a
    // multiple of `wgpu::COPY_BUFFER_ALIGNMENT`.
    fn align_indices(&mut self);
}
impl<V: Pod, I: Pod> Align<V, I> for VertexBuffers<V, I> {
    /// Verifies that the vertex data is already aligned.
    ///
    /// # Panics
    /// Panics when the vertex byte length is not a multiple of
    /// `wgpu::COPY_BUFFER_ALIGNMENT` — vertices are never padded here, so an
    /// unaligned vertex type is a programming error.
    fn align_vertices(&mut self) {
        let align = wgpu::COPY_BUFFER_ALIGNMENT;
        // Bug fix: this impl is generic over the vertex type `V`, but the
        // original computed `size_of::<GpuVertexUniform>()`, which yields the
        // wrong stride for any other vertex type (the module's tests use
        // 24- and 48-byte vertex types).
        let stride = std::mem::size_of::<V>() as BufferAddress;
        let unpadded_bytes = self.vertices.len() as BufferAddress * stride;
        let padding_bytes = (align - unpadded_bytes % align) % align;
        assert!(
            padding_bytes == 0,
            "vertices must already be aligned to wgpu::COPY_BUFFER_ALIGNMENT"
        );
    }

    /// Appends zeroed index elements until the index data's byte length is a
    /// multiple of `wgpu::COPY_BUFFER_ALIGNMENT`.
    fn align_indices(&mut self) {
        let align = wgpu::COPY_BUFFER_ALIGNMENT;
        let stride = std::mem::size_of::<I>() as BufferAddress;
        let unpadded_bytes = self.indices.len() as BufferAddress * stride;
        let padding_bytes = (align - unpadded_bytes % align) % align;
        // Number of whole padding elements needed: divide by stride, rounding
        // up, so the padded length covers at least `padding_bytes` extra bytes.
        let overpad = (padding_bytes + stride - 1) / stride;
        self.indices
            .extend(std::iter::repeat(I::zeroed()).take(overpad as usize));
    }
}

View File

@ -1,5 +1,6 @@
use std::ops::Range;
use std::ops::{Add, Range};
use bytemuck::Pod;
use lyon::lyon_tessellation::LineJoin;
use lyon::tessellation;
use lyon::tessellation::geometry_builder::MaxIndex;
@ -14,7 +15,7 @@ use vector_tile::geometry::{Command, Geometry};
use vector_tile::tile::Tile;
use crate::render::shader_ffi::GpuVertexUniform;
use crate::tesselation::{Align, IndexDataType, Tesselated, VertexConstructor, DEFAULT_TOLERANCE};
use crate::tesselation::{IndexDataType, Tesselated, VertexConstructor, DEFAULT_TOLERANCE};
fn build_path(tile: &Tile, fill: bool) -> Path {
let mut tile_builder = Path::builder().with_svg();
@ -85,54 +86,38 @@ fn build_path(tile: &Tile, fill: bool) -> Path {
tile_builder.build()
}
impl<
OutputIndex: std::ops::Add
+ std::convert::From<lyon::lyon_tessellation::VertexId>
+ MaxIndex
+ bytemuck::Pod,
> Tesselated<OutputIndex> for Tile
{
fn tesselate_stroke(
&self,
buffer: &mut VertexBuffers<GpuVertexUniform, OutputIndex>,
) -> Range<IndexDataType> {
impl<I: Add + From<lyon::lyon_tessellation::VertexId> + MaxIndex + Pod> Tesselated<I> for Tile {
fn tesselate_stroke(&self) -> VertexBuffers<GpuVertexUniform, I> {
let mut buffer: VertexBuffers<GpuVertexUniform, I> = VertexBuffers::new();
let mut tesselator = StrokeTessellator::new();
let initial_indices_count = buffer.indices.len();
let tile_path = build_path(self, false);
tesselator
.tessellate_path(
&tile_path,
&StrokeOptions::tolerance(DEFAULT_TOLERANCE),
&mut BuffersBuilder::new(buffer, VertexConstructor()),
&mut BuffersBuilder::new(&mut buffer, VertexConstructor()),
)
.unwrap();
buffer.align_indices();
initial_indices_count as IndexDataType..buffer.indices.len() as IndexDataType
buffer
}
fn tesselate_fill(
&self,
buffer: &mut VertexBuffers<GpuVertexUniform, OutputIndex>,
) -> Range<IndexDataType> {
fn tesselate_fill(&self) -> VertexBuffers<GpuVertexUniform, I> {
let mut buffer: VertexBuffers<GpuVertexUniform, I> = VertexBuffers::new();
let mut tesselator = FillTessellator::new();
let initial_indices_count = buffer.indices.len();
let tile_path = build_path(self, true);
tesselator
.tessellate_path(
&tile_path,
&FillOptions::tolerance(DEFAULT_TOLERANCE),
&mut BuffersBuilder::new(buffer, VertexConstructor()),
&mut BuffersBuilder::new(&mut buffer, VertexConstructor()),
)
.unwrap();
initial_indices_count as IndexDataType..buffer.indices.len() as IndexDataType
buffer
}
}