Optimize dot4{I, U}8Packed for all spv versions

Emit optimized code for `dot4{I, U}8Packed` regardless of SPIR-V version as long as the required capabilities are available. On SPIR-V < 1.6, require the extension "SPV_KHR_integer_dot_product" for this. On SPIR-V >= 1.6, don't require the extension because the corresponding capabilities are part of SPIR-V >= 1.6 proper.
2025-12-08 21:26:17 +00:00 · 2025-04-27 15:37:47 +02:00 · 2025-04-27 15:37:47 +02:00 · bb83976ddb
commit bb83976ddb
parent 065d6546c4
11 changed files with 122 additions and 108 deletions
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@ -1143,17 +1143,21 @@ impl BlockContext<'_> {
                        ),
                    },
                    fun @ (Mf::Dot4I8Packed | Mf::Dot4U8Packed) => {
-                        if self.writer.lang_version() >= (1, 6)
+                        if self
-                            && self
+                            .writer
-                                .writer
+                            .require_all(&[
-                                .require_all(&[
+                                spirv::Capability::DotProduct,
-                                    spirv::Capability::DotProduct,
+                                spirv::Capability::DotProductInput4x8BitPacked,
-                                    spirv::Capability::DotProductInput4x8BitPacked,
+                            ])
-                                ])
+                            .is_ok()
                                .is_ok()
                        {
                            // Write optimized code using `PackedVectorFormat4x8Bit`.
-                            self.writer.use_extension("SPV_KHR_integer_dot_product");
+                            if self.writer.lang_version() < (1, 6) {
                                // SPIR-V 1.6 supports the required capabilities natively, so the extension
                                // is only required for earlier versions. See right column of
                                // <https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpSDot>.
                                self.writer.use_extension("SPV_KHR_integer_dot_product");
                            }
                            let op = match fun {
                                Mf::Dot4I8Packed => spirv::Op::SDot,
--- a/naga/tests/in/wgsl/functions-optimized-by-capability.toml
+++ b/naga/tests/in/wgsl/functions-optimized-by-capability.toml
@ -0,0 +1,9 @@
 # Turn on optimizations for `dot4I8Packed` and `dot4U8Packed` by enabling the
 # required capabilities on a SPIR-V version where these capabilities are only
 # available via the extension "SPV_KHR_integer_dot_product".
 targets = "SPIRV"
 [spv]
 capabilities = ["DotProduct", "DotProductInput4x8BitPacked"]
 version = [1, 0]
--- a/naga/tests/in/wgsl/functions-optimized-by-capability.wgsl
+++ b/naga/tests/in/wgsl/functions-optimized-by-capability.wgsl
--- a/naga/tests/in/wgsl/functions-optimized-by-version.toml
+++ b/naga/tests/in/wgsl/functions-optimized-by-version.toml
@ -0,0 +1,12 @@
 # Turn on optimizations for `dot4I8Packed` and `dot4U8Packed` on SPIR-V and HLSL by
 # using a version of SPIR-V / shader model that supports these without any extensions.
 targets = "SPIRV | HLSL"
 [spv]
 # We also need to provide the corresponding capabilities (which are part of SPIR-V >= 1.6).
 capabilities = ["DotProduct", "DotProductInput4x8BitPacked"]
 version = [1, 6]
 [hlsl]
 shader_model = "V6_4"
--- a/naga/tests/in/wgsl/functions-optimized-by-version.wgsl
+++ b/naga/tests/in/wgsl/functions-optimized-by-version.wgsl
@ -0,0 +1,19 @@
 fn test_packed_integer_dot_product() -> u32 {
    let a_5 = 1u;
    let b_5 = 2u;
    let c_5: i32 = dot4I8Packed(a_5, b_5);
    let a_6 = 3u;
    let b_6 = 4u;
    let c_6: u32 = dot4U8Packed(a_6, b_6);
    // test baking of arguments
    let c_7: i32 = dot4I8Packed(5u + c_6, 6u + c_6);
    let c_8: u32 = dot4U8Packed(7u + c_6, 8u + c_6);
    return c_8;
 }
@compute @workgroup_size(1)
 fn main() {
    let c = test_packed_integer_dot_product();
 }
--- a/naga/tests/in/wgsl/functions-optimized.toml
+++ b/naga/tests/in/wgsl/functions-optimized.toml
@ -1,11 +0,0 @@
 # Explicitly turn on optimizations for `dot4I8Packed` and `dot4U8Packed`
 # on SPIRV and HLSL.
 targets = "SPIRV | HLSL"
 [spv]
 capabilities = ["DotProduct", "DotProductInput4x8BitPacked"]
 version = [1, 6]
 [hlsl]
 shader_model = "V6_4"
--- a/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.hlsl
+++ b/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.hlsl
--- a/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron
+++ b/naga/tests/out/hlsl/wgsl-functions-optimized-by-version.ron
--- a/naga/tests/out/spv/wgsl-functions-optimized-by-capability.spvasm
+++ b/naga/tests/out/spv/wgsl-functions-optimized-by-capability.spvasm
@ -1,5 +1,5 @@
 ; SPIR-V
-; Version: 1.6
+; Version: 1.0
 ; Generator: rspirv
 ; Bound: 30
 OpCapability Shader
--- a/naga/tests/out/spv/wgsl-functions-optimized-by-version.spvasm
+++ b/naga/tests/out/spv/wgsl-functions-optimized-by-version.spvasm
@ -0,0 +1,45 @@
 ; SPIR-V
 ; Version: 1.6
 ; Generator: rspirv
 ; Bound: 30
 OpCapability Shader
 OpCapability DotProductKHR
 OpCapability DotProductInput4x8BitPackedKHR
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %26 "main"
 OpExecutionMode %26 LocalSize 1 1 1
 %2 = OpTypeVoid
 %3 = OpTypeInt 32 0
 %6 = OpTypeFunction %3
 %7 = OpConstant  %3  1
 %8 = OpConstant  %3  2
 %9 = OpConstant  %3  3
 %10 = OpConstant  %3  4
 %11 = OpConstant  %3  5
 %12 = OpConstant  %3  6
 %13 = OpConstant  %3  7
 %14 = OpConstant  %3  8
 %16 = OpTypeInt 32 1
 %27 = OpTypeFunction %2
 %5 = OpFunction  %3  None %6
 %4 = OpLabel
 OpBranch %15
 %15 = OpLabel
 %17 = OpSDotKHR  %16  %7 %8 PackedVectorFormat4x8BitKHR
 %18 = OpUDotKHR  %3  %9 %10 PackedVectorFormat4x8BitKHR
 %19 = OpIAdd  %3  %11 %18
 %20 = OpIAdd  %3  %12 %18
 %21 = OpSDotKHR  %16  %19 %20 PackedVectorFormat4x8BitKHR
 %22 = OpIAdd  %3  %13 %18
 %23 = OpIAdd  %3  %14 %18
 %24 = OpUDotKHR  %3  %22 %23 PackedVectorFormat4x8BitKHR
 OpReturnValue %24
 OpFunctionEnd
 %26 = OpFunction  %2  None %27
 %25 = OpLabel
 OpBranch %28
 %28 = OpLabel
 %29 = OpFunctionCall  %3  %5
 OpReturn
 OpFunctionEnd
--- a/naga/tests/out/spv/wgsl-functions.spvasm
+++ b/naga/tests/out/spv/wgsl-functions.spvasm
@ -1,12 +1,15 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 162
+; Bound: 95
 OpCapability Shader
 OpCapability DotProductKHR
 OpCapability DotProductInput4x8BitPackedKHR
 OpExtension "SPV_KHR_integer_dot_product"
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
-OpEntryPoint GLCompute %156 "main"
+OpEntryPoint GLCompute %89 "main"
-OpExecutionMode %156 LocalSize 1 1 1
+OpExecutionMode %89 LocalSize 1 1 1
 %2 = OpTypeVoid
 %4 = OpTypeFloat 32
 %3 = OpTypeVector %4 2
@ -39,10 +42,7 @@ OpExecutionMode %156 LocalSize 1 1 1
 %76 = OpConstant  %6  6
 %77 = OpConstant  %6  7
 %78 = OpConstant  %6  8
-%83 = OpConstant  %6  0
+%90 = OpTypeFunction %2
 %84 = OpConstant  %6  16
 %85 = OpConstant  %6  24
 %157 = OpTypeFunction %2
 %8 = OpFunction  %3  None %9
 %7 = OpLabel
 OpBranch %14
@ -96,86 +96,22 @@ OpFunctionEnd
 %69 = OpLabel
 OpBranch %79
 %79 = OpLabel
-%81 = OpBitcast  %5  %22
+%80 = OpSDotKHR  %5  %22 %72 PackedVectorFormat4x8BitKHR
-%82 = OpBitcast  %5  %72
+%81 = OpUDotKHR  %6  %73 %74 PackedVectorFormat4x8BitKHR
-%86 = OpBitFieldSExtract  %5  %81 %83 %78
+%82 = OpIAdd  %6  %75 %81
-%87 = OpBitFieldSExtract  %5  %82 %83 %78
+%83 = OpIAdd  %6  %76 %81
-%88 = OpIMul  %5  %86 %87
+%84 = OpSDotKHR  %5  %82 %83 PackedVectorFormat4x8BitKHR
-%89 = OpIAdd  %5  %32 %88
+%85 = OpIAdd  %6  %77 %81
-%90 = OpBitFieldSExtract  %5  %81 %78 %78
+%86 = OpIAdd  %6  %78 %81
-%91 = OpBitFieldSExtract  %5  %82 %78 %78
+%87 = OpUDotKHR  %6  %85 %86 PackedVectorFormat4x8BitKHR
-%92 = OpIMul  %5  %90 %91
+OpReturnValue %87
 %93 = OpIAdd  %5  %89 %92
 %94 = OpBitFieldSExtract  %5  %81 %84 %78
 %95 = OpBitFieldSExtract  %5  %82 %84 %78
 %96 = OpIMul  %5  %94 %95
 %97 = OpIAdd  %5  %93 %96
 %98 = OpBitFieldSExtract  %5  %81 %85 %78
 %99 = OpBitFieldSExtract  %5  %82 %85 %78
 %100 = OpIMul  %5  %98 %99
 %80 = OpIAdd  %5  %97 %100
 %102 = OpBitFieldUExtract  %6  %73 %83 %78
 %103 = OpBitFieldUExtract  %6  %74 %83 %78
 %104 = OpIMul  %6  %102 %103
 %105 = OpIAdd  %6  %41 %104
 %106 = OpBitFieldUExtract  %6  %73 %78 %78
 %107 = OpBitFieldUExtract  %6  %74 %78 %78
 %108 = OpIMul  %6  %106 %107
 %109 = OpIAdd  %6  %105 %108
 %110 = OpBitFieldUExtract  %6  %73 %84 %78
 %111 = OpBitFieldUExtract  %6  %74 %84 %78
 %112 = OpIMul  %6  %110 %111
 %113 = OpIAdd  %6  %109 %112
 %114 = OpBitFieldUExtract  %6  %73 %85 %78
 %115 = OpBitFieldUExtract  %6  %74 %85 %78
 %116 = OpIMul  %6  %114 %115
 %101 = OpIAdd  %6  %113 %116
 %117 = OpIAdd  %6  %75 %101
 %118 = OpIAdd  %6  %76 %101
 %120 = OpBitcast  %5  %117
 %121 = OpBitcast  %5  %118
 %122 = OpBitFieldSExtract  %5  %120 %83 %78
 %123 = OpBitFieldSExtract  %5  %121 %83 %78
 %124 = OpIMul  %5  %122 %123
 %125 = OpIAdd  %5  %32 %124
 %126 = OpBitFieldSExtract  %5  %120 %78 %78
 %127 = OpBitFieldSExtract  %5  %121 %78 %78
 %128 = OpIMul  %5  %126 %127
 %129 = OpIAdd  %5  %125 %128
 %130 = OpBitFieldSExtract  %5  %120 %84 %78
 %131 = OpBitFieldSExtract  %5  %121 %84 %78
 %132 = OpIMul  %5  %130 %131
 %133 = OpIAdd  %5  %129 %132
 %134 = OpBitFieldSExtract  %5  %120 %85 %78
 %135 = OpBitFieldSExtract  %5  %121 %85 %78
 %136 = OpIMul  %5  %134 %135
 %119 = OpIAdd  %5  %133 %136
 %137 = OpIAdd  %6  %77 %101
 %138 = OpIAdd  %6  %78 %101
 %140 = OpBitFieldUExtract  %6  %137 %83 %78
 %141 = OpBitFieldUExtract  %6  %138 %83 %78
 %142 = OpIMul  %6  %140 %141
 %143 = OpIAdd  %6  %41 %142
 %144 = OpBitFieldUExtract  %6  %137 %78 %78
 %145 = OpBitFieldUExtract  %6  %138 %78 %78
 %146 = OpIMul  %6  %144 %145
 %147 = OpIAdd  %6  %143 %146
 %148 = OpBitFieldUExtract  %6  %137 %84 %78
 %149 = OpBitFieldUExtract  %6  %138 %84 %78
 %150 = OpIMul  %6  %148 %149
 %151 = OpIAdd  %6  %147 %150
 %152 = OpBitFieldUExtract  %6  %137 %85 %78
 %153 = OpBitFieldUExtract  %6  %138 %85 %78
 %154 = OpIMul  %6  %152 %153
 %139 = OpIAdd  %6  %151 %154
 OpReturnValue %139
 OpFunctionEnd
-%156 = OpFunction  %2  None %157
+%89 = OpFunction  %2  None %90
-%155 = OpLabel
+%88 = OpLabel
-OpBranch %158
+OpBranch %91
-%158 = OpLabel
+%91 = OpLabel
-%159 = OpFunctionCall  %3  %8
+%92 = OpFunctionCall  %3  %8
-%160 = OpFunctionCall  %5  %17
+%93 = OpFunctionCall  %5  %17
-%161 = OpFunctionCall  %6  %70
+%94 = OpFunctionCall  %6  %70
 OpReturn
 OpFunctionEnd