[naga] Vectorize [un]pack4x{I, U}8[Clamp] on spv

Emits vectorized SPIR-V code for the WGSL functions `unpack4xI8`, `unpack4xU8`, `pack4xI8`, `pack4xU8`, `pack4xI8Clamp`, and `pack4xU8Clamp` if `Capability::Int8` is available.

Exploits the following facts about SPIR-V ops:
- `SClamp`, `UClamp`, and `OpUConvert` accept vector arguments, in which case results are computed per component; and
- `OpBitcast` can cast between vectors and scalars, with a well-defined bit order that matches that required by the WGSL spec, see below.

WGSL spec for `pack4xI8` [1]:
> Component e[i] of the input is mapped to bits 8 x i through 8 x i + 7 of the result.

SPIR-V spec for `OpBitcast` [2]:
> Within this mapping, any single component of `S` [remark: the type with fewer but wider components] (mapping to multiple components of `L` [remark: the type with more but narrower components]) maps its lower-ordered bits to the lower-numbered components of `L`.

[1] https://www.w3.org/TR/WGSL/#pack4xI8-builtin
[2] https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
2025-12-08 21:26:17 +00:00 · 2025-05-03 13:27:19 +02:00 · 2025-05-03 13:27:19 +02:00 · b32eb4a120
commit b32eb4a120
parent 0997b99429
3 changed files with 545 additions and 416 deletions
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@ -1552,105 +1552,29 @@ impl BlockContext<'_> {
                    Mf::Pack2x16unorm => MathOp::Ext(spirv::GLOp::PackUnorm2x16),
                    Mf::Pack2x16snorm => MathOp::Ext(spirv::GLOp::PackSnorm2x16),
                    fun @ (Mf::Pack4xI8 | Mf::Pack4xU8 | Mf::Pack4xI8Clamp | Mf::Pack4xU8Clamp) => {
-                        let (int_type, is_signed) = match fun {
+                        let is_signed = matches!(fun, Mf::Pack4xI8 | Mf::Pack4xI8Clamp);
                            Mf::Pack4xI8 | Mf::Pack4xI8Clamp => (crate::ScalarKind::Sint, true),
                            Mf::Pack4xU8 | Mf::Pack4xU8Clamp => (crate::ScalarKind::Uint, false),
                            _ => unreachable!(),
                        };
                        let should_clamp = matches!(fun, Mf::Pack4xI8Clamp | Mf::Pack4xU8Clamp);
                        let uint_type_id =
                            self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::U32));
-                        let int_type_id =
+                        let last_instruction =
-                            self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
+                            if self.writer.require_all(&[spirv::Capability::Int8]).is_ok() {
-                                kind: int_type,
+                                self.write_pack4x8_optimized(
-                                width: 4,
+                                    block,
                            }));
                        let mut last_instruction = Instruction::new(spirv::Op::Nop);
                        let zero = self.writer.get_constant_scalar(crate::Literal::U32(0));
                        let mut preresult = zero;
                        block
                            .body
                            .reserve(usize::from(VEC_LENGTH) * (2 + usize::from(is_signed)));
                        let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
                        const VEC_LENGTH: u8 = 4;
                        for i in 0..u32::from(VEC_LENGTH) {
                            let offset =
                                self.writer.get_constant_scalar(crate::Literal::U32(i * 8));
                            let mut extracted = self.gen_id();
                            block.body.push(Instruction::binary(
                                spirv::Op::CompositeExtract,
                                int_type_id,
                                extracted,
                                arg0_id,
                                i,
                            ));
                            if is_signed {
                                let casted = self.gen_id();
                                block.body.push(Instruction::unary(
                                    spirv::Op::Bitcast,
                                    uint_type_id,
                                    casted,
                                    extracted,
                                ));
                                extracted = casted;
                            }
                            if should_clamp {
                                let (min, max, clamp_op) = if is_signed {
                                    (
                                        crate::Literal::I32(-128),
                                        crate::Literal::I32(127),
                                        spirv::GLOp::SClamp,
                                    )
                                } else {
                                    (
                                        crate::Literal::U32(0),
                                        crate::Literal::U32(255),
                                        spirv::GLOp::UClamp,
                                    )
                                };
                                let [min, max] =
                                    [min, max].map(|lit| self.writer.get_constant_scalar(lit));
                                let clamp_id = self.gen_id();
                                block.body.push(Instruction::ext_inst(
                                    self.writer.gl450_ext_inst_id,
                                    clamp_op,
                                    result_type_id,
                                    clamp_id,
                                    &[extracted, min, max],
                                ));
                                extracted = clamp_id;
                            }
                            let is_last = i == u32::from(VEC_LENGTH - 1);
                            if is_last {
                                last_instruction = Instruction::quaternary(
                                    spirv::Op::BitFieldInsert,
                                    result_type_id,
                                    arg0_id,
                                    id,
-                                    preresult,
+                                    is_signed,
-                                    extracted,
+                                    should_clamp,
                                    offset,
                                    eight,
                                )
                            } else {
-                                let new_preresult = self.gen_id();
+                                self.write_pack4x8_polyfill(
-                                block.body.push(Instruction::quaternary(
+                                    block,
                                    spirv::Op::BitFieldInsert,
                                    result_type_id,
-                                    new_preresult,
+                                    arg0_id,
-                                    preresult,
+                                    id,
-                                    extracted,
+                                    is_signed,
-                                    offset,
+                                    should_clamp,
-                                    eight,
+                                )
-                                ));
+                            };
                                preresult = new_preresult;
                            }
                        }
                        MathOp::Custom(last_instruction)
                    }
@ -1660,59 +1584,28 @@ impl BlockContext<'_> {
                    Mf::Unpack2x16unorm => MathOp::Ext(spirv::GLOp::UnpackUnorm2x16),
                    Mf::Unpack2x16snorm => MathOp::Ext(spirv::GLOp::UnpackSnorm2x16),
                    fun @ (Mf::Unpack4xI8 | Mf::Unpack4xU8) => {
-                        let (int_type, extract_op, is_signed) = match fun {
+                        let is_signed = matches!(fun, Mf::Unpack4xI8);
                            Mf::Unpack4xI8 => {
                                (crate::ScalarKind::Sint, spirv::Op::BitFieldSExtract, true)
                            }
                            Mf::Unpack4xU8 => {
                                (crate::ScalarKind::Uint, spirv::Op::BitFieldUExtract, false)
                            }
                            _ => unreachable!(),
                        };
-                        let sint_type_id =
+                        let last_instruction =
-                            self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::I32));
+                            if self.writer.require_all(&[spirv::Capability::Int8]).is_ok() {
                                self.write_unpack4x8_optimized(
                                    block,
                                    result_type_id,
                                    arg0_id,
                                    id,
                                    is_signed,
                                )
                            } else {
                                self.write_unpack4x8_polyfill(
                                    block,
                                    result_type_id,
                                    arg0_id,
                                    id,
                                    is_signed,
                                )
                            };
-                        let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
+                        MathOp::Custom(last_instruction)
                        let int_type_id =
                            self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
                                kind: int_type,
                                width: 4,
                            }));
                        block
                            .body
                            .reserve(usize::from(VEC_LENGTH) * 2 + usize::from(is_signed));
                        let arg_id = if is_signed {
                            let new_arg_id = self.gen_id();
                            block.body.push(Instruction::unary(
                                spirv::Op::Bitcast,
                                sint_type_id,
                                new_arg_id,
                                arg0_id,
                            ));
                            new_arg_id
                        } else {
                            arg0_id
                        };
                        const VEC_LENGTH: u8 = 4;
                        let parts: [_; VEC_LENGTH as usize] =
                            core::array::from_fn(|_| self.gen_id());
                        for (i, part_id) in parts.into_iter().enumerate() {
                            let index = self
                                .writer
                                .get_constant_scalar(crate::Literal::U32(i as u32 * 8));
                            block.body.push(Instruction::ternary(
                                extract_op,
                                int_type_id,
                                part_id,
                                arg_id,
                                index,
                                eight,
                            ));
                        }
                        MathOp::Custom(Instruction::composite_construct(result_type_id, id, &parts))
                    }
                };
@ -2721,6 +2614,288 @@ impl BlockContext<'_> {
        }
    }
    /// Build the vectorized lowering of `pack4x{I,U}8[Clamp]`, used when
    /// capability "Int8" is available.
    ///
    /// The four-component, 32-bit argument `arg0_id` is optionally clamped to
    /// the 8-bit value range, narrowed component-wise to a vector of four
    /// unsigned 8-bit integers, and then bit-cast to `result_type_id`.
    ///
    /// Every instruction except the last is appended to `block`. The closing
    /// `OpBitcast`, whose result id is `id`, is returned to the caller instead
    /// of being pushed.
    fn write_pack4x8_optimized(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        let component_kind = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let source_type = NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: component_kind,
                width: 4,
            },
        };
        let source_type_id = self.get_numeric_type_id(source_type);
        // The narrow vector is requested as unsigned regardless of `is_signed`.
        let narrow_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: crate::ScalarKind::Uint,
                width: 1,
            },
        });

        let narrowing_input = if should_clamp {
            let (bounds, clamp_op) = if is_signed {
                (
                    [crate::Literal::I32(-128), crate::Literal::I32(127)],
                    spirv::GLOp::SClamp,
                )
            } else {
                (
                    [crate::Literal::U32(0), crate::Literal::U32(255)],
                    spirv::GLOp::UClamp,
                )
            };
            // Replicate each scalar bound into all four components so the
            // whole vector is clamped by a single extended instruction.
            let [lower, upper] = bounds.map(|bound| {
                let scalar_id = self.writer.get_constant_scalar(bound);
                self.writer.get_constant_composite(
                    LookupType::Local(LocalType::Numeric(source_type)),
                    &[scalar_id; 4],
                )
            });
            let clamped = self.gen_id();
            block.body.push(Instruction::ext_inst(
                self.writer.gl450_ext_inst_id,
                clamp_op,
                source_type_id,
                clamped,
                &[arg0_id, lower, upper],
            ));
            clamped
        } else {
            arg0_id
        };

        let narrowed = self.gen_id();
        block.body.push(Instruction::unary(
            // This conversion truncates, so `UConvert` and `SConvert` behave
            // identically here.
            spirv::Op::UConvert,
            narrow_type_id,
            narrowed,
            narrowing_input,
        ));

        // Bit casting between a vector and a scalar uses the bit order defined
        // by the SPIR-V spec [1], which is the order the WGSL spec [2] requires.
        // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
        // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
        Instruction::unary(spirv::Op::Bitcast, result_type_id, id, narrowed)
    }
    /// Emit code for `pack4x{I,U}8[Clamp]` if capability "Int8" is not available.
    ///
    /// Each of the four components of `arg0_id` is extracted, bit-cast to `u32`
    /// when `is_signed` is set, clamped to the 8-bit value range when
    /// `should_clamp` is set, and inserted into an accumulator with
    /// `OpBitFieldInsert` at bit offset `8 * i` with a width of 8 bits. The
    /// accumulator starts out as the `u32` constant zero.
    ///
    /// Every instruction except the last is appended to `block`. The final
    /// `OpBitFieldInsert`, whose result id is `id`, is returned to the caller
    /// instead of being pushed.
    fn write_pack4x8_polyfill(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
        should_clamp: bool,
    ) -> Instruction {
        const VEC_LENGTH: u8 = 4;

        let int_type = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let uint_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::U32));
        let int_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
            kind: int_type,
            width: 4,
        }));
        // Placeholder; it is overwritten on the last loop iteration.
        let mut last_instruction = Instruction::new(spirv::Op::Nop);
        let zero = self.writer.get_constant_scalar(crate::Literal::U32(0));
        let mut preresult = zero;
        // Per component: one extract and one insert, plus an optional bitcast
        // (signed input) and an optional clamp. The clamp has to be counted
        // too, otherwise the hint falls short for the `*Clamp` variants.
        block.body.reserve(
            usize::from(VEC_LENGTH) * (2 + usize::from(is_signed) + usize::from(should_clamp)),
        );
        let eight = self.writer.get_constant_scalar(crate::Literal::U32(8));
        for i in 0..u32::from(VEC_LENGTH) {
            let offset = self.writer.get_constant_scalar(crate::Literal::U32(i * 8));
            let mut extracted = self.gen_id();
            block.body.push(Instruction::binary(
                spirv::Op::CompositeExtract,
                int_type_id,
                extracted,
                arg0_id,
                i,
            ));
            if is_signed {
                // Reinterpret the signed component as `u32` so that it can be
                // inserted into the unsigned accumulator.
                let casted = self.gen_id();
                block.body.push(Instruction::unary(
                    spirv::Op::Bitcast,
                    uint_type_id,
                    casted,
                    extracted,
                ));
                extracted = casted;
            }
            if should_clamp {
                let (min, max, clamp_op) = if is_signed {
                    (
                        crate::Literal::I32(-128),
                        crate::Literal::I32(127),
                        spirv::GLOp::SClamp,
                    )
                } else {
                    (
                        crate::Literal::U32(0),
                        crate::Literal::U32(255),
                        spirv::GLOp::UClamp,
                    )
                };
                let [min, max] = [min, max].map(|lit| self.writer.get_constant_scalar(lit));
                let clamp_id = self.gen_id();
                block.body.push(Instruction::ext_inst(
                    self.writer.gl450_ext_inst_id,
                    clamp_op,
                    result_type_id,
                    clamp_id,
                    &[extracted, min, max],
                ));
                extracted = clamp_id;
            }
            let is_last = i == u32::from(VEC_LENGTH - 1);
            if is_last {
                // The last insert produces the overall result `id`; it is
                // handed back to the caller rather than pushed onto the block.
                last_instruction = Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    id,
                    preresult,
                    extracted,
                    offset,
                    eight,
                )
            } else {
                let new_preresult = self.gen_id();
                block.body.push(Instruction::quaternary(
                    spirv::Op::BitFieldInsert,
                    result_type_id,
                    new_preresult,
                    preresult,
                    extracted,
                    offset,
                    eight,
                ));
                preresult = new_preresult;
            }
        }
        last_instruction
    }
    /// Build the vectorized lowering of `unpack4x{I,U}8`, used when capability
    /// "Int8" is available.
    ///
    /// The argument `arg0_id` is bit-cast to a vector of four 8-bit integers
    /// (signed when `is_signed` is set, unsigned otherwise); that bitcast is
    /// appended to `block`. The widening conversion to `result_type_id`
    /// (`OpSConvert` for signed, `OpUConvert` for unsigned), whose result id is
    /// `id`, is returned to the caller instead of being pushed.
    fn write_unpack4x8_optimized(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
    ) -> Instruction {
        let component_kind = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let narrow_type_id = self.get_numeric_type_id(NumericType::Vector {
            size: crate::VectorSize::Quad,
            scalar: crate::Scalar {
                kind: component_kind,
                width: 1,
            },
        });

        // Bit casting between a vector and a scalar uses the bit order defined
        // by the SPIR-V spec [1], which is the order the WGSL spec [2] requires.
        // [1]: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast
        // [2]: https://www.w3.org/TR/WGSL/#pack4xI8-builtin
        let bytes = self.gen_id();
        block.body.push(Instruction::unary(
            spirv::Op::Bitcast,
            narrow_type_id,
            bytes,
            arg0_id,
        ));

        let widen_op = if is_signed {
            spirv::Op::SConvert
        } else {
            spirv::Op::UConvert
        };
        Instruction::unary(widen_op, result_type_id, id, bytes)
    }
    /// Build the scalar fallback lowering of `unpack4x{I,U}8`, used when
    /// capability "Int8" is not available.
    ///
    /// For signed unpacking the argument `arg0_id` is first bit-cast to `i32`.
    /// Four bit-field extracts (`OpBitFieldSExtract` for signed,
    /// `OpBitFieldUExtract` for unsigned), each 8 bits wide at bit offset
    /// `8 * i`, are then appended to `block`. The `OpCompositeConstruct` that
    /// assembles the four parts into `result_type_id` with result id `id` is
    /// returned to the caller instead of being pushed.
    fn write_unpack4x8_polyfill(
        &mut self,
        block: &mut Block,
        result_type_id: u32,
        arg0_id: u32,
        id: u32,
        is_signed: bool,
    ) -> Instruction {
        const LANES: u8 = 4;

        let component_kind = if is_signed {
            crate::ScalarKind::Sint
        } else {
            crate::ScalarKind::Uint
        };
        let extract_op = if is_signed {
            spirv::Op::BitFieldSExtract
        } else {
            spirv::Op::BitFieldUExtract
        };

        // The `i32` type is requested unconditionally, even though only the
        // signed path uses it.
        let i32_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar::I32));
        let field_width = self.writer.get_constant_scalar(crate::Literal::U32(8));
        let component_type_id = self.get_numeric_type_id(NumericType::Scalar(crate::Scalar {
            kind: component_kind,
            width: 4,
        }));

        block
            .body
            .reserve(usize::from(LANES) * 2 + usize::from(is_signed));

        let mut source_id = arg0_id;
        if is_signed {
            // Reinterpret the packed word as signed before the signed extracts.
            source_id = self.gen_id();
            block.body.push(Instruction::unary(
                spirv::Op::Bitcast,
                i32_type_id,
                source_id,
                arg0_id,
            ));
        }

        // All result ids are generated up front, before any extract is emitted.
        let lane_ids: [_; LANES as usize] = core::array::from_fn(|_| self.gen_id());
        for (lane_id, lane) in lane_ids.iter().copied().zip(0u32..) {
            let bit_offset = self
                .writer
                .get_constant_scalar(crate::Literal::U32(lane * 8));
            block.body.push(Instruction::ternary(
                extract_op,
                component_type_id,
                lane_id,
                source_id,
                bit_offset,
                field_width,
            ));
        }

        Instruction::composite_construct(result_type_id, id, &lane_ids)
    }
    /// Generate one or more SPIR-V blocks for `naga_block`.
    ///
    /// Use `label_id` as the label for the SPIR-V entry point block.
--- a/naga/tests/out/spv/wgsl-6772-unpack-expr-accesses.spvasm
+++ b/naga/tests/out/spv/wgsl-6772-unpack-expr-accesses.spvasm
@ -1,8 +1,9 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 30
+; Bound: 23
 OpCapability Shader
 OpCapability Int8
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %4 "main"
@ -14,27 +15,20 @@ OpExecutionMode %4 LocalSize 1 1 1
 %8 = OpTypeInt 32 0
 %9 = OpConstant  %8  12
 %11 = OpTypeVector %6 4
-%13 = OpConstant  %8  8
+%14 = OpTypeInt 8 1
-%19 = OpConstant  %8  0
+%13 = OpTypeVector %14 4
-%20 = OpConstant  %8  16
+%17 = OpTypeVector %8 4
-%21 = OpConstant  %8  24
+%20 = OpTypeInt 8 0
-%23 = OpTypeVector %8 4
+%19 = OpTypeVector %20 4
 %4 = OpFunction  %2  None %5
 %3 = OpLabel
 OpBranch %10
 %10 = OpLabel
-%14 = OpBitcast  %6  %9
+%15 = OpBitcast  %13  %9
-%15 = OpBitFieldSExtract  %6  %14 %19 %13
+%12 = OpSConvert  %11  %15
-%16 = OpBitFieldSExtract  %6  %14 %13 %13
+%16 = OpCompositeExtract  %6  %12 2
-%17 = OpBitFieldSExtract  %6  %14 %20 %13
+%21 = OpBitcast  %19  %9
-%18 = OpBitFieldSExtract  %6  %14 %21 %13
+%18 = OpUConvert  %17  %21
-%12 = OpCompositeConstruct  %11  %15 %16 %17 %18
+%22 = OpCompositeExtract  %8  %18 1
 %22 = OpCompositeExtract  %6  %12 2
 %25 = OpBitFieldUExtract  %8  %9 %19 %13
 %26 = OpBitFieldUExtract  %8  %9 %13 %13
 %27 = OpBitFieldUExtract  %8  %9 %20 %13
 %28 = OpBitFieldUExtract  %8  %9 %21 %13
 %24 = OpCompositeConstruct  %23  %25 %26 %27 %28
 %29 = OpCompositeExtract  %8  %24 1
 OpReturn
 OpFunctionEnd
--- a/naga/tests/out/spv/wgsl-bits.spvasm
+++ b/naga/tests/out/spv/wgsl-bits.spvasm
@ -1,8 +1,9 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 275
+; Bound: 234
 OpCapability Shader
 OpCapability Int8
 %1 = OpExtInstImport "GLSL.std.450"
 OpMemoryModel Logical GLSL450
 OpEntryPoint GLCompute %15 "main"
@ -43,13 +44,17 @@ OpExecutionMode %15 LocalSize 1 1 1
 %45 = OpTypePointer Function %10
 %47 = OpTypePointer Function %11
 %49 = OpTypePointer Function %13
-%63 = OpConstant  %7  8
+%64 = OpTypeInt 8 0
-%70 = OpConstant  %7  16
+%63 = OpTypeVector %64 4
-%74 = OpConstant  %7  24
+%71 = OpConstant  %3  -128
-%90 = OpConstant  %3  -128
+%72 = OpConstantComposite  %6  %71 %71 %71 %71
-%91 = OpConstant  %3  127
+%73 = OpConstant  %3  127
-%108 = OpConstant  %7  255
+%74 = OpConstantComposite  %6  %73 %73 %73 %73
-%145 = OpConstant  %7  32
+%79 = OpConstant  %7  255
 %80 = OpConstantComposite  %10  %79 %79 %79 %79
 %96 = OpTypeInt 8 1
 %95 = OpTypeVector %96 4
 %104 = OpConstant  %7  32
 %15 = OpFunction  %2  None %16
 %14 = OpLabel
 %48 = OpVariable  %49  Function %27
@ -80,260 +85,215 @@ OpStore %38 %58
 %60 = OpExtInst  %7  %1 PackHalf2x16 %59
 OpStore %38 %60
 %61 = OpLoad  %6  %36
-%64 = OpCompositeExtract  %3  %61 0
+%65 = OpUConvert  %63  %61
-%65 = OpBitcast  %7  %64
+%62 = OpBitcast  %7  %65
 %66 = OpBitFieldInsert  %7  %21 %65 %21 %63
 %67 = OpCompositeExtract  %3  %61 1
 %68 = OpBitcast  %7  %67
 %69 = OpBitFieldInsert  %7  %66 %68 %63 %63
 %71 = OpCompositeExtract  %3  %61 2
 %72 = OpBitcast  %7  %71
 %73 = OpBitFieldInsert  %7  %69 %72 %70 %63
 %75 = OpCompositeExtract  %3  %61 3
 %76 = OpBitcast  %7  %75
 %62 = OpBitFieldInsert  %7  %73 %76 %74 %63
 OpStore %38 %62
 %66 = OpLoad  %10  %44
 %68 = OpUConvert  %63  %66
 %67 = OpBitcast  %7  %68
 OpStore %38 %67
 %69 = OpLoad  %6  %36
 %75 = OpExtInst  %6  %1 SClamp %69 %72 %74
 %76 = OpUConvert  %63  %75
 %70 = OpBitcast  %7  %76
 OpStore %38 %70
 %77 = OpLoad  %10  %44
-%79 = OpCompositeExtract  %7  %77 0
+%81 = OpExtInst  %10  %1 UClamp %77 %24 %80
-%80 = OpBitFieldInsert  %7  %21 %79 %21 %63
+%82 = OpUConvert  %63  %81
-%81 = OpCompositeExtract  %7  %77 1
+%78 = OpBitcast  %7  %82
 %82 = OpBitFieldInsert  %7  %80 %81 %63 %63
 %83 = OpCompositeExtract  %7  %77 2
 %84 = OpBitFieldInsert  %7  %82 %83 %70 %63
 %85 = OpCompositeExtract  %7  %77 3
 %78 = OpBitFieldInsert  %7  %84 %85 %74 %63
 OpStore %38 %78
-%86 = OpLoad  %6  %36
+%83 = OpLoad  %7  %38
-%88 = OpCompositeExtract  %3  %86 0
+%84 = OpExtInst  %13  %1 UnpackSnorm4x8 %83
-%89 = OpBitcast  %7  %88
+OpStore %48 %84
-%92 = OpExtInst  %7  %1 SClamp %89 %90 %91
+%85 = OpLoad  %7  %38
-%93 = OpBitFieldInsert  %7  %21 %92 %21 %63
+%86 = OpExtInst  %13  %1 UnpackUnorm4x8 %85
-%94 = OpCompositeExtract  %3  %86 1
+OpStore %48 %86
-%95 = OpBitcast  %7  %94
+%87 = OpLoad  %7  %38
-%96 = OpExtInst  %7  %1 SClamp %95 %90 %91
+%88 = OpExtInst  %11  %1 UnpackSnorm2x16 %87
-%97 = OpBitFieldInsert  %7  %93 %96 %63 %63
+OpStore %46 %88
-%98 = OpCompositeExtract  %3  %86 2
+%89 = OpLoad  %7  %38
-%99 = OpBitcast  %7  %98
+%90 = OpExtInst  %11  %1 UnpackUnorm2x16 %89
-%100 = OpExtInst  %7  %1 SClamp %99 %90 %91
+OpStore %46 %90
-%101 = OpBitFieldInsert  %7  %97 %100 %70 %63
+%91 = OpLoad  %7  %38
-%102 = OpCompositeExtract  %3  %86 3
+%92 = OpExtInst  %11  %1 UnpackHalf2x16 %91
-%103 = OpBitcast  %7  %102
+OpStore %46 %92
-%104 = OpExtInst  %7  %1 SClamp %103 %90 %91
+%93 = OpLoad  %7  %38
-%87 = OpBitFieldInsert  %7  %101 %104 %74 %63
+%97 = OpBitcast  %95  %93
-OpStore %38 %87
+%94 = OpSConvert  %6  %97
-%105 = OpLoad  %10  %44
+OpStore %36 %94
-%107 = OpCompositeExtract  %7  %105 0
+%98 = OpLoad  %7  %38
-%109 = OpExtInst  %7  %1 UClamp %107 %21 %108
+%100 = OpBitcast  %63  %98
-%110 = OpBitFieldInsert  %7  %21 %109 %21 %63
+%99 = OpUConvert  %10  %100
-%111 = OpCompositeExtract  %7  %105 1
+OpStore %44 %99
-%112 = OpExtInst  %7  %1 UClamp %111 %21 %108
+%101 = OpLoad  %3  %30
-%113 = OpBitFieldInsert  %7  %110 %112 %63 %63
+%102 = OpLoad  %3  %30
-%114 = OpCompositeExtract  %7  %105 2
+%105 = OpExtInst  %7  %1 UMin %28 %104
-%115 = OpExtInst  %7  %1 UClamp %114 %21 %108
+%106 = OpISub  %7  %104 %105
-%116 = OpBitFieldInsert  %7  %113 %115 %70 %63
+%107 = OpExtInst  %7  %1 UMin %29 %106
-%117 = OpCompositeExtract  %7  %105 3
+%103 = OpBitFieldInsert  %3  %101 %102 %105 %107
-%118 = OpExtInst  %7  %1 UClamp %117 %21 %108
+OpStore %30 %103
-%106 = OpBitFieldInsert  %7  %116 %118 %74 %63
+%108 = OpLoad  %4  %32
-OpStore %38 %106
+%109 = OpLoad  %4  %32
-%119 = OpLoad  %7  %38
+%111 = OpExtInst  %7  %1 UMin %28 %104
-%120 = OpExtInst  %13  %1 UnpackSnorm4x8 %119
+%112 = OpISub  %7  %104 %111
-OpStore %48 %120
+%113 = OpExtInst  %7  %1 UMin %29 %112
-%121 = OpLoad  %7  %38
+%110 = OpBitFieldInsert  %4  %108 %109 %111 %113
-%122 = OpExtInst  %13  %1 UnpackUnorm4x8 %121
+OpStore %32 %110
-OpStore %48 %122
+%114 = OpLoad  %5  %34
-%123 = OpLoad  %7  %38
+%115 = OpLoad  %5  %34
-%124 = OpExtInst  %11  %1 UnpackSnorm2x16 %123
+%117 = OpExtInst  %7  %1 UMin %28 %104
-OpStore %46 %124
+%118 = OpISub  %7  %104 %117
-%125 = OpLoad  %7  %38
+%119 = OpExtInst  %7  %1 UMin %29 %118
-%126 = OpExtInst  %11  %1 UnpackUnorm2x16 %125
+%116 = OpBitFieldInsert  %5  %114 %115 %117 %119
-OpStore %46 %126
+OpStore %34 %116
 %120 = OpLoad  %6  %36
 %121 = OpLoad  %6  %36
 %123 = OpExtInst  %7  %1 UMin %28 %104
 %124 = OpISub  %7  %104 %123
 %125 = OpExtInst  %7  %1 UMin %29 %124
 %122 = OpBitFieldInsert  %6  %120 %121 %123 %125
 OpStore %36 %122
 %126 = OpLoad  %7  %38
 %127 = OpLoad  %7  %38
-%128 = OpExtInst  %11  %1 UnpackHalf2x16 %127
+%129 = OpExtInst  %7  %1 UMin %28 %104
-OpStore %46 %128
+%130 = OpISub  %7  %104 %129
-%129 = OpLoad  %7  %38
+%131 = OpExtInst  %7  %1 UMin %29 %130
-%131 = OpBitcast  %3  %129
+%128 = OpBitFieldInsert  %7  %126 %127 %129 %131
-%132 = OpBitFieldSExtract  %3  %131 %21 %63
+OpStore %38 %128
-%133 = OpBitFieldSExtract  %3  %131 %63 %63
+%132 = OpLoad  %8  %40
-%134 = OpBitFieldSExtract  %3  %131 %70 %63
+%133 = OpLoad  %8  %40
-%135 = OpBitFieldSExtract  %3  %131 %74 %63
+%135 = OpExtInst  %7  %1 UMin %28 %104
-%130 = OpCompositeConstruct  %6  %132 %133 %134 %135
+%136 = OpISub  %7  %104 %135
-OpStore %36 %130
+%137 = OpExtInst  %7  %1 UMin %29 %136
-%136 = OpLoad  %7  %38
+%134 = OpBitFieldInsert  %8  %132 %133 %135 %137
-%138 = OpBitFieldUExtract  %7  %136 %21 %63
+OpStore %40 %134
-%139 = OpBitFieldUExtract  %7  %136 %63 %63
+%138 = OpLoad  %9  %42
-%140 = OpBitFieldUExtract  %7  %136 %70 %63
+%139 = OpLoad  %9  %42
-%141 = OpBitFieldUExtract  %7  %136 %74 %63
+%141 = OpExtInst  %7  %1 UMin %28 %104
-%137 = OpCompositeConstruct  %10  %138 %139 %140 %141
+%142 = OpISub  %7  %104 %141
-OpStore %44 %137
+%143 = OpExtInst  %7  %1 UMin %29 %142
-%142 = OpLoad  %3  %30
+%140 = OpBitFieldInsert  %9  %138 %139 %141 %143
-%143 = OpLoad  %3  %30
+OpStore %42 %140
-%146 = OpExtInst  %7  %1 UMin %28 %145
+%144 = OpLoad  %10  %44
-%147 = OpISub  %7  %145 %146
+%145 = OpLoad  %10  %44
-%148 = OpExtInst  %7  %1 UMin %29 %147
+%147 = OpExtInst  %7  %1 UMin %28 %104
-%144 = OpBitFieldInsert  %3  %142 %143 %146 %148
+%148 = OpISub  %7  %104 %147
-OpStore %30 %144
+%149 = OpExtInst  %7  %1 UMin %29 %148
-%149 = OpLoad  %4  %32
+%146 = OpBitFieldInsert  %10  %144 %145 %147 %149
-%150 = OpLoad  %4  %32
+OpStore %44 %146
-%152 = OpExtInst  %7  %1 UMin %28 %145
+%150 = OpLoad  %3  %30
-%153 = OpISub  %7  %145 %152
+%152 = OpExtInst  %7  %1 UMin %28 %104
 %153 = OpISub  %7  %104 %152
 %154 = OpExtInst  %7  %1 UMin %29 %153
-%151 = OpBitFieldInsert  %4  %149 %150 %152 %154
+%151 = OpBitFieldSExtract  %3  %150 %152 %154
-OpStore %32 %151
+OpStore %30 %151
-%155 = OpLoad  %5  %34
+%155 = OpLoad  %4  %32
-%156 = OpLoad  %5  %34
+%157 = OpExtInst  %7  %1 UMin %28 %104
-%158 = OpExtInst  %7  %1 UMin %28 %145
+%158 = OpISub  %7  %104 %157
-%159 = OpISub  %7  %145 %158
+%159 = OpExtInst  %7  %1 UMin %29 %158
-%160 = OpExtInst  %7  %1 UMin %29 %159
+%156 = OpBitFieldSExtract  %4  %155 %157 %159
-%157 = OpBitFieldInsert  %5  %155 %156 %158 %160
+OpStore %32 %156
-OpStore %34 %157
+%160 = OpLoad  %5  %34
-%161 = OpLoad  %6  %36
+%162 = OpExtInst  %7  %1 UMin %28 %104
-%162 = OpLoad  %6  %36
+%163 = OpISub  %7  %104 %162
-%164 = OpExtInst  %7  %1 UMin %28 %145
+%164 = OpExtInst  %7  %1 UMin %29 %163
-%165 = OpISub  %7  %145 %164
+%161 = OpBitFieldSExtract  %5  %160 %162 %164
-%166 = OpExtInst  %7  %1 UMin %29 %165
+OpStore %34 %161
-%163 = OpBitFieldInsert  %6  %161 %162 %164 %166
+%165 = OpLoad  %6  %36
-OpStore %36 %163
+%167 = OpExtInst  %7  %1 UMin %28 %104
-%167 = OpLoad  %7  %38
+%168 = OpISub  %7  %104 %167
-%168 = OpLoad  %7  %38
+%169 = OpExtInst  %7  %1 UMin %29 %168
-%170 = OpExtInst  %7  %1 UMin %28 %145
+%166 = OpBitFieldSExtract  %6  %165 %167 %169
-%171 = OpISub  %7  %145 %170
+OpStore %36 %166
-%172 = OpExtInst  %7  %1 UMin %29 %171
+%170 = OpLoad  %7  %38
-%169 = OpBitFieldInsert  %7  %167 %168 %170 %172
+%172 = OpExtInst  %7  %1 UMin %28 %104
-OpStore %38 %169
+%173 = OpISub  %7  %104 %172
-%173 = OpLoad  %8  %40
+%174 = OpExtInst  %7  %1 UMin %29 %173
-%174 = OpLoad  %8  %40
+%171 = OpBitFieldUExtract  %7  %170 %172 %174
-%176 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %38 %171
-%177 = OpISub  %7  %145 %176
+%175 = OpLoad  %8  %40
-%178 = OpExtInst  %7  %1 UMin %29 %177
+%177 = OpExtInst  %7  %1 UMin %28 %104
-%175 = OpBitFieldInsert  %8  %173 %174 %176 %178
+%178 = OpISub  %7  %104 %177
-OpStore %40 %175
+%179 = OpExtInst  %7  %1 UMin %29 %178
-%179 = OpLoad  %9  %42
+%176 = OpBitFieldUExtract  %8  %175 %177 %179
 OpStore %40 %176
 %180 = OpLoad  %9  %42
-%182 = OpExtInst  %7  %1 UMin %28 %145
+%182 = OpExtInst  %7  %1 UMin %28 %104
-%183 = OpISub  %7  %145 %182
+%183 = OpISub  %7  %104 %182
 %184 = OpExtInst  %7  %1 UMin %29 %183
-%181 = OpBitFieldInsert  %9  %179 %180 %182 %184
+%181 = OpBitFieldUExtract  %9  %180 %182 %184
 OpStore %42 %181
 %185 = OpLoad  %10  %44
-%186 = OpLoad  %10  %44
+%187 = OpExtInst  %7  %1 UMin %28 %104
-%188 = OpExtInst  %7  %1 UMin %28 %145
+%188 = OpISub  %7  %104 %187
-%189 = OpISub  %7  %145 %188
+%189 = OpExtInst  %7  %1 UMin %29 %188
-%190 = OpExtInst  %7  %1 UMin %29 %189
+%186 = OpBitFieldUExtract  %10  %185 %187 %189
-%187 = OpBitFieldInsert  %10  %185 %186 %188 %190
+OpStore %44 %186
-OpStore %44 %187
+%190 = OpLoad  %3  %30
-%191 = OpLoad  %3  %30
+%191 = OpExtInst  %3  %1 FindILsb %190
-%193 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %30 %191
-%194 = OpISub  %7  %145 %193
+%192 = OpLoad  %8  %40
-%195 = OpExtInst  %7  %1 UMin %29 %194
+%193 = OpExtInst  %8  %1 FindILsb %192
-%192 = OpBitFieldSExtract  %3  %191 %193 %195
+OpStore %40 %193
-OpStore %30 %192
+%194 = OpLoad  %5  %34
-%196 = OpLoad  %4  %32
+%195 = OpExtInst  %5  %1 FindSMsb %194
-%198 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %34 %195
-%199 = OpISub  %7  %145 %198
+%196 = OpLoad  %9  %42
-%200 = OpExtInst  %7  %1 UMin %29 %199
+%197 = OpExtInst  %9  %1 FindUMsb %196
-%197 = OpBitFieldSExtract  %4  %196 %198 %200
+OpStore %42 %197
-OpStore %32 %197
+%198 = OpLoad  %3  %30
-%201 = OpLoad  %5  %34
+%199 = OpExtInst  %3  %1 FindSMsb %198
-%203 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %30 %199
-%204 = OpISub  %7  %145 %203
+%200 = OpLoad  %7  %38
-%205 = OpExtInst  %7  %1 UMin %29 %204
+%201 = OpExtInst  %7  %1 FindUMsb %200
-%202 = OpBitFieldSExtract  %5  %201 %203 %205
+OpStore %38 %201
-OpStore %34 %202
+%202 = OpLoad  %3  %30
-%206 = OpLoad  %6  %36
+%203 = OpBitCount  %3  %202
-%208 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %30 %203
-%209 = OpISub  %7  %145 %208
+%204 = OpLoad  %4  %32
-%210 = OpExtInst  %7  %1 UMin %29 %209
+%205 = OpBitCount  %4  %204
-%207 = OpBitFieldSExtract  %6  %206 %208 %210
+OpStore %32 %205
-OpStore %36 %207
+%206 = OpLoad  %5  %34
-%211 = OpLoad  %7  %38
+%207 = OpBitCount  %5  %206
-%213 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %34 %207
-%214 = OpISub  %7  %145 %213
+%208 = OpLoad  %6  %36
-%215 = OpExtInst  %7  %1 UMin %29 %214
+%209 = OpBitCount  %6  %208
-%212 = OpBitFieldUExtract  %7  %211 %213 %215
+OpStore %36 %209
-OpStore %38 %212
+%210 = OpLoad  %7  %38
-%216 = OpLoad  %8  %40
+%211 = OpBitCount  %7  %210
-%218 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %38 %211
-%219 = OpISub  %7  %145 %218
+%212 = OpLoad  %8  %40
-%220 = OpExtInst  %7  %1 UMin %29 %219
+%213 = OpBitCount  %8  %212
-%217 = OpBitFieldUExtract  %8  %216 %218 %220
+OpStore %40 %213
-OpStore %40 %217
+%214 = OpLoad  %9  %42
-%221 = OpLoad  %9  %42
+%215 = OpBitCount  %9  %214
-%223 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %42 %215
-%224 = OpISub  %7  %145 %223
+%216 = OpLoad  %10  %44
-%225 = OpExtInst  %7  %1 UMin %29 %224
+%217 = OpBitCount  %10  %216
-%222 = OpBitFieldUExtract  %9  %221 %223 %225
+OpStore %44 %217
-OpStore %42 %222
+%218 = OpLoad  %3  %30
-%226 = OpLoad  %10  %44
+%219 = OpBitReverse  %3  %218
-%228 = OpExtInst  %7  %1 UMin %28 %145
+OpStore %30 %219
-%229 = OpISub  %7  %145 %228
+%220 = OpLoad  %4  %32
-%230 = OpExtInst  %7  %1 UMin %29 %229
+%221 = OpBitReverse  %4  %220
-%227 = OpBitFieldUExtract  %10  %226 %228 %230
+OpStore %32 %221
-OpStore %44 %227
+%222 = OpLoad  %5  %34
-%231 = OpLoad  %3  %30
+%223 = OpBitReverse  %5  %222
-%232 = OpExtInst  %3  %1 FindILsb %231
+OpStore %34 %223
-OpStore %30 %232
+%224 = OpLoad  %6  %36
-%233 = OpLoad  %8  %40
+%225 = OpBitReverse  %6  %224
-%234 = OpExtInst  %8  %1 FindILsb %233
+OpStore %36 %225
-OpStore %40 %234
+%226 = OpLoad  %7  %38
-%235 = OpLoad  %5  %34
+%227 = OpBitReverse  %7  %226
-%236 = OpExtInst  %5  %1 FindSMsb %235
+OpStore %38 %227
-OpStore %34 %236
+%228 = OpLoad  %8  %40
-%237 = OpLoad  %9  %42
+%229 = OpBitReverse  %8  %228
-%238 = OpExtInst  %9  %1 FindUMsb %237
+OpStore %40 %229
-OpStore %42 %238
+%230 = OpLoad  %9  %42
-%239 = OpLoad  %3  %30
+%231 = OpBitReverse  %9  %230
-%240 = OpExtInst  %3  %1 FindSMsb %239
+OpStore %42 %231
-OpStore %30 %240
+%232 = OpLoad  %10  %44
-%241 = OpLoad  %7  %38
+%233 = OpBitReverse  %10  %232
-%242 = OpExtInst  %7  %1 FindUMsb %241
+OpStore %44 %233
 OpStore %38 %242
 %243 = OpLoad  %3  %30
 %244 = OpBitCount  %3  %243
 OpStore %30 %244
 %245 = OpLoad  %4  %32
 %246 = OpBitCount  %4  %245
 OpStore %32 %246
 %247 = OpLoad  %5  %34
 %248 = OpBitCount  %5  %247
 OpStore %34 %248
 %249 = OpLoad  %6  %36
 %250 = OpBitCount  %6  %249
 OpStore %36 %250
 %251 = OpLoad  %7  %38
 %252 = OpBitCount  %7  %251
 OpStore %38 %252
 %253 = OpLoad  %8  %40
 %254 = OpBitCount  %8  %253
 OpStore %40 %254
 %255 = OpLoad  %9  %42
 %256 = OpBitCount  %9  %255
 OpStore %42 %256
 %257 = OpLoad  %10  %44
 %258 = OpBitCount  %10  %257
 OpStore %44 %258
 %259 = OpLoad  %3  %30
 %260 = OpBitReverse  %3  %259
 OpStore %30 %260
 %261 = OpLoad  %4  %32
 %262 = OpBitReverse  %4  %261
 OpStore %32 %262
 %263 = OpLoad  %5  %34
 %264 = OpBitReverse  %5  %263
 OpStore %34 %264
 %265 = OpLoad  %6  %36
 %266 = OpBitReverse  %6  %265
 OpStore %36 %266
 %267 = OpLoad  %7  %38
 %268 = OpBitReverse  %7  %267
 OpStore %38 %268
 %269 = OpLoad  %8  %40
 %270 = OpBitReverse  %8  %269
 OpStore %40 %270
 %271 = OpLoad  %9  %42
 %272 = OpBitReverse  %9  %271
 OpStore %42 %272
 %273 = OpLoad  %10  %44
 %274 = OpBitReverse  %10  %273
 OpStore %44 %274
 OpReturn
 OpFunctionEnd