diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py
new file mode 100644
index 000000000..74a728b75
--- /dev/null
+++ b/tools/hlsl_generator/gen.py
@@ -0,0 +1,256 @@
+# TODO: OVERLOADS! Currently, we are generating multiple functions for the same function with different types.
+# e.g. `groupNonUniformIAdd` and `groupNonUniformFAdd` can be simplified to a single function named `groupNonUniformAdd`
+# with multiple overloads. As an extra point, we can drop the requirement for templates and generate the type
+
+import json
+import io
+from enum import Enum
+from argparse import ArgumentParser
+import os
+from typing import NamedTuple
+from typing import Optional
+
+head = """#ifdef __HLSL_VERSION
+#include "spirv/unified1/spirv.hpp"
+#include "spirv/unified1/GLSL.std.450.h"
+#endif
+
+#include "nbl/builtin/hlsl/type_traits.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+#ifdef __HLSL_VERSION
+namespace spirv
+{
+
+//! General Decls
+template
+using pointer_t = vk::SpirvOpaqueType>, T>;
+
+// The holy operation that makes addrof possible
+template
+[[vk::ext_instruction(spv::OpCopyObject)]]
+pointer_t copyObject([[vk::ext_reference]] T value);
+
+//! Std 450 Extended set operations
+template
+[[vk::ext_instruction(GLSLstd450MatrixInverse)]]
+SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat);
+
+// Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on)
+template
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpBitcast)]]
+enable_if_t && is_spirv_type_v, T> bitcast(U);
+
+template
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpBitcast)]]
+uint64_t bitcast(pointer_t);
+
+template
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpBitcast)]]
+pointer_t bitcast(uint64_t);
+
+template
+[[vk::ext_instruction(spv::OpBitcast)]]
+T bitcast(U);
+"""
+
+foot = """}
+
+#endif
+ }
+}
+
+#endif
+"""
+
+def gen(grammer_path, metadata_path, output_path):
+    grammer_raw = open(grammer_path, "r").read()
+    grammer = json.loads(grammer_raw)
+    del grammer_raw
+
+    metadata_raw = open(metadata_path, "r").read()
+    metadata = json.loads(metadata_raw)
+    del metadata_raw
+
+    output = open(output_path, "w", buffering=1024**2)
+
+    builtins = [x for x in grammer["operand_kinds"] if x["kind"] == "BuiltIn"][0]["enumerants"]
+    execution_modes = [x for x in grammer["operand_kinds"] if x["kind"] == "ExecutionMode"][0]["enumerants"]
+    group_operations = [x for x in grammer["operand_kinds"] if x["kind"] == "GroupOperation"][0]["enumerants"]
+
+    with output as writer:
+        writer.write(head)
+
+        writer.write("\n//! Builtins\n")
+        for name in metadata["builtins"].keys():
+            # Validate
+            builtin_exist = False
+            for b in builtins:
+                if b["enumerant"] == name: builtin_exist = True
+
+            if (builtin_exist):
+                bm = metadata["builtins"][name]
+                is_mutable = "mutable" in bm.keys() and bm["mutable"]
+                writer.write("[[vk::ext_builtin_input(spv::BuiltIn" + name + ")]]\n")
+                writer.write("static " + ("" if is_mutable else "const ") + bm["type"] + " " + name + ";\n")
+            else:
+                raise Exception("Invalid builtin " + name)
+
+        writer.write("\n//! 
Execution Modes\nnamespace execution_mode\n{") + for em in execution_modes: + name = em["enumerant"] + name_l = name[0].lower() + name[1:] + writer.write("\n\tvoid " + name_l + "()\n\t{\n\t\tvk::ext_execution_mode(spv::ExecutionMode" + name + ");\n\t}\n") + writer.write("}\n") + + writer.write("\n//! Group Operations\nnamespace group_operation\n{\n") + for go in group_operations: + name = go["enumerant"] + value = go["value"] + writer.write("\tstatic const uint32_t " + name + " = " + str(value) + ";\n") + writer.write("}\n") + + writer.write("\n//! Instructions\n") + for instruction in grammer["instructions"]: + match instruction["class"]: + case "Atomic": + match instruction["opname"]: + # integers operate on 2s complement so same op for signed and unsigned + case "OpAtomicIAdd" | "OpAtomicISub" | "OpAtomicIIncrement" | "OpAtomicIDecrement" | "OpAtomicAnd" | "OpAtomicOr" | "OpAtomicXor": + processInst(writer, instruction, InstOptions({"uint32_t", "int32_t"})) + processInst(writer, instruction, InstOptions({"uint32_t", "int32_t"}, Shape.PTR_TEMPLATE)) + processInst(writer, instruction, InstOptions({"uint64_t", "int64_t"})) + processInst(writer, instruction, InstOptions({"uint64_t", "int64_t"}, Shape.PTR_TEMPLATE)) + case "OpAtomicUMin" | "OpAtomicUMax": + processInst(writer, instruction, InstOptions({"uint32_t"})) + processInst(writer, instruction, InstOptions({"uint32_t"}, Shape.PTR_TEMPLATE)) + case "OpAtomicSMin" | "OpAtomicSMax": + processInst(writer, instruction, InstOptions({"int32_t"})) + processInst(writer, instruction, InstOptions({"int32_t"}, Shape.PTR_TEMPLATE)) + case "OpAtomicFMinEXT" | "OpAtomicFMaxEXT" | "OpAtomicFAddEXT": + processInst(writer, instruction, InstOptions({"float"})) + processInst(writer, instruction, InstOptions({"float"}, Shape.PTR_TEMPLATE)) + case _: + processInst(writer, instruction, InstOptions()) + processInst(writer, instruction, InstOptions({}, Shape.PTR_TEMPLATE)) + case "Memory": + processInst(writer, instruction, InstOptions({}, Shape.PTR_TEMPLATE)) + processInst(writer, instruction, InstOptions({}, Shape.PSB_RT)) + case "Barrier": + processInst(writer, instruction, InstOptions()) + case "Bit": + match instruction["opname"]: + case "OpBitFieldUExtract": + processInst(writer, instruction, InstOptions({"Unsigned"})) + case "OpBitFieldSExtract": + processInst(writer, instruction, InstOptions({"Signed"})) + case "OpBitFieldInsert": + processInst(writer, instruction, InstOptions({"Signed", "Unsigned"})) + case "Reserved": + match instruction["opname"]: + case "OpBeginInvocationInterlockEXT" | "OpEndInvocationInterlockEXT": + processInst(writer, instruction, InstOptions()) + case "Non-Uniform": + processInst(writer, instruction, InstOptions()) + case _: continue # TODO + + writer.write(foot) + +class Shape(Enum): + DEFAULT = 0, + PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround + PSB_RT = 2, # PhysicalStorageBuffer Result Type + +class InstOptions(NamedTuple): + allowed_types: list = {} + shape: Shape = Shape.DEFAULT + +def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): + name = instruction["opname"] + + # Attributes + templates = ["typename T"] + conds = [] + result_ty = "void" + args = [] + + if options.shape == Shape.PTR_TEMPLATE: + templates.append("typename P") + + if options.shape == Shape.PTR_TEMPLATE: + conds.append("is_spirv_type_v
<P>
") + if len(options.allowed_types) > 0: + allowed_types_conds = [] + for at in options.allowed_types: + if at == "Signed": + allowed_types_conds.append("is_signed_v") + elif at == "Unsigned": + allowed_types_conds.append("is_unsigned_v") + else: + allowed_types_conds.append("is_same_v") + conds.append("(" + " || ".join(allowed_types_conds) + ")") + + if "operands" in instruction: + for operand in instruction["operands"]: + op_name = operand["name"].strip("'") if "name" in operand else None + op_name = op_name[0].lower() + op_name[1:] if (op_name != None) else "" + match operand["kind"]: + case "IdResultType" | "IdResult": + result_ty = "T" + case "IdRef": + match operand["name"]: + case "'Pointer'": + if options.shape == Shape.PTR_TEMPLATE: + args.append("P " + op_name) + elif options.shape == Shape.PSB_RT: + args.append("pointer_t " + op_name) + else: + args.append("[[vk::ext_reference]] T " + op_name) + case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": + args.append("T " + op_name) + case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": + args.append("uint32_t " + op_name) + case "'Predicate'": args.append("bool " + op_name) + case "'ClusterSize'": + if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload + else: return # TODO + case _: return # TODO + case "IdScope": args.append("uint32_t " + op_name.lower() + "Scope") + case "IdMemorySemantics": args.append(" uint32_t " + op_name) + case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + op_name) + case "MemoryAccess": + writeInst(writer, templates, name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, templates, name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, templates + ["uint32_t alignment"], name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + case _: return # TODO + + writeInst(writer, templates, name, conds, result_ty, args) + + +def writeInst(writer: io.TextIOWrapper, templates, name, conds, result_ty, args): + fn_name = name[2].lower() + name[3:] + writer.write("template<" + ", ".join(templates) + ">\n[[vk::ext_instruction(spv::" + name + ")]]\n") + if len(conds) > 0: + writer.write("enable_if_t<" + " && ".join(conds) + ", " + result_ty + ">") + else: + writer.write(result_ty) + writer.write(" " + fn_name + "(" + ", ".join(args) + ");\n\n") + + +if __name__ == "__main__": + script_dir_path = os.path.abspath(os.path.dirname(__file__)) + + parser = ArgumentParser(description="Generate HLSL from SPIR-V instructions") + parser.add_argument("output", type=str, help="HLSL output file") + parser.add_argument("--grammer", required=False, type=str, help="Input SPIR-V grammer JSON file", default=os.path.join(script_dir_path, "../../include/spirv/unified1/spirv.core.grammar.json")) + parser.add_argument("--metadata", required=False, type=str, help="Input SPIR-V Instructions/BuiltIns type mapping/attributes/etc", default=os.path.join(script_dir_path, "metadata.json")) + args = parser.parse_args() + + gen(args.grammer, args.metadata, args.output) + diff --git a/tools/hlsl_generator/metadata.json b/tools/hlsl_generator/metadata.json new file mode 100644 index 000000000..b3ed59809 --- /dev/null +++ b/tools/hlsl_generator/metadata.json @@ -0,0 +1,66 @@ +{ + "builtins": { + "HelperInvocation": { + "type": "bool", + 
"mutable": true + }, + "Position": { + "type": "float32_t4" + }, + "VertexIndex": { + "type": "uint32_t", + "mutable": true + }, + "InstanceIndex": { + "type": "uint32_t", + "mutable": true + }, + "NumWorkgroups": { + "type": "uint32_t3", + "mutable": true + }, + "WorkgroupId": { + "type": "uint32_t3", + "mutable": true + }, + "LocalInvocationId": { + "type": "uint32_t3", + "mutable": true + }, + "GlobalInvocationId": { + "type": "uint32_t3", + "mutable": true + }, + "LocalInvocationIndex": { + "type": "uint32_t", + "mutable": true + }, + "SubgroupEqMask": { + "type": "uint32_t4" + }, + "SubgroupGeMask": { + "type": "uint32_t4" + }, + "SubgroupGtMask": { + "type": "uint32_t4" + }, + "SubgroupLeMask": { + "type": "uint32_t4" + }, + "SubgroupLtMask": { + "type": "uint32_t4" + }, + "SubgroupSize": { + "type": "uint32_t" + }, + "NumSubgroups": { + "type": "uint32_t" + }, + "SubgroupId": { + "type": "uint32_t" + }, + "SubgroupLocalInvocationId": { + "type": "uint32_t" + } + } +} \ No newline at end of file diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl new file mode 100644 index 000000000..60061f9a7 --- /dev/null +++ b/tools/hlsl_generator/out.hlsl @@ -0,0 +1,1062 @@ +#ifdef __HLSL_VERSION +#include "spirv/unified1/spirv.hpp" +#include "spirv/unified1/GLSL.std.450.h" +#endif + +#include "nbl/builtin/hlsl/type_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +namespace spirv +{ + +//! General Decls +template +using pointer_t = vk::SpirvOpaqueType>, T>; + +// The holy operation that makes addrof possible +template +[[vk::ext_instruction(spv::OpCopyObject)]] +pointer_t copyObject([[vk::ext_reference]] T value); + +//! Std 450 Extended set operations +template +[[vk::ext_instruction(GLSLstd450MatrixInverse)]] +SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); + +// Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +enable_if_t && is_spirv_type_v, T> bitcast(U); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +uint64_t bitcast(pointer_t); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +pointer_t bitcast(uint64_t); + +template +[[vk::ext_instruction(spv::OpBitcast)]] +T bitcast(U); + +//! 
Builtins +[[vk::ext_builtin_input(spv::BuiltInHelperInvocation)]] +static const bool HelperInvocation; +[[vk::ext_builtin_input(spv::BuiltInPosition)]] +static const float32_t4 Position; +[[vk::ext_builtin_input(spv::BuiltInVertexIndex)]] +static const uint32_t VertexIndex; +[[vk::ext_builtin_input(spv::BuiltInInstanceIndex)]] +static const uint32_t InstanceIndex; +[[vk::ext_builtin_input(spv::BuiltInNumWorkgroups)]] +static const uint32_t3 NumWorkgroups; +[[vk::ext_builtin_input(spv::BuiltInWorkgroupId)]] +static const uint32_t3 WorkgroupId; +[[vk::ext_builtin_input(spv::BuiltInLocalInvocationId)]] +static const uint32_t3 LocalInvocationId; +[[vk::ext_builtin_input(spv::BuiltInGlobalInvocationId)]] +static const uint32_t3 GlobalInvocationId; +[[vk::ext_builtin_input(spv::BuiltInLocalInvocationIndex)]] +static const uint32_t LocalInvocationIndex; +[[vk::ext_builtin_input(spv::BuiltInSubgroupEqMask)]] +static const uint32_t4 SubgroupEqMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupGeMask)]] +static const uint32_t4 SubgroupGeMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupGtMask)]] +static const uint32_t4 SubgroupGtMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLeMask)]] +static const uint32_t4 SubgroupLeMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLtMask)]] +static const uint32_t4 SubgroupLtMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupSize)]] +static const uint32_t SubgroupSize; +[[vk::ext_builtin_input(spv::BuiltInNumSubgroups)]] +static const uint32_t NumSubgroups; +[[vk::ext_builtin_input(spv::BuiltInSubgroupId)]] +static const uint32_t SubgroupId; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLocalInvocationId)]] +static const uint32_t SubgroupLocalInvocationId; + +//! Execution Modes +namespace execution_mode +{ + void invocations() + { + vk::ext_execution_mode(spv::ExecutionModeInvocations); + } + + void spacingEqual() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingEqual); + } + + void spacingFractionalEven() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingFractionalEven); + } + + void spacingFractionalOdd() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingFractionalOdd); + } + + void vertexOrderCw() + { + vk::ext_execution_mode(spv::ExecutionModeVertexOrderCw); + } + + void vertexOrderCcw() + { + vk::ext_execution_mode(spv::ExecutionModeVertexOrderCcw); + } + + void pixelCenterInteger() + { + vk::ext_execution_mode(spv::ExecutionModePixelCenterInteger); + } + + void originUpperLeft() + { + vk::ext_execution_mode(spv::ExecutionModeOriginUpperLeft); + } + + void originLowerLeft() + { + vk::ext_execution_mode(spv::ExecutionModeOriginLowerLeft); + } + + void earlyFragmentTests() + { + vk::ext_execution_mode(spv::ExecutionModeEarlyFragmentTests); + } + + void pointMode() + { + vk::ext_execution_mode(spv::ExecutionModePointMode); + } + + void xfb() + { + vk::ext_execution_mode(spv::ExecutionModeXfb); + } + + void depthReplacing() + { + vk::ext_execution_mode(spv::ExecutionModeDepthReplacing); + } + + void depthGreater() + { + vk::ext_execution_mode(spv::ExecutionModeDepthGreater); + } + + void depthLess() + { + vk::ext_execution_mode(spv::ExecutionModeDepthLess); + } + + void depthUnchanged() + { + vk::ext_execution_mode(spv::ExecutionModeDepthUnchanged); + } + + void localSize() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSize); + } + + void localSizeHint() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeHint); + } + + void inputPoints() + { + vk::ext_execution_mode(spv::ExecutionModeInputPoints); + } + + void inputLines() + { + 
vk::ext_execution_mode(spv::ExecutionModeInputLines); + } + + void inputLinesAdjacency() + { + vk::ext_execution_mode(spv::ExecutionModeInputLinesAdjacency); + } + + void triangles() + { + vk::ext_execution_mode(spv::ExecutionModeTriangles); + } + + void inputTrianglesAdjacency() + { + vk::ext_execution_mode(spv::ExecutionModeInputTrianglesAdjacency); + } + + void quads() + { + vk::ext_execution_mode(spv::ExecutionModeQuads); + } + + void isolines() + { + vk::ext_execution_mode(spv::ExecutionModeIsolines); + } + + void outputVertices() + { + vk::ext_execution_mode(spv::ExecutionModeOutputVertices); + } + + void outputPoints() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPoints); + } + + void outputLineStrip() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLineStrip); + } + + void outputTriangleStrip() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTriangleStrip); + } + + void vecTypeHint() + { + vk::ext_execution_mode(spv::ExecutionModeVecTypeHint); + } + + void contractionOff() + { + vk::ext_execution_mode(spv::ExecutionModeContractionOff); + } + + void initializer() + { + vk::ext_execution_mode(spv::ExecutionModeInitializer); + } + + void finalizer() + { + vk::ext_execution_mode(spv::ExecutionModeFinalizer); + } + + void subgroupSize() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupSize); + } + + void subgroupsPerWorkgroup() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupsPerWorkgroup); + } + + void subgroupsPerWorkgroupId() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupsPerWorkgroupId); + } + + void localSizeId() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeId); + } + + void localSizeHintId() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeHintId); + } + + void nonCoherentColorAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentColorAttachmentReadEXT); + } + + void nonCoherentDepthAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentDepthAttachmentReadEXT); + } + + void nonCoherentStencilAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentStencilAttachmentReadEXT); + } + + void subgroupUniformControlFlowKHR() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupUniformControlFlowKHR); + } + + void postDepthCoverage() + { + vk::ext_execution_mode(spv::ExecutionModePostDepthCoverage); + } + + void denormPreserve() + { + vk::ext_execution_mode(spv::ExecutionModeDenormPreserve); + } + + void denormFlushToZero() + { + vk::ext_execution_mode(spv::ExecutionModeDenormFlushToZero); + } + + void signedZeroInfNanPreserve() + { + vk::ext_execution_mode(spv::ExecutionModeSignedZeroInfNanPreserve); + } + + void roundingModeRTE() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTE); + } + + void roundingModeRTZ() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTZ); + } + + void earlyAndLateFragmentTestsAMD() + { + vk::ext_execution_mode(spv::ExecutionModeEarlyAndLateFragmentTestsAMD); + } + + void stencilRefReplacingEXT() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefReplacingEXT); + } + + void coalescingAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeCoalescingAMDX); + } + + void maxNodeRecursionAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeMaxNodeRecursionAMDX); + } + + void staticNumWorkgroupsAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeStaticNumWorkgroupsAMDX); + } + + void shaderIndexAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeShaderIndexAMDX); + } + + void maxNumWorkgroupsAMDX() + { + 
vk::ext_execution_mode(spv::ExecutionModeMaxNumWorkgroupsAMDX); + } + + void stencilRefUnchangedFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefUnchangedFrontAMD); + } + + void stencilRefGreaterFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefGreaterFrontAMD); + } + + void stencilRefLessFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefLessFrontAMD); + } + + void stencilRefUnchangedBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefUnchangedBackAMD); + } + + void stencilRefGreaterBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefGreaterBackAMD); + } + + void stencilRefLessBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefLessBackAMD); + } + + void quadDerivativesKHR() + { + vk::ext_execution_mode(spv::ExecutionModeQuadDerivativesKHR); + } + + void requireFullQuadsKHR() + { + vk::ext_execution_mode(spv::ExecutionModeRequireFullQuadsKHR); + } + + void outputLinesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLinesEXT); + } + + void outputLinesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLinesNV); + } + + void outputPrimitivesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPrimitivesEXT); + } + + void outputPrimitivesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPrimitivesNV); + } + + void derivativeGroupQuadsNV() + { + vk::ext_execution_mode(spv::ExecutionModeDerivativeGroupQuadsNV); + } + + void derivativeGroupLinearNV() + { + vk::ext_execution_mode(spv::ExecutionModeDerivativeGroupLinearNV); + } + + void outputTrianglesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTrianglesEXT); + } + + void outputTrianglesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTrianglesNV); + } + + void pixelInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModePixelInterlockOrderedEXT); + } + + void pixelInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModePixelInterlockUnorderedEXT); + } + + void sampleInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeSampleInterlockOrderedEXT); + } + + void sampleInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeSampleInterlockUnorderedEXT); + } + + void shadingRateInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeShadingRateInterlockOrderedEXT); + } + + void shadingRateInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeShadingRateInterlockUnorderedEXT); + } + + void sharedLocalMemorySizeINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeSharedLocalMemorySizeINTEL); + } + + void roundingModeRTPINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTPINTEL); + } + + void roundingModeRTNINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTNINTEL); + } + + void floatingPointModeALTINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeALTINTEL); + } + + void floatingPointModeIEEEINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeIEEEINTEL); + } + + void maxWorkgroupSizeINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaxWorkgroupSizeINTEL); + } + + void maxWorkDimINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaxWorkDimINTEL); + } + + void noGlobalOffsetINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNoGlobalOffsetINTEL); + } + + void numSIMDWorkitemsINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNumSIMDWorkitemsINTEL); + } + + void schedulerTargetFmaxMhzINTEL() + { + 
vk::ext_execution_mode(spv::ExecutionModeSchedulerTargetFmaxMhzINTEL); + } + + void maximallyReconvergesKHR() + { + vk::ext_execution_mode(spv::ExecutionModeMaximallyReconvergesKHR); + } + + void fPFastMathDefault() + { + vk::ext_execution_mode(spv::ExecutionModeFPFastMathDefault); + } + + void streamingInterfaceINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeStreamingInterfaceINTEL); + } + + void registerMapInterfaceINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRegisterMapInterfaceINTEL); + } + + void namedBarrierCountINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNamedBarrierCountINTEL); + } + + void maximumRegistersINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersINTEL); + } + + void maximumRegistersIdINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersIdINTEL); + } + + void namedMaximumRegistersINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNamedMaximumRegistersINTEL); + } +} + +//! Group Operations +namespace group_operation +{ + static const uint32_t Reduce = 0; + static const uint32_t InclusiveScan = 1; + static const uint32_t ExclusiveScan = 2; + static const uint32_t ClusteredReduce = 3; + static const uint32_t PartitionedReduceNV = 6; + static const uint32_t PartitionedInclusiveScanNV = 7; + static const uint32_t PartitionedExclusiveScanNV = 8; +} + +//! Instructions +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpStore)]] +void 
store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object); + +template +[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] +enable_if_t, T> genericPtrMemSemantics(P pointer); + +template +[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] +T genericPtrMemSemantics(pointer_t pointer); + +template +[[vk::ext_instruction(spv::OpBitFieldInsert)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); + +template +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +enable_if_t<(is_signed_v), T> bitFieldSExtract(T base, uint32_t offset, uint32_t count); + +template +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +enable_if_t<(is_unsigned_v), T> bitFieldUExtract(T base, uint32_t offset, uint32_t count); + +template +[[vk::ext_instruction(spv::OpControlBarrier)]] +void controlBarrier(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpMemoryBarrier)]] +void memoryBarrier(uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicLoad)]] +T atomicLoad([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicLoad)]] +enable_if_t, T> atomicLoad(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicStore)]] +void atomicStore([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicStore)]] +enable_if_t, void> atomicStore(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicExchange)]] +T atomicExchange([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicExchange)]] +enable_if_t, T> atomicExchange(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchange)]] +T atomicCompareExchange([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchange)]] +enable_if_t, T> atomicCompareExchange(P pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] +T atomicCompareExchangeWeak([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] +enable_if_t, T> atomicCompareExchangeWeak(P pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIIncrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIIncrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIIncrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template 
+[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIIncrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIDecrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIDecrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIDecrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIDecrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIAdd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t<(is_same_v || is_same_v), T> atomicIAdd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t && (is_same_v || is_same_v), T> atomicIAdd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t<(is_same_v || is_same_v), T> atomicISub([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t && (is_same_v || is_same_v), T> atomicISub(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t<(is_same_v || is_same_v), T> atomicISub([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t && (is_same_v || is_same_v), T> atomicISub(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicSMin)]] +enable_if_t<(is_same_v), T> atomicSMin([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicSMin)]] +enable_if_t && (is_same_v), T> atomicSMin(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicUMin)]] +enable_if_t<(is_same_v), T> atomicUMin([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicUMin)]] +enable_if_t && (is_same_v), T> atomicUMin(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicSMax)]] +enable_if_t<(is_same_v), T> atomicSMax([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicSMax)]] +enable_if_t && (is_same_v), T> atomicSMax(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicUMax)]] +enable_if_t<(is_same_v), T> atomicUMax([[vk::ext_reference]] T pointer, 
uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicUMax)]] +enable_if_t && (is_same_v), T> atomicUMax(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +enable_if_t<(is_same_v || is_same_v), T> atomicAnd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +enable_if_t && (is_same_v || is_same_v), T> atomicAnd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +enable_if_t<(is_same_v || is_same_v), T> atomicAnd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +enable_if_t && (is_same_v || is_same_v), T> atomicAnd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +enable_if_t<(is_same_v || is_same_v), T> atomicOr([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +enable_if_t && (is_same_v || is_same_v), T> atomicOr(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +enable_if_t<(is_same_v || is_same_v), T> atomicOr([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +enable_if_t && (is_same_v || is_same_v), T> atomicOr(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +enable_if_t<(is_same_v || is_same_v), T> atomicXor([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +enable_if_t && (is_same_v || is_same_v), T> atomicXor(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +enable_if_t<(is_same_v || is_same_v), T> atomicXor([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +enable_if_t && (is_same_v || is_same_v), T> atomicXor(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] +T atomicFlagTestAndSet([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] +enable_if_t, T> atomicFlagTestAndSet(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagClear)]] +void atomicFlagClear([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagClear)]] +enable_if_t, void> atomicFlagClear(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformElect)]] +T groupNonUniformElect(uint32_t executionScope); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformAll)]] +T groupNonUniformAll(uint32_t executionScope, bool predicate); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformAny)]] +T groupNonUniformAny(uint32_t executionScope, bool predicate); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformAllEqual)]] +T groupNonUniformAllEqual(uint32_t executionScope, T value); + +template 
+[[vk::ext_instruction(spv::OpGroupNonUniformBroadcast)]] +T groupNonUniformBroadcast(uint32_t executionScope, T value, uint32_t id); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBroadcastFirst)]] +T groupNonUniformBroadcastFirst(uint32_t executionScope, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBallot)]] +T groupNonUniformBallot(uint32_t executionScope, bool predicate); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformInverseBallot)]] +T groupNonUniformInverseBallot(uint32_t executionScope, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBallotBitExtract)]] +T groupNonUniformBallotBitExtract(uint32_t executionScope, T value, uint32_t index); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBallotBitCount)]] +T groupNonUniformBallotBitCount(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBallotFindLSB)]] +T groupNonUniformBallotFindLSB(uint32_t executionScope, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBallotFindMSB)]] +T groupNonUniformBallotFindMSB(uint32_t executionScope, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformShuffle)]] +T groupNonUniformShuffle(uint32_t executionScope, T value, uint32_t id); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleXor)]] +T groupNonUniformShuffleXor(uint32_t executionScope, T value, uint32_t mask); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleUp)]] +T groupNonUniformShuffleUp(uint32_t executionScope, T value, uint32_t delta); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleDown)]] +T groupNonUniformShuffleDown(uint32_t executionScope, T value, uint32_t delta); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformIAdd)]] +T groupNonUniformIAdd(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +T groupNonUniformFAdd(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformIMul)]] +T groupNonUniformIMul(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +T groupNonUniformFMul(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +T groupNonUniformSMin(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +T groupNonUniformUMin(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +T groupNonUniformFMin(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +T groupNonUniformSMax(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +T groupNonUniformUMax(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +T groupNonUniformFMax(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseAnd)]] +T groupNonUniformBitwiseAnd(uint32_t executionScope, [[vk::ext_literal]] 
uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseOr)]] +T groupNonUniformBitwiseOr(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseXor)]] +T groupNonUniformBitwiseXor(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalAnd)]] +T groupNonUniformLogicalAnd(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalOr)]] +T groupNonUniformLogicalOr(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalXor)]] +T groupNonUniformLogicalXor(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformQuadBroadcast)]] +T groupNonUniformQuadBroadcast(uint32_t executionScope, T value, uint32_t index); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformQuadAllKHR)]] +T groupNonUniformQuadAllKHR(bool predicate); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformQuadAnyKHR)]] +T groupNonUniformQuadAnyKHR(bool predicate); + +template +[[vk::ext_instruction(spv::OpGroupNonUniformPartitionNV)]] +T groupNonUniformPartitionNV(T value); + +template +[[vk::ext_instruction(spv::OpBeginInvocationInterlockEXT)]] +void beginInvocationInterlockEXT(); + +template +[[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] +void endInvocationInterlockEXT(); + +template +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t<(is_same_v), T> atomicFMinEXT([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t && (is_same_v), T> atomicFMinEXT(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t<(is_same_v), T> atomicFMaxEXT([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t && (is_same_v), T> atomicFMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t<(is_same_v), T> atomicFAddEXT([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t && (is_same_v), T> atomicFAddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpControlBarrierArriveINTEL)]] +void controlBarrierArriveINTEL(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpControlBarrierWaitINTEL)]] +void controlBarrierWaitINTEL(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); + +} + +#endif + } +} + +#endif
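
For anyone touching the generated file: out.hlsl above is not meant to be edited by hand, it is produced by gen.py from the SPIR-V grammar plus metadata.json. A minimal regeneration sketch, assuming the repository layout implied by the argparse defaults in gen.py (run from the repo root, adjust paths to your checkout):

# Rough regeneration sketch -- the paths below are assumptions taken from the
# argparse defaults in gen.py, not a documented workflow.
import subprocess

subprocess.run(
    [
        "python3", "tools/hlsl_generator/gen.py",
        "tools/hlsl_generator/out.hlsl",                                 # positional: HLSL output file
        "--grammer", "include/spirv/unified1/spirv.core.grammar.json",   # registry grammar (flag spelled as in gen.py)
        "--metadata", "tools/hlsl_generator/metadata.json",              # builtin type/attribute mapping
    ],
    check=True,
)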
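
The generator also leans on a specific shape of spirv.core.grammar.json: an "operand_kinds" array whose entries carry the enumerants that become the builtins, the execution_mode helpers and the group_operation constants above. A small sketch of that lookup, with the grammar path being the same assumed default as in gen.py:

import json

# Peek at the registry structure gen() relies on: each operand kind exposes
# "enumerants", and each enumerant has an "enumerant" name and a "value".
with open("include/spirv/unified1/spirv.core.grammar.json") as f:  # path is an assumption
    grammar = json.load(f)

group_ops = next(k for k in grammar["operand_kinds"] if k["kind"] == "GroupOperation")["enumerants"]
for e in group_ops:
    print(e["enumerant"], e["value"])  # Reduce 0, InclusiveScan 1, ExclusiveScan 2, ...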
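
Regarding the TODO at the top of gen.py (collapsing groupNonUniformIAdd / groupNonUniformFAdd and friends into one overloaded groupNonUniformAdd): one possible pre-pass is to bucket opnames into an overload family before emission. The helper below is purely hypothetical, not part of the tool; it only mirrors the name mangling already done in writeInst():

import re

# Hypothetical grouping helper: strip the "Op" prefix exactly like writeInst()
# does, then fold the I/F/S/U type prefix of the arithmetic group ops so all
# typed variants land under one overloaded name.
def overload_family(opname: str) -> str:
    base = opname[2].lower() + opname[3:]  # OpGroupNonUniformIAdd -> groupNonUniformIAdd
    return re.sub(r"(groupNonUniform)[IFSU](Add|Mul|Min|Max)$", r"\1\2", base)

families = {}
for op in ("OpGroupNonUniformIAdd", "OpGroupNonUniformFAdd", "OpGroupNonUniformUMin"):
    families.setdefault(overload_family(op), []).append(op)
# families == {"groupNonUniformAdd": [..IAdd, ..FAdd], "groupNonUniformMin": [..UMin]}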