From f3891307235ef3b4115677bbcd9e34fa99fb78ee Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Sun, 8 Sep 2019 17:58:22 -0400 Subject: [PATCH 01/19] Initial commit --- .gitignore | 3 + .gitmodules | 9 + CMakeLists.txt | 7 + external/CMakeLists.txt | 2 + external/SPIRV-Cross | 1 + external/fmt | 1 + external/usse-decoder-gen | 1 + generate-usse.py | 59 ++ src/CMakeLists.txt | 8 + src/gxp/CMakeLists.txt | 12 + src/gxp/include/gxp/builder.h | 73 ++ src/gxp/include/gxp/gxp.h | 219 +++++ src/gxp/include/gxp/instructions.h | 445 +++++++++ src/gxp/include/gxp/usse.h | 120 +++ src/gxp/src/builder.cpp | 478 +++++++++ src/gxp/src/gxp.cpp | 117 +++ src/gxp/src/instructions.cpp | 930 ++++++++++++++++++ src/gxp/src/usse.cpp | 381 +++++++ src/interface/CMakeLists.txt | 8 + src/interface/include/interface/interface.h | 18 + src/interface/src/interface.cpp | 50 + src/main.cpp | 5 + src/translator/CMakeLists.txt | 7 + .../include/translator/translator.h | 68 ++ src/translator/src/codes.cpp | 577 +++++++++++ src/translator/src/translator.cpp | 245 +++++ src/util/CMakeLists.txt | 6 + src/util/include/util/spirv.h | 5 + src/util/include/util/util.h | 16 + 29 files changed, 3871 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 external/CMakeLists.txt create mode 160000 external/SPIRV-Cross create mode 160000 external/fmt create mode 160000 external/usse-decoder-gen create mode 100644 generate-usse.py create mode 100644 src/CMakeLists.txt create mode 100644 src/gxp/CMakeLists.txt create mode 100644 src/gxp/include/gxp/builder.h create mode 100644 src/gxp/include/gxp/gxp.h create mode 100644 src/gxp/include/gxp/instructions.h create mode 100644 src/gxp/include/gxp/usse.h create mode 100644 src/gxp/src/builder.cpp create mode 100644 src/gxp/src/gxp.cpp create mode 100644 src/gxp/src/instructions.cpp create mode 100644 src/gxp/src/usse.cpp create mode 100644 src/interface/CMakeLists.txt 
create mode 100644 src/interface/include/interface/interface.h create mode 100644 src/interface/src/interface.cpp create mode 100644 src/main.cpp create mode 100644 src/translator/CMakeLists.txt create mode 100644 src/translator/include/translator/translator.h create mode 100644 src/translator/src/codes.cpp create mode 100644 src/translator/src/translator.cpp create mode 100644 src/util/CMakeLists.txt create mode 100644 src/util/include/util/spirv.h create mode 100644 src/util/include/util/util.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..228eddf --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +.idea +cmake-build-debug diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..b222eaf --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "external/fmt"] + path = external/fmt + url = https://github.com/fmtlib/fmt.git +[submodule "external/SPIRV-Cross"] + path = external/SPIRV-Cross + url = https://github.com/KhronosGroup/SPIRV-Cross.git +[submodule "external/usse-decoder-gen"] + path = external/usse-decoder-gen + url = https://github.com/Vita3K/usse-decoder-gen.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..135012d --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.14) +project(psp2spvc) + +set(CMAKE_CXX_STANDARD 17) + +add_subdirectory(external) +add_subdirectory(src) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt new file mode 100644 index 0000000..e9e1233 --- /dev/null +++ b/external/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(fmt) +add_subdirectory(SPIRV-Cross) diff --git a/external/SPIRV-Cross b/external/SPIRV-Cross new file mode 160000 index 0000000..4ce0448 --- /dev/null +++ b/external/SPIRV-Cross @@ -0,0 +1 @@ +Subproject commit 4ce04480ec5469fe7ebbdd66c3016090a704d81b diff --git a/external/fmt b/external/fmt new file mode 160000 index 0000000..2aae6b1 --- /dev/null +++ b/external/fmt @@ -0,0 +1 @@ 
"""Generate C++ encoder functions for USSE instructions.

Reads the instruction grammar from the usse-decoder-gen submodule and writes
``instructions.h`` and ``instructions.cpp`` into the current directory. Every
grammar entry becomes one ``make<NAME>`` function that ORs the instruction's
fields into a 64-bit word.
"""

GRAMMAR_PATH = 'external/usse-decoder-gen/grammar.yaml'
HEADER_PATH = 'instructions.h'
SOURCE_PATH = 'instructions.cpp'

# Fields are placed from the top bit of the instruction word downwards.
INSTRUCTION_BITS = 64

# Typedefs emitted at the top of the generated header's namespace.
BIT_TYPES = '\n\ttypedef uint64_t Instruction;\n\ttypedef uint64_t Param;\n'


def render_encoder(instruction_name, members):
    """Render one ``make<instruction_name>`` encoder.

    Args:
        instruction_name: Grammar key, appended to ``make`` to form the
            function name.
        members: Ordered list of single-key mappings ``{field_name: info}``.
            ``info`` is a ``str`` (constant bit pattern), an ``int`` (width of
            a caller-supplied field) or a mapping with ``size`` and optional
            ``offset`` / ``match`` keys.

    Returns:
        A ``(declaration, definition)`` tuple. ``declaration`` is the bare
        signature without a trailing ``;``; ``definition`` is the indented
        function definition followed by a blank line.
    """
    parameters = []
    statements = ['\t\tInstruction inst = 0;\n']
    index = INSTRUCTION_BITS

    for member in members:
        member_name, member_info = next(iter(member.items()))

        if isinstance(member_info, str):
            # Constant bit pattern; its width is the length of the pattern.
            index -= len(member_info)
            match, size = member_info, None
        elif isinstance(member_info, int):
            index -= member_info
            match, size = None, member_info
        else:
            # An explicit offset repositions the cursor instead of advancing it.
            if 'offset' in member_info:
                index = member_info['offset']
            else:
                index -= member_info['size']
            if 'match' in member_info:
                match, size = member_info['match'], None
            else:
                match, size = None, member_info['size']

        if match is not None:
            statements.append(f'\t\tinst |= 0b{match}ull << {index}u;\n')
        else:
            # The field width is carried as a comment on the parameter type.
            parameters.append(f'Param/*{size}*/ {member_name}')
            statements.append(f'\t\tinst |= {member_name} << {index}u;\n')

    declaration = f'Instruction make{instruction_name}('
    if parameters:
        declaration += '\n\t\t\t' + ',\n\t\t\t'.join(parameters)
    declaration += ')'

    definition = (
        '\t' + declaration + ' {\n'
        + ''.join(statements)
        + '\t\treturn inst;\n\t}\n\n'
    )
    return declaration, definition


def render_instructions(instructions):
    """Render the complete header and source text for a parsed grammar.

    Args:
        instructions: Mapping of instruction name to a mapping that holds a
            ``members`` list (see :func:`render_encoder`).

    Returns:
        A ``(header_text, source_text)`` tuple.
    """
    header = [
        '#pragma once\n\n#include <cstdint>\n\nnamespace usse {',
        BIT_TYPES,
        '\n',
    ]
    source = ['#include <gxp/instructions.h>\n\nnamespace usse {\n']

    for instruction_name, instruction in instructions.items():
        declaration, definition = render_encoder(
            instruction_name, instruction['members'])
        header.append('\t' + declaration + ';\n\n')
        source.append(definition)

    # Close the namespace and end both files with a newline.
    header.append('}\n')
    source.append('}\n')
    return ''.join(header), ''.join(source)


def main():
    """Read the grammar file and write the generated header and source."""
    # Imported here so the rendering helpers can be used without PyYAML.
    import yaml

    with open(GRAMMAR_PATH, 'r') as stream:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects.
        # The grammar comes from a pinned submodule; switch to
        # yaml.safe_load if that input ever stops being trusted.
        instructions = yaml.load(stream, Loader=yaml.Loader)

    header, source = render_instructions(instructions)

    with open(HEADER_PATH, 'w') as header_out:
        header_out.write(header)
    with open(SOURCE_PATH, 'w') as source_out:
        source_out.write(source)


if __name__ == '__main__':
    main()
ParameterSemantic semantic = ParameterSemantic::None; + + usse::DataType type; + + // Will be filled out by Builder. + uint32_t resourceIndex = 0; + uint32_t containerIndex = 0; + + usse::RegisterBank getBank(); + }; + + class Builder { + ProgramHeader header; + ProgramVaryings varyings; + + uint32_t paRegPointer = 0; + uint32_t saRegPointer = 0; + uint32_t oRegPointer = 0; + + std::vector primaryBlocks; + std::vector secondaryBlocks; + std::vector parameters; + public: + void setType(ShaderType type); + + Block *createPrimaryBlock(); + Block *createSecondaryBlock(); + + usse::RegisterReference allocateRegister(usse::RegisterBank bank, usse::DataType type); + + usse::RegisterReference registerParameter(const Parameter ¶meter); + std::unordered_map registerVaryings( + const std::vector &outputs, const std::vector &texCoords); + + std::vector build(); + + Builder(); + }; +} diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h new file mode 100644 index 0000000..d30f492 --- /dev/null +++ b/src/gxp/include/gxp/gxp.h @@ -0,0 +1,219 @@ +#pragma once + +#include + +#include + +namespace gxp { + // GXP\0 + constexpr uint32_t gxpMagic = 0x00505847; + + enum class ParameterCategory : uint8_t{ + Attribute, + Uniform, + Sampler, + AuxiliarySurface, + UniformBuffer, + }; + + enum class ParameterType : uint8_t { + Float32, + Float16, + Fixed10, + Unsigned32, + Signed32, + Unsigned16, + Signed16, + Unsigned8, + Signed8, + Aggregate, + }; + + ParameterType getParameterTypeFromUSSEType(usse::Type type); + uint32_t getParameterTypeSize(ParameterType type); + + enum class ParameterSemantic : uint16_t { + None, + Attr, + Bcol, + BiNormal, + BlendIndices, + BlendWeight, + Color, + Diffuse, + FogCoord, + Normal, + PointSize, + Position, + Specular, + Tangent, + TexCoord, + }; + + enum class ProgramVaryingVertexBits : uint32_t { + Clip0 = 0x0001, + Clip1 = 0x0002, + Clip2 = 0x0004, + Clip3 = 0x0008, + Clip4 = 0x0010, + Clip5 = 0x0020, + Clip6 = 0x0040, + Clip7 = 0x0080, + 
PointSize = 0x100, + Fog = 0x0200, + Color1 = 0x0400, + Color0 = 0x0800, + }; + + enum class ProgramTexCoordMasks : uint32_t { + TexCoord0 = 0b111u << (3u * 0u), + TexCoord1 = 0b111u << (3u * 1u), + TexCoord2 = 0b111u << (3u * 2u), + TexCoord3 = 0b111u << (3u * 3u), + TexCoord4 = 0b111u << (3u * 4u), + TexCoord5 = 0b111u << (3u * 5u), + TexCoord6 = 0b111u << (3u * 6u), + TexCoord7 = 0b111u << (3u * 7u), + TexCoord8 = 0b111u << (3u * 8u), + TexCoord9 = 0b111u << (3u * 9u), + }; + + enum class ProgramVarying { + None, + Position, + Fog, + Color0, + Color1, + TexCoord0, + TexCoord1, + TexCoord2, + TexCoord3, + TexCoord4, + TexCoord5, + TexCoord6, + TexCoord7, + TexCoord8, + TexCoord9, + PointSize, + Clip0, + Clip1, + Clip2, + Clip3, + Clip4, + Clip5, + Clip6, + Clip7, + }; + + bool iClipVarying(ProgramVarying varying); + bool isTexCoordVarying(ProgramVarying varying); + uint32_t getVaryingBits(ProgramVarying varying); + + class ProgramTexCoordInfo { + public: + ProgramVarying varying; + uint32_t componentCount; + }; + + class ParameterConfig { + uint16_t config = 0; + public: + void setCategory(ParameterCategory category); + void setType(ParameterType type); + void setComponentCount(uint32_t componentCount); + void setContainerIndex(uint32_t containerIndex); + + ParameterCategory getCategory(); + ParameterType getType(); + uint32_t getComponentCount(); + uint32_t getContainerIndex(); + }; + + class ProgramParameterInfo { + public: + int32_t nameOffset = 0; + ParameterConfig config; + uint16_t semantic = 0; + uint32_t arraySize = 0; + uint32_t resourceIndex = 0; + }; + + class ProgramVaryings { + public: + std::uint8_t unk0[10] = { }; + std::uint8_t output_param_type = 0; + std::uint8_t output_comp_count = 0; + + std::uint16_t varyings_count = 0; + std::uint16_t pad0 = 0; + std::uint32_t vertex_outputs1 = 0; + std::uint32_t vertex_outputs2 = 0; + }; + + // Vita3K's SceGxmProgram structure + class ProgramHeader { + public: + uint32_t magic = 0; + + uint8_t majorVersion 
= 0; + uint8_t minorVersion = 0; + uint16_t unk6 = 0; + + uint32_t size = 0; + uint32_t unkC = 0; + + uint16_t unk10 = 0; + uint8_t unk12 = 0; + uint8_t unk13 = 0; + + uint8_t type = 0; + uint8_t unk15 = 0; + uint8_t unk16 = 0; + uint8_t unk17 = 0; + + uint32_t unk18 = 0; + uint32_t unk1C = 0; + uint32_t unk20 = 0; + + uint32_t parameterCount = 0; + uint32_t parametersOffset = 0; + uint32_t varyingsOffset = 0; + + uint16_t primaryRegCount = 0; + uint16_t secondaryRegCount = 0; + uint16_t tempRegCount1 = 0; + uint16_t unk36 = 0; + uint16_t tempRegCount2 = 0; + uint16_t unk3A = 0; + + uint32_t primaryProgramInstructionCount = 0; + uint32_t primaryProgramOffset = 0; + + uint32_t unk44 = 0; + + uint32_t secondaryProgramOffset = 0; + uint32_t secondaryProgramOffsetEnd = 0; + + uint32_t unk50 = 0; + uint32_t unk54 = 0; + uint32_t unk58 = 0; + uint32_t unk5C = 0; + + uint32_t unk60 = 0; + uint32_t defaultUniformBufferCount = 0; + uint32_t unk68 = 0; + uint32_t unk6C = 0; + + uint32_t literalsCount = 0; + uint32_t literalsOffset = 0; + uint32_t unk78 = 0; + uint32_t alternativeParametersOffset = 0; + + uint32_t dependentSamplerCount = 0; + uint32_t dependentSamplerOffset = 0; + uint32_t unk88 = 0; + uint32_t unk8C = 0; + uint32_t containerCount = 0; + uint32_t containerOffset = 0; + }; +} diff --git a/src/gxp/include/gxp/instructions.h b/src/gxp/include/gxp/instructions.h new file mode 100644 index 0000000..c4d0e51 --- /dev/null +++ b/src/gxp/include/gxp/instructions.h @@ -0,0 +1,445 @@ +#pragma once + +#include + +namespace usse { + typedef uint64_t Instruction; + typedef uint64_t Param; + + Instruction makeVMOV( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + 
Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); + + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + Param/*4*/ src1_swiz, + Param/*6*/ src1_n); + + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); + + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + 
Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n); + + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); + + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); + + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + 
+ Instruction makeVTST( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0); + + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ sync_start, + Param/*1*/ 
dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask); + + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction 
makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ br_monitor, + Param/*1*/ save_link, + Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off); + + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low); + + Instruction makeNOP(); + + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc); + + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); + + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category); + +} \ No newline at end of file diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h new file mode 100644 index 0000000..05caa18 --- /dev/null +++ b/src/gxp/include/gxp/usse.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include + +namespace usse { + + enum class RegisterBank { + Temp, + Primary, + Output, + Secondary, + FloatInternal, + Special, + 
Global, + FloatConstant, + Immediate, + Index, + Indexed1, + Indexed2, + Predicate, + + Invalid, + }; + + enum class SwizzleChannel : uint8_t { + X, + Y, + Z, + W, + Zero, + One, + Two, + Half, + + DontCare, + }; + + enum class Type : uint8_t { + Signed8, + Signed16, + Signed32, + Fixed10, + Float16, + Float32, + Unsigned8, + Unsigned16, + Unsigned32, + Output8, + }; + + typedef std::array SwizzleVec3; + typedef std::array SwizzleVec4; + + class DataType { + public: + Type type = usse::Type::Float32; + uint32_t components = 1; + uint32_t arraySize = 1; + }; + + class RegisterReference { + public: + DataType type; + RegisterBank bank = RegisterBank::Invalid; + uint32_t index = 0; + uint32_t size = 1; + bool lockSwizzle = false; + std::vector swizzle; + + RegisterReference operator+(uint32_t value); + uint32_t getSwizzleMask(); + RegisterReference getComponents(uint32_t component, uint32_t count); + + RegisterReference() = default; + RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size); + }; + + class BankLayout { + public: + usse::RegisterBank bank = usse::RegisterBank::Invalid; + + uint8_t extension = 0; + uint8_t number = 0; + + bool needsDiv(); + + static BankLayout destLayout(RegisterBank bank); + static BankLayout src0Layout(RegisterBank bank); + static BankLayout srcLayout(RegisterBank bank); + }; + + std::string getTypeName(Type type); + uint32_t getTypeSize(Type type); + std::string getBankName(RegisterBank bank); + + bool areSwizzlesInMatchingHalf(SwizzleChannel x, SwizzleChannel y); + int32_t getFPConstantIndex(float constant); + int32_t getSwizzleScalarIndex(SwizzleChannel element); + int32_t getSwizzleVec3Index(SwizzleVec3 elements, bool extended = false); + int32_t getSwizzleVec4Index(SwizzleVec4 elements, bool extended = false); + + inline SwizzleVec3 getSwizzleVec3All(SwizzleChannel channel) { + return { channel, channel, channel }; + } + + inline SwizzleVec4 getSwizzleVec4All(SwizzleChannel channel) { + return { channel, 
channel, channel, channel }; + } + + inline SwizzleVec3 getSwizzleVec3Default() { + return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z }; + } + + inline SwizzleVec4 getwizzleVec4Default() { + return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }; + } +} diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp new file mode 100644 index 0000000..c79c6b7 --- /dev/null +++ b/src/gxp/src/builder.cpp @@ -0,0 +1,478 @@ +#include + +#include +#include + +#include + +#define OFFSET_OF(parent, child) (reinterpret_cast(&parent.child) - reinterpret_cast(&parent)) + +namespace gxp { + class MovRegisterData { + public: + uint8_t destMask = 0; + int32_t swizzleIndex = 0; + usse::RegisterReference source; + usse::RegisterReference destination; + }; + + std::vector splitRegisterF32(usse::RegisterReference source, usse::RegisterReference destination) { + std::vector data; + + uint32_t destMask = destination.getSwizzleMask(); +// usse::SwizzleVec4 swizzleDescription = usse::getSwizzleVec4DontCare(); +// uint32_t swizzleIndex = 0; +// for (uint32_t a = 0; a < 4; a++) { +// if (destMask & (1u << a)) { +// if (swizzleIndex >= source.swizzle.size()) +// throw std::runtime_error("Swizzle out of bounds."); +// swizzleDescription[a] = source.swizzle[swizzleIndex++]; +// } +// } + + for (uint32_t a = 0; a < 2; a++) { + uint8_t mask = (destMask & (0b11u << (a * 2))) >> (a * 2); + + usse::RegisterReference sourceHalf = source.getComponents(a * 2, 2); + usse::RegisterReference destinationHalf = destination.getComponents(a * 2, 2); + + if (mask & 0b01u) { + if (mask & 0b10u) { + if (usse::areSwizzlesInMatchingHalf(sourceHalf.swizzle[0], sourceHalf.swizzle[1])) { + data.push_back({ + mask, + usse::getSwizzleVec4Index({ + sourceHalf.swizzle[0], + sourceHalf.swizzle[1], + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + }), + source.getComponents(a * 2, 2), // is this source index right? 
+ destination.getComponents(a * 2, 2) + }); + } else { + // X and Y swizzle seperately + data.push_back({ + 0b01, + usse::getSwizzleVec4Index({ + sourceHalf.swizzle[0], + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + }), + source.getComponents(a * 2, 2), // copy both components anyway + destination.getComponents(a * 2, 2) + }); + data.push_back({ + 0b10, + usse::getSwizzleVec4Index({ + usse::SwizzleChannel::DontCare, + sourceHalf.swizzle[1], + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + }), + source.getComponents(a * 2, 2), // copy both components anyway + destination.getComponents(a * 2, 2) + }); + } + } else { + data.push_back({ + 0b01, + usse::getSwizzleVec4Index({ + sourceHalf.swizzle[0], + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + }), + source.getComponents(a * 2, 2), // copy both components anyway + destination.getComponents(a * 2, 2) + }); + } + } else if (mask & 0b10u) { + data.push_back({ + 0b10, + usse::getSwizzleVec4Index({ + usse::SwizzleChannel::DontCare, + sourceHalf.swizzle[1], + usse::SwizzleChannel::DontCare, + usse::SwizzleChannel::DontCare, + }), + source.getComponents(a * 2, 2), // copy both components anyway + destination.getComponents(a * 2, 2) + }); + } + } + + return data; + } + + void Builder::setType(gxp::ShaderType type) { + header.type = static_cast(type); + } + + void Block::createNop() { + instructions.push_back(usse::makeNOP()); + } + + void Block::createMov(usse::RegisterReference source, usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + const auto movs = splitRegisterF32(source, destination); + + for (const auto &mov : movs) { + instructions.push_back(usse::makeVMOV( + 0, // pred + 0, // skipinv + 0, // test_bit_2 + 0, // src0_comp_sel + 0, // 
syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end_or_src0_bank_ext + srcBankLayout.extension, // src1_bank_ext + 0, // src2_bank_ext + 0, // move_type + 0, // repeat_count + 0, // nosched + static_cast(destination.type.type) & 0b111u, // move_data_type + 0, // test_bit_1 + mov.swizzleIndex, // src0_swiz + 0, // src0_bank_sel + destBankLayout.number, // dest_bank_sel + srcBankLayout.number, // src1_bank_sel + 0, // src2_bank_sel + mov.destMask, // dest_mask + mov.destination.index / (destBankLayout.needsDiv() ? 2 : 1), // dest_n + 0, // src0_n + mov.source.index / (srcBankLayout.needsDiv() ? 2 : 1), // src1_n + 0 // src2_n + )); + } + } + + void Block::createPack(usse::RegisterReference source, usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::destLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + if (destination.type.type == usse::Type::Signed32 || destination.type.type == usse::Type::Unsigned32 + || source.type.type == usse::Type::Signed32 || source.type.type == usse::Type::Unsigned32) + throw std::runtime_error("Cannot pack S32/U32 type."); + + usse::Param typeTable[] = { + 1, // Signed8 + 4, // Signed16 + 0, // Signed32 - Unsupported + 7, // Fixed10 + 5, // Float16 + 6, // Float32 + 0, // Unsigned8 + 3, // Unsigned16 + 0, // Unsigned32 - Unsupported + 2, // Output8 + }; + + instructions.push_back(usse::makeVPCK( + 0, // pred + 0, // skipinv + 0, // nosched + 0, // unknown + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + srcBankLayout.extension, // src2_bank_ext + 0, // repeat_count + typeTable[static_cast(source.type.type)], // src_fmt + typeTable[static_cast(destination.type.type)], // dest_fmt + destination.getSwizzleMask(), // dest_mask + destBankLayout.number, // dest_bank_sel + srcBankLayout.number, // src1_bank_sel + srcBankLayout.number, // src2_bank_sel + 
destination.index, // dest_n + static_cast(destination.swizzle[3]) & 0b11u, // comp_sel_3 + 1, // scale + static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 + static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 + source.index, // src1_n + static_cast(destination.swizzle[0]) & 0b10u >> 1u, // comp0_sel_bit1 + source.index + source.size / 2, // src2_n + static_cast(destination.swizzle[3]) & 0b01u // comp_sel_0_bit0 + )); + } + + Block::Block(gxp::Builder &parent) : parent(parent) { } + + usse::RegisterBank Parameter::getBank() { + switch (category) { + case ParameterCategory::Attribute: + return usse::RegisterBank::Primary; + case ParameterCategory::Uniform: + return usse::RegisterBank::Secondary; + default: + return usse::RegisterBank::Invalid; + } + } + + Block *Builder::createPrimaryBlock() { + size_t index = primaryBlocks.size(); + primaryBlocks.push_back(Block(*this)); + + return &primaryBlocks[index]; + } + + Block *Builder::createSecondaryBlock() { + size_t index = secondaryBlocks.size(); + secondaryBlocks.push_back(Block(*this)); + + return &secondaryBlocks[index]; + } + + usse::RegisterReference Builder::allocateRegister(usse::RegisterBank bank, usse::DataType type) { + uint32_t index = 0; + uint32_t size = usse::getTypeSize(type.type) * type.components / 4; + + switch (bank) { + case usse::RegisterBank::Primary: + index = paRegPointer; + paRegPointer += size; + break; + case usse::RegisterBank::Secondary: + index = saRegPointer; + saRegPointer += size; + break; + case usse::RegisterBank::Output: + index = oRegPointer; + oRegPointer += size; + break; + default: + throw std::runtime_error("Missing allocation method for bank."); + } + +// fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", +// usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, reg.size, reg.index); + + return usse::RegisterReference(type, bank, index, size); + } + + usse::RegisterReference 
Builder::registerParameter(const Parameter ¶meter) { + size_t index = parameters.size(); + parameters.push_back(parameter); + + usse::RegisterReference reg = allocateRegister(parameters[index].getBank(), parameter.type); + + return reg; + } + + std::unordered_map Builder::registerVaryings( + const std::vector &outputs, const std::vector &texCoords) { + // TODO: This is only for vertex. Either make this method work for fragment or make another method for fragment. + varyings.varyings_count = outputs.size() + texCoords.size(); + + varyings.vertex_outputs1 = 0; + varyings.vertex_outputs2 = 0; + + for (ProgramVarying output : outputs) { + if (output == ProgramVarying::Position) continue; + if (isTexCoordVarying(output)) + throw std::runtime_error("TexCoord passed as regular output to createVaryings."); + + varyings.vertex_outputs1 |= getVaryingBits(output); + } + + for (ProgramTexCoordInfo texCoord : texCoords) { + if (!isTexCoordVarying(texCoord.varying)) + throw std::runtime_error("Non-TexCoord passed as TexCoord to createVaryings."); + + uint32_t texCoordIndex = static_cast(texCoord.varying) + - static_cast(ProgramVarying::TexCoord0); + + uint32_t texCoordBits = 0; + texCoordBits |= (texCoord.componentCount - 1) & 0b11u; + + varyings.vertex_outputs2 |= texCoordBits << (texCoordIndex * 3u); + } + + std::unordered_map references; + + for (auto a = static_cast(ProgramVarying::Position); + a < static_cast(ProgramVarying::TexCoord0); a++) { + auto varying = static_cast(a); + + if (varying == ProgramVarying::Position || varyings.vertex_outputs1 & getVaryingBits(varying)) { + references[varying] = allocateRegister(usse::RegisterBank::Output, + { usse::Type::Float32, 4, 1 }); + + oRegPointer += 4; + } + } + + for (auto a = static_cast(ProgramVarying::TexCoord0); + a <= static_cast(ProgramVarying::TexCoord9); a++) { + auto varying = static_cast(a); + + auto texCoordInfo = std::find_if(texCoords.begin(), texCoords.end(), + [varying](const ProgramTexCoordInfo &info) { return 
info.varying == varying; }); + + if (texCoordInfo != texCoords.end()) { + references[varying] = allocateRegister(usse::RegisterBank::Output, + { usse::Type::Float32, texCoordInfo->componentCount, 1 }); + + oRegPointer += texCoordInfo->componentCount; + } + } + + for (auto a = static_cast(ProgramVarying::PointSize); + a <= static_cast(ProgramVarying::Clip7); a++) { + auto varying = static_cast(a); + + if (varyings.vertex_outputs1 & getVaryingBits(varying)) { + uint32_t varyingSize = varying == ProgramVarying::PointSize ? 1 : 4; + + references[varying] = allocateRegister(usse::RegisterBank::Output, + { usse::Type::Float32, varyingSize, 1 }); + + oRegPointer += varyingSize; + } + } + + return references; + } + + std::vector Builder::build() { + std::vector data(sizeof(ProgramHeader)); + + // Strings + class StringEntry { + public: + size_t index = 0; + std::string text; + }; + + std::vector stringDB; + + for (const Parameter ¶m : parameters) { + StringEntry entry; + entry.index = data.size(); + entry.text = param.name; + data.insert(data.end(), entry.text.begin(), entry.text.end()); + data.insert(data.end(), '\0'); + stringDB.push_back(entry); + } + + // Parameters + header.parameterCount = parameters.size(); + header.parametersOffset = data.size() - OFFSET_OF(header, parametersOffset); + for (const Parameter ¶m : parameters) { + ProgramParameterInfo parameter; + parameter.resourceIndex = param.resourceIndex; + parameter.arraySize = param.type.arraySize; + parameter.semantic = static_cast(param.semantic); + parameter.config.setType(getParameterTypeFromUSSEType(param.type.type)); + parameter.config.setCategory(param.category); + parameter.config.setComponentCount(param.type.components); + parameter.config.setContainerIndex(param.containerIndex); + + auto stringEntry = std::find_if(stringDB.begin(), stringDB.end(), [param](const StringEntry &entry) { + return entry.text == param.name; + }); + if (stringEntry == stringDB.end()) + throw 
std::runtime_error(fmt::format("String \"{}\" is missing from gxp db.", param.name)); + + parameter.nameOffset = stringEntry->index - (data.size() + OFFSET_OF(parameter, nameOffset)); + + std::vector paramData(sizeof(parameter)); + std::memcpy(paramData.data(), ¶meter, sizeof(parameter)); + data.insert(data.end(), paramData.begin(), paramData.end()); + } + + // Varyings + header.varyingsOffset = data.size() - OFFSET_OF(header, varyingsOffset); + { + std::vector varyingsData(sizeof(varyings)); + std::memcpy(varyingsData.data(), &varyings, sizeof(varyings)); + data.insert(data.end(), varyingsData.begin(), varyingsData.end()); + } + + // Code + header.primaryRegCount = paRegPointer; + header.secondaryRegCount = saRegPointer; + { + header.secondaryProgramOffset = data.size() - OFFSET_OF(header, secondaryProgramOffset); + for (const Block &block : secondaryBlocks) { + data.insert(data.end(), + reinterpret_cast(block.instructions.data()), + reinterpret_cast(block.instructions.data()) + + block.instructions.size() * sizeof(usse::Instruction)); + } + header.secondaryProgramOffsetEnd = data.size() - OFFSET_OF(header, secondaryProgramOffsetEnd); + + header.primaryProgramOffset = data.size() - OFFSET_OF(header, primaryProgramOffset); + usse::Instruction phase = usse::makePHAS( + 0, // sprvv + true, // end + true, // imm + 0, // src1_bank_ext + 0, // src2_bank_ext + 0, // mode + 0, // rate_hi + 0, // rate_lo_or_nosched + 0, // wait_cond + 0, // temp_count + 0, // src1_bank + 0, // src2_bank + 0, // exe_addr_high + 0, // src1_n_or_exe_addr_mid + 0 // src2_n_or_exe_addr_low + ); + data.insert(data.end(), + reinterpret_cast(&phase), + reinterpret_cast(&phase) + sizeof(phase)); + + header.primaryProgramInstructionCount++; + for (const Block &block : primaryBlocks) { + header.primaryProgramInstructionCount += block.instructions.size(); + data.insert(data.end(), + reinterpret_cast(block.instructions.data()), + reinterpret_cast(block.instructions.data()) + + block.instructions.size() 
* sizeof(usse::Instruction)); + } + + usse::BankLayout emitLayout = usse::BankLayout::srcLayout(usse::RegisterBank::Immediate); + + usse::Instruction emit = usse::makeEMIT( + 0, // sideband_high + 0, // src0_bank_ext + true, // end + 0, // src1_bank_ext + emitLayout.extension, // src2_bank_ext + 1, /* Target = MTE */ // target + 0, // task_start_or_mte_hi + 1, /* MTE = Vertex */ // task_end_or_mte_lo + 0, // nosched + 0, // sideband_mid + 0, // src0_bank + 0, // incp + 0, // src1_bank + emitLayout.number, // src2_bank + 0, // sideband_low + true, // freep + 0, // src0_n + 0, // src1_n + 0 // src2_n + ); + data.insert(data.end(), + reinterpret_cast(&emit), + reinterpret_cast(&emit) + sizeof(emit)); + } + + std::memcpy(data.data(), &header, sizeof(ProgramHeader)); + + return data; + } + + Builder::Builder() { + header.magic = gxpMagic; + header.majorVersion = 1; + header.minorVersion = 4; + } +} diff --git a/src/gxp/src/gxp.cpp b/src/gxp/src/gxp.cpp new file mode 100644 index 0000000..d4c035e --- /dev/null +++ b/src/gxp/src/gxp.cpp @@ -0,0 +1,117 @@ +#include + +#include + +namespace gxp { + ParameterType getParameterTypeFromUSSEType(usse::Type type) { + switch (type) { + case usse::Type::Signed8: return ParameterType::Signed8; + case usse::Type::Signed16: return ParameterType::Signed16; + case usse::Type::Signed32: return ParameterType::Signed32; + case usse::Type::Fixed10: return ParameterType::Fixed10; + case usse::Type::Float16: return ParameterType::Float16; + case usse::Type::Float32: return ParameterType::Float32; + case usse::Type::Unsigned8: return ParameterType::Unsigned8; + case usse::Type::Unsigned16: return ParameterType::Unsigned16; + case usse::Type::Unsigned32: return ParameterType::Unsigned32; + default: + throw std::runtime_error(""); + } + } + + uint32_t getParameterTypeSize(ParameterType type) { + switch (type) { + case ParameterType::Unsigned32: + case ParameterType::Signed32: + case ParameterType::Float32: + return 4; + case 
ParameterType::Unsigned16:
+        case ParameterType::Signed16:
+        case ParameterType::Float16:
+            return 2;
+        case ParameterType::Unsigned8:
+        case ParameterType::Signed8:
+            return 1;
+        default:
+            // Any type not listed above (Fixed10 is not listed) ends up here.
+            throw std::runtime_error(
+                fmt::format("Parameter type {} has no defined size.", static_cast(type)));
+        }
+    }
+
+    // True when `varying` lies in the inclusive range Clip0..Clip7.
+    // NOTE(review): the name looks like a typo for isClipVarying (compare
+    // isTexCoordVarying below) - check the declaration in the header before renaming.
+    bool iClipVarying(ProgramVarying varying) {
+        auto varyingNum = static_cast(varying);
+        return varyingNum >= static_cast(ProgramVarying::Clip0)
+            && varyingNum <= static_cast(ProgramVarying::Clip7);
+    }
+    // True when `varying` lies in the inclusive range TexCoord0..TexCoord9.
+    bool isTexCoordVarying(ProgramVarying varying) {
+        auto varyingNum = static_cast(varying);
+        return varyingNum >= static_cast(ProgramVarying::TexCoord0)
+            && varyingNum <= static_cast(ProgramVarying::TexCoord9);
+    }
+
+    // Returns the header bit pattern for `varying`: a ProgramVaryingVertexBits value
+    // for Fog/Color/PointSize/Clip, a ProgramTexCoordMasks value for TexCoord0-9,
+    // and 0 for anything else (Position has no case, so it yields 0).
+    uint32_t getVaryingBits(ProgramVarying varying) {
+        switch (varying) {
+        case ProgramVarying::Fog: return static_cast(ProgramVaryingVertexBits::Fog);
+        case ProgramVarying::Color0: return static_cast(ProgramVaryingVertexBits::Color0);
+        case ProgramVarying::Color1: return static_cast(ProgramVaryingVertexBits::Color1);
+        case ProgramVarying::PointSize: return static_cast(ProgramVaryingVertexBits::PointSize);
+        case ProgramVarying::Clip0: return static_cast(ProgramVaryingVertexBits::Clip0);
+        case ProgramVarying::Clip1: return static_cast(ProgramVaryingVertexBits::Clip1);
+        case ProgramVarying::Clip2: return static_cast(ProgramVaryingVertexBits::Clip2);
+        case ProgramVarying::Clip3: return static_cast(ProgramVaryingVertexBits::Clip3);
+        case ProgramVarying::Clip4: return static_cast(ProgramVaryingVertexBits::Clip4);
+        case ProgramVarying::Clip5: return static_cast(ProgramVaryingVertexBits::Clip5);
+        case ProgramVarying::Clip6: return static_cast(ProgramVaryingVertexBits::Clip6);
+        case ProgramVarying::Clip7: return static_cast(ProgramVaryingVertexBits::Clip7);
+
+        case ProgramVarying::TexCoord0: return static_cast(ProgramTexCoordMasks::TexCoord0);
+        case ProgramVarying::TexCoord1: return static_cast(ProgramTexCoordMasks::TexCoord1);
+        case ProgramVarying::TexCoord2: return static_cast(ProgramTexCoordMasks::TexCoord2);
+        case ProgramVarying::TexCoord3: return static_cast(ProgramTexCoordMasks::TexCoord3);
+        case ProgramVarying::TexCoord4: return static_cast(ProgramTexCoordMasks::TexCoord4);
+        case ProgramVarying::TexCoord5: return static_cast(ProgramTexCoordMasks::TexCoord5);
+        case ProgramVarying::TexCoord6: return static_cast(ProgramTexCoordMasks::TexCoord6);
+        case ProgramVarying::TexCoord7: return static_cast(ProgramTexCoordMasks::TexCoord7);
+        case ProgramVarying::TexCoord8: return static_cast(ProgramTexCoordMasks::TexCoord8);
+        case ProgramVarying::TexCoord9: return static_cast(ProgramTexCoordMasks::TexCoord9);
+
+        default: return 0;
+        }
+    }
+
+    // ParameterConfig packs four 4-bit fields into `config`:
+    // bits 0-3 category, 4-7 type, 8-11 component count, 12-15 container index.
+    // Each setter clears its field and stores the low four bits of the new value.
+    void ParameterConfig::setCategory(ParameterCategory category) {
+        config &= ~0b1111u;
+        config |= static_cast(category) & 0b1111u;
+    }
+
+    void ParameterConfig::setType(ParameterType type) {
+        config &= ~(0b1111u << 4u);
+        config |= (static_cast(type) & 0b1111u) << 4u;
+    }
+
+    void ParameterConfig::setComponentCount(uint32_t componentCount) {
+        config &= ~(0b1111u << 8u);
+        config |= (componentCount & 0b1111u) << 8u;
+    }
+
+    void ParameterConfig::setContainerIndex(uint32_t containerIndex) {
+        config &= ~(0b1111u << 12u);
+        config |= (containerIndex & 0b1111u) << 12u;
+    }
+
+    // Getters mask out the field and shift it down to bit 0.
+    ParameterCategory ParameterConfig::getCategory() {
+        return static_cast(config & 0b1111u);
+    }
+
+    ParameterType ParameterConfig::getType() {
+        return static_cast((config & (0b1111u << 4u)) >> 4u);
+    }
+
+    uint32_t ParameterConfig::getComponentCount() {
+        return (config & (0b1111u << 8u)) >> 8u;
+    }
+
+    uint32_t ParameterConfig::getContainerIndex() {
+        return (config & (0b1111u << 12u)) >> 12u;
+    }
+}
diff --git a/src/gxp/src/instructions.cpp b/src/gxp/src/instructions.cpp
new file mode 100644
index 0000000..e18555d
--- /dev/null
+++ b/src/gxp/src/instructions.cpp
@@ -0,0 +1,930 @@
+#include
+
+namespace usse {
+    Instruction makeVMOV(
+        Param/*3*/ pred,
+        Param/*1*/ skipinv,
+
Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00111ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= test_bit_2 << 54u; + inst |= src0_comp_sel << 53u; + inst |= syncstart << 52u; + inst |= dest_bank_ext << 51u; + inst |= end_or_src0_bank_ext << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= move_type << 46u; + inst |= repeat_count << 44u; + inst |= nosched << 43u; + inst |= move_data_type << 40u; + inst |= test_bit_1 << 39u; + inst |= src0_swiz << 35u; + inst |= src0_bank_sel << 34u; + inst |= dest_bank_sel << 32u; + inst |= src1_bank_sel << 30u; + inst |= src2_bank_sel << 28u; + inst |= dest_mask << 24u; + inst |= dest_n << 18u; + inst |= src0_n << 12u; + inst |= src1_n << 6u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + 
Param/*4*/ src1_swiz, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= gpi1_swiz_ext << 54u; + inst |= 0b1ull << 53u; + inst |= opcode2 << 52u; + inst |= dest_use_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= increment_mode << 47u; + inst |= gpi0_abs << 46u; + inst |= repeat_count << 44u; + inst |= nosched << 43u; + inst |= write_mask << 39u; + inst |= src1_neg << 38u; + inst |= src1_abs << 37u; + inst |= gpi1_neg << 36u; + inst |= gpi1_abs << 35u; + inst |= gpi0_swiz_ext << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= gpi0_n << 28u; + inst |= dest_n << 22u; + inst |= gpi0_swiz << 18u; + inst |= gpi1_swiz << 14u; + inst |= gpi1_n << 12u; + inst |= gpi0_neg << 11u; + inst |= src1_swiz_ext << 10u; + inst |= src1_swiz << 6u; + inst |= src1_n << 0u; + return inst; + } + + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00000ull << 59u; + inst |= dat_fmt << 58u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= src0_swiz_bits2 << 53u; + inst |= syncstart << 52u; + inst |= src0_abs << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= src2_swiz << 45u; + inst |= src1_swiz_bit2 << 44u; + inst |= nosched << 43u; + inst |= dest_mask << 39u; + inst |= src1_mod << 37u; + inst |= src2_mod << 35u; + inst |= 
src0_bank << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 22u; + inst |= src1_swiz_bits01 << 20u; + inst |= src0_swiz_bits01 << 18u; + inst |= src0_n << 12u; + inst |= src1_n << 6u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= clip_plane_enable << 54u; + inst |= 0b0ull << 53u; + inst |= opcode2 << 52u; + inst |= dest_use_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= increment_mode << 47u; + inst |= gpi0_abs << 46u; + inst |= repeat_count << 44u; + inst |= nosched << 43u; + inst |= write_mask << 39u; + inst |= src1_neg << 38u; + inst |= src1_abs << 37u; + inst |= clip_plane_n << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= gpi0_n << 28u; + inst |= dest_n << 22u; + inst |= gpi0_swiz << 18u; + inst |= src1_swiz_w << 15u; + inst |= src1_swiz_z << 12u; + inst |= src1_swiz_y << 9u; + inst |= src1_swiz_x << 6u; + inst |= src1_n << 0u; + return inst; + } + + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + 
Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00001ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= src1_swiz_10_11 << 53u; + inst |= syncstart << 52u; + inst |= dest_bank_ext << 51u; + inst |= src1_swiz_9 << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= src2_swiz << 44u; + inst |= nosched << 43u; + inst |= dest_mask << 39u; + inst |= src1_mod << 37u; + inst |= src2_mod << 36u; + inst |= src1_swiz_7_8 << 34u; + inst |= dest_bank_sel << 32u; + inst |= src1_bank_sel << 30u; + inst |= src2_bank_sel << 28u; + inst |= dest_n << 22u; + inst |= src1_swiz_0_6 << 15u; + inst |= op2 << 12u; + inst |= src1_n << 6u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00010ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= src1_swiz_10_11 << 53u; + inst |= syncstart << 52u; + inst |= dest_bank_ext << 51u; + inst |= src1_swiz_9 << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= src2_swiz << 44u; + inst |= nosched << 43u; + inst |= dest_mask << 39u; + inst |= src1_mod << 37u; + inst |= 
src2_mod << 36u; + inst |= src1_swiz_7_8 << 34u; + inst |= dest_bank_sel << 32u; + inst |= src1_bank_sel << 30u; + inst |= src2_bank_sel << 28u; + inst |= dest_n << 22u; + inst |= src1_swiz_0_6 << 15u; + inst |= op2 << 12u; + inst |= src1_n << 6u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b111ull << 61u; + inst |= op1 << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= nosched << 54u; + inst |= moe_expand << 53u; + inst |= sync_start << 52u; + inst |= cache_ext << 51u; + inst |= src0_bank_ext << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= mask_count << 44u; + inst |= addr_mode << 42u; + inst |= mode << 40u; + inst |= dest_bank_primattr << 39u; + inst |= range_enable << 38u; + inst |= data_type << 36u; + inst |= increment_or_decrement << 35u; + inst |= src0_bank << 34u; + inst |= cache_by_pass12 << 33u; + inst |= drc_sel << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= src0_n << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVTST( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + 
Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01001ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= onceonly << 53u; + inst |= syncstart << 52u; + inst |= dest_ext << 51u; + inst |= src1_neg << 50u; + inst |= src1_ext << 49u; + inst |= src2_ext << 48u; + inst |= prec << 47u; + inst |= src2_vscomp << 46u; + inst |= rpt_count << 44u; + inst |= sign_test << 42u; + inst |= zero_test << 40u; + inst |= test_crcomb_and << 39u; + inst |= chan_cc << 36u; + inst |= pdst_n << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= test_wben << 20u; + inst |= alu_sel << 18u; + inst |= alu_op << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01111ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= onceonly << 53u; + inst |= syncstart << 52u; + inst |= dest_ext << 51u; + inst |= test_flag_2 << 50u; + inst |= src1_ext << 49u; + inst |= src2_ext << 48u; + inst |= prec << 47u; + 
inst |= src2_vscomp << 46u; + inst |= rpt_count << 44u; + inst |= sign_test << 42u; + inst |= zero_test << 40u; + inst |= test_crcomb_and << 39u; + inst |= tst_mask_type << 36u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= test_wben << 20u; + inst |= alu_sel << 18u; + inst |= alu_op << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0) { + Instruction inst = 0; + inst |= 0b01000ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= nosched << 54u; + inst |= unknown << 53u; + inst |= syncstart << 52u; + inst |= dest_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= repeat_count << 44u; + inst |= src_fmt << 41u; + inst |= dest_fmt << 38u; + inst |= dest_mask << 34u; + inst |= dest_bank_sel << 32u; + inst |= src1_bank_sel << 30u; + inst |= src2_bank_sel << 28u; + inst |= dest_n << 21u; + inst |= comp_sel_3 << 19u; + inst |= scale << 18u; + inst |= comp_sel_1 << 16u; + inst |= comp_sel_2 << 14u; + inst |= src1_n << 8u; + inst |= comp0_sel_bit1 << 7u; + inst |= src2_n << 1u; + inst |= comp_sel_0_bit0 << 0u; + return inst; + } + + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ 
sync_start, + Param/*1*/ dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01ull << 62u; + inst |= op1 << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= nosched << 54u; + inst |= repeat_count << 53u; + inst |= sync_start << 52u; + inst |= dest_ext << 51u; + inst |= end << 50u; + inst |= src1_ext << 49u; + inst |= src2_ext << 48u; + inst |= mask_count << 44u; + inst |= src2_invert << 43u; + inst |= src2_rot << 38u; + inst |= src2_exth << 36u; + inst |= op2 << 35u; + inst |= bitwise_partial << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= src2_sel << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11100ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= nosched << 54u; + inst |= syncstart << 52u; + inst |= minpack << 51u; + inst |= src0_ext << 50u; + inst |= src1_ext << 49u; + inst |= src2_ext << 48u; + inst |= fconv_type << 46u; + inst |= mask_count << 44u; + inst |= dim << 42u; + inst |= lod_mode << 
40u; + inst |= dest_use_pa << 39u; + inst |= sb_mode << 37u; + inst |= src0_type << 35u; + inst |= src0_bank << 34u; + inst |= drc_sel << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= src0_n << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask) { + Instruction inst = 0; + inst |= 0b00110ull << 59u; + inst |= pred << 56u; + inst |= skipinv << 55u; + inst |= dest_type << 53u; + inst |= syncstart << 52u; + inst |= dest_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= repeat_count << 44u; + inst |= nosched << 43u; + inst |= op2 << 41u; + inst |= src_type << 39u; + inst |= src1_mod << 37u; + inst |= src_comp << 35u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= dest_n << 21u; + inst |= src1_n << 7u; + inst |= write_mask << 0u; + return inst; + } + + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b10000ull << 59u; + inst |= pred << 
57u; + inst |= cmod1 << 56u; + inst |= skipinv << 55u; + inst |= nosched << 54u; + inst |= asel1 << 52u; + inst |= dest_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= cmod2 << 47u; + inst |= count << 44u; + inst |= amod1 << 43u; + inst |= asel2 << 41u; + inst |= csel1 << 38u; + inst |= csel2 << 35u; + inst |= amod2 << 34u; + inst |= dest_bank << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= dest_n << 21u; + inst |= src1_mod << 20u; + inst |= cop << 18u; + inst |= aop << 16u; + inst |= asrc1_mod << 15u; + inst |= dest_mod << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ br_monitor, + Param/*1*/ save_link, + Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= pred << 56u; + inst |= syncend << 55u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + inst |= exception << 51u; + inst |= pwait << 45u; + inst |= sync_ext << 44u; + inst |= nosched << 43u; + inst |= br_monitor << 42u; + inst |= save_link << 41u; + inst |= 0b00ull << 39u; + inst |= br_type << 38u; + inst |= any_inst << 21u; + inst |= all_inst << 20u; + inst |= br_off << 0u; + return inst; + } + + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + inst |= sprvv << 55u; + inst |= 0b100ull << 52u; + 
inst |= end << 51u; + inst |= imm << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= mode << 45u; + inst |= rate_hi << 44u; + inst |= rate_lo_or_nosched << 43u; + inst |= wait_cond << 40u; + inst |= temp_count << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= exe_addr_high << 14u; + inst |= src1_n_or_exe_addr_mid << 7u; + inst |= src2_n_or_exe_addr_low << 0u; + return inst; + } + + Instruction makeNOP() { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + inst |= 0b101ull << 38u; + return inst; + } + + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + inst |= 0b01ull << 52u; + inst |= nosched << 50u; + inst |= temp_limit << 44u; + inst |= pa_limit << 40u; + inst |= sa_limit << 36u; + inst |= dest_inc_mode << 35u; + inst |= src0_inc_mode << 34u; + inst |= src1_inc_mode << 33u; + inst |= src2_inc_mode << 32u; + inst |= dest_inc << 24u; + inst |= src0_inc << 16u; + inst |= src1_inc << 8u; + inst |= src2_inc << 0u; + return inst; + } + + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b011ull << 56u; + inst |= 
sideband_high << 54u; + inst |= 0b10ull << 52u; + inst |= src0_bank_ext << 51u; + inst |= end << 50u; + inst |= src1_bank_ext << 49u; + inst |= src2_bank_ext << 48u; + inst |= target << 46u; + inst |= task_start_or_mte_hi << 45u; + inst |= task_end_or_mte_lo << 44u; + inst |= nosched << 43u; + inst |= sideband_mid << 35u; + inst |= src0_bank << 34u; + inst |= incp << 32u; + inst |= src1_bank << 30u; + inst |= src2_bank << 28u; + inst |= sideband_low << 22u; + inst |= freep << 21u; + inst |= src0_n << 14u; + inst |= src1_n << 7u; + inst |= src2_n << 0u; + return inst; + } + + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= special << 54u; + inst |= category << 52u; + return inst; + } + +} diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp new file mode 100644 index 0000000..6c1b959 --- /dev/null +++ b/src/gxp/src/usse.cpp @@ -0,0 +1,381 @@ +#include + +#include + +#include +#include + +namespace usse { + float fpConstants[] = { + 0.0f, + 0.0f, + 1.0f, + 1.0f, + 2.0f, + 8.0f, + 32.0f, + 128.0f, + 512.0f, + 2048.0f, + 8192.0f, + 32768.0f, + 0.5f, + 0.125f, + 0.03125f, + 0.0078125f, + 0.001953125f, + 0.00048828125f, + 0.0001220703125f, + 3.0517578125e-05f, + 2.7182817459106445f, + 1.4142135381698608f, + 3.1415927410125732f, + 0.7853981852531433f, + 6.2831854820251465f, + 25.132741928100586f, + 1.52587890625e-05f, + 1.5259021893143654e-05f, + 1.5500992276429315e-06f, + 0.0002604166802484542f, + 0.02083333395421505f, + 0.5f, + 0.0f, + 0.0f, + 0.007826805114746094f, + 513.0f, + 2.204391964672e+12f, + 9.472403081849855e+21f, + 4.07034684917033e+31f, + 1.1941301636397839e-07f, + 2.7789456933519086e-17f, + 6.467081701001855e-27f, + 1.50500011103716e-36f, + 1.68573558312346e-06f, + 0.0003208939451724291f, + 0.1955653429031372f, + 3281298.0f, + 0.0f, + 0.0f, + 4.661918073800564e-10f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + -NAN, + -NAN, + -NAN, + -NAN, + NAN, + NAN, + NAN, + 
NAN, + }; + + const uint32_t swizzleStandardSize = 16; + std::array swizzleVector4[2][swizzleStandardSize] = { + { + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y }, + { SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Z }, + { SwizzleChannel::W, SwizzleChannel::W, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }, + { SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::Z }, + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::W, SwizzleChannel::Z }, + { SwizzleChannel::Z, SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::W }, + { SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::Z, SwizzleChannel::W }, + { SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::X, SwizzleChannel::Z }, + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::One }, + }, + { + { SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::X, SwizzleChannel::W }, + { SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::Y }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::W, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }, + { SwizzleChannel::W, SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::Z }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, 
SwizzleChannel::X }, + { SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::W, SwizzleChannel::Z, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Z }, + { SwizzleChannel::X, SwizzleChannel::Z, SwizzleChannel::Y, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::Z, SwizzleChannel::Y, SwizzleChannel::X, SwizzleChannel::W }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::Z }, + { SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Y }, + }, + }; + + std::array swizzleVector3[2][swizzleStandardSize] = { + { + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y }, + { SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Z }, + { SwizzleChannel::W, SwizzleChannel::W, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z }, + { SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }, + { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::X }, + { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Z }, + { SwizzleChannel::Z, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Z, SwizzleChannel::Y }, + { SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::X }, + { SwizzleChannel::Z, SwizzleChannel::Y, SwizzleChannel::X }, + { SwizzleChannel::Z, SwizzleChannel::Z, SwizzleChannel::Y }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::One }, + }, + { + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Y }, + { SwizzleChannel::Y, SwizzleChannel::X, SwizzleChannel::Y }, + { SwizzleChannel::X, 
SwizzleChannel::X, SwizzleChannel::Z }, + { SwizzleChannel::Y, SwizzleChannel::X, SwizzleChannel::X }, + { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Zero }, + { SwizzleChannel::X, SwizzleChannel::One, SwizzleChannel::Zero }, + { SwizzleChannel::Zero, SwizzleChannel::Zero, SwizzleChannel::Zero }, + { SwizzleChannel::One, SwizzleChannel::One, SwizzleChannel::One }, + { SwizzleChannel::Half, SwizzleChannel::Half, SwizzleChannel::Half }, + { SwizzleChannel::Two, SwizzleChannel::Two, SwizzleChannel::Two }, + { SwizzleChannel::X, SwizzleChannel::Zero, SwizzleChannel::Zero }, + { /* Undefined */ }, + { /* Undefined */ }, + { /* Undefined */ }, + { /* Undefined */ }, + }, + }; + + bool BankLayout::needsDiv() { + return bank != usse::RegisterBank::FloatConstant; + } + + BankLayout BankLayout::destLayout(RegisterBank bank) { + switch (bank) { + case RegisterBank::Primary: return { bank, 0, 2 }; + case RegisterBank::Secondary: return { bank, 1, 0 }; + case RegisterBank::Output: return { bank, 0, 1 }; + case RegisterBank::Temp: return { bank, 0, 0 }; + case RegisterBank::Special: return { bank, 1, 1 }; + case RegisterBank::Index: return { bank, 1, 2 }; + case RegisterBank::Indexed1: return { bank, 0, 3 }; + case RegisterBank::Indexed2: return { bank, 1, 3 }; + default: + throw std::runtime_error("Unsupported dest bank."); + } + } + BankLayout BankLayout::src0Layout(RegisterBank bank) { + switch (bank) { + case RegisterBank::Primary: return { bank, 0, 1 }; + case RegisterBank::Secondary: return { bank, 1, 1 }; + case RegisterBank::Output: return { bank, 1, 0 }; + case RegisterBank::Temp: return { bank, 0, 0 }; + default: + throw std::runtime_error("Unsupported src0 bank."); + } + } + BankLayout BankLayout::srcLayout(RegisterBank bank) { + switch (bank) { + case RegisterBank::Temp: return { bank, 0, 0 }; + case RegisterBank::Primary: return { bank, 0, 2 }; + case RegisterBank::Output: return { bank, 0, 1 }; + case RegisterBank::Secondary: return { bank, 0, 3 }; + 
case RegisterBank::FloatConstant: + case RegisterBank::Special: return { bank, 1, 1 }; + case RegisterBank::Immediate: return { bank, 1, 2 }; + case RegisterBank::Indexed1: return { bank, 1, 0 }; + case RegisterBank::Indexed2: return { bank, 1, 3 }; + default: + throw std::runtime_error("Unsupported src bank."); + } + } + + usse::RegisterReference RegisterReference::operator+(uint32_t value) { + usse::RegisterReference ref = *this; + + ref.index += value; + + return ref; + } + + uint32_t RegisterReference::getSwizzleMask() { + uint32_t mask = 0; + + for (SwizzleChannel channel : swizzle) { + switch (channel) { + case SwizzleChannel::X: + mask |= 0b0001u; + break; + case SwizzleChannel::Y: + mask |= 0b0010u; + break; + case SwizzleChannel::Z: + mask |= 0b0100u; + break; + case SwizzleChannel::W: + mask |= 0b1000u; + break; + default: + throw std::runtime_error("Unimplemented swizzle."); + } + } + + return mask; + } + + usse::RegisterReference RegisterReference::getComponents(uint32_t component, uint32_t count) { +// if (component + count > type.components) +// throw std::runtime_error(fmt::format( +// "Tried to get component {} (size: {}) on a register with only {} components.", +// component, count, type.components)); + // Other restrictions, can't do size > 2, can't do .yz swizzle... 
+ + usse::RegisterReference ref = *this; + + int32_t swizzleOffset = 0; + + if (component >= 2) { + ref.index += 2; + swizzleOffset = -2; + } + + ref.type.components = count; + ref.type.arraySize = 1; + + ref.swizzle.clear(); + for (uint32_t a = 0; a < count; a++) { + if (lockSwizzle) + ref.swizzle.push_back(swizzle[component + a]); + else + ref.swizzle.push_back(static_cast(component + a + swizzleOffset)); + } + + return ref; + } + + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size) + : type(type), bank(bank), index(index), size(size) { + for (uint32_t a = 0; a < type.components; a++) { + swizzle.push_back(static_cast(a)); + } + } + + std::string getTypeName(Type type) { + switch (type) { + case Type::Signed8: return "Byte"; + case Type::Signed16: return "Short"; + case Type::Signed32: return "Int"; + case Type::Fixed10: return "Fixed"; + case Type::Float16: return "Half"; + case Type::Float32: return "Float"; + case Type::Unsigned8: return "Ubyte"; + case Type::Unsigned16: return "Ushort"; + case Type::Unsigned32: return "Uint"; + case Type::Output8: return "Output"; + default: return "Invalid"; + } + } + + uint32_t getTypeSize(Type type) { + switch (type) { + case Type::Signed8: return 1; + case Type::Signed16: return 2; + case Type::Signed32: return 4; + case Type::Fixed10: return 2; // Oops + case Type::Float16: return 2; + case Type::Float32: return 4; + case Type::Unsigned8: return 1; + case Type::Unsigned16: return 2; + case Type::Unsigned32: return 4; + case Type::Output8: return 1; + default: + throw std::runtime_error("Unknown type."); + } + } + + std::string getBankName(RegisterBank bank) { + switch (bank) { + case RegisterBank::Temp: return "Temp"; + case RegisterBank::Primary: return "Primary"; + case RegisterBank::Output: return "Output"; + case RegisterBank::Secondary: return "Secondary"; + case RegisterBank::FloatInternal: return "Float Internal"; + case RegisterBank::Special: return "Special"; + 
case RegisterBank::Global: return "Global"; + case RegisterBank::FloatConstant: return "Float Constant"; + case RegisterBank::Immediate: return "Immediate"; + case RegisterBank::Index: return "Index"; + case RegisterBank::Indexed1: return "Indexed 1"; + case RegisterBank::Indexed2: return "Indexed 2"; + case RegisterBank::Predicate: return "Predicate"; + case RegisterBank::Invalid: + default: + return "Invalid"; + } + } + + int32_t getFPConstantIndex(float constant) { + for (size_t a = 0; a < (sizeof(fpConstants) / sizeof(fpConstants[0])); a++) { + if (fpConstants[a] == constant) + return a; + } + + return -1; + } + + bool areSwizzlesInMatchingHalf(SwizzleChannel x, SwizzleChannel y) { + return + ((x == SwizzleChannel::X || x == SwizzleChannel::Y) && + (y == SwizzleChannel::X || y == SwizzleChannel::Y)) || + ((x == SwizzleChannel::Z || x == SwizzleChannel::W) && + (y == SwizzleChannel::Z || y == SwizzleChannel::W)); + } + + int32_t getSwizzleScalarIndex(SwizzleChannel element) { + return static_cast(element); + } + // Returns the index of the first swizzleVector3[extended] row whose channels equal `elements` (DontCare entries match any channel), or -1 when no row matches. + int32_t getSwizzleVec3Index(std::array elements, bool extended) { + for (uint32_t a = 0; a < swizzleStandardSize; a++) { + bool matches = true; + + // Bounded by the argument's own length rather than the literal 4 used by the Vec4 lookup: swizzleVector3 rows are initialised with three channels, so index 3 is presumably past the end of both arrays. + for (uint32_t b = 0; b < elements.size(); b++) { + if (elements[b] != SwizzleChannel::DontCare) { + if (swizzleVector3[extended][a][b] != elements[b]) { + matches = false; + break; + } + } + } + + if (matches) + return a; + } + + + return -1; + } + int32_t getSwizzleVec4Index(std::array elements, bool extended) { + for (uint32_t a = 0; a < swizzleStandardSize; a++) { + bool matches = true; + + for (uint32_t b = 0; b < 4; b++) { + if (elements[b] != SwizzleChannel::DontCare) { + if (swizzleVector4[extended][a][b] != elements[b]) { + matches = false; + break; + } + } + } + + if (matches) + return a; + } + + return -1; + } +} diff --git a/src/interface/CMakeLists.txt b/src/interface/CMakeLists.txt new file mode 100644 index 0000000..2104063 --- /dev/null +++ b/src/interface/CMakeLists.txt @@ -0,0 +1,8 @@ 
+add_library(interface + include/interface/interface.h + + src/interface.cpp) + +target_include_directories(interface PUBLIC include) +target_link_libraries(interface PUBLIC util translator) +target_link_libraries(interface PRIVATE fmt) diff --git a/src/interface/include/interface/interface.h b/src/interface/include/interface/interface.h new file mode 100644 index 0000000..1bea41e --- /dev/null +++ b/src/interface/include/interface/interface.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include + +/** + * Parses command line options and deals with any user interface. + */ +class Interface { + std::string inputFilePath; + std::string outputFilePath; + + bool parseParams(int count, char **args); + +public: + int exec(int count, char **args); +}; diff --git a/src/interface/src/interface.cpp b/src/interface/src/interface.cpp new file mode 100644 index 0000000..f3f866c --- /dev/null +++ b/src/interface/src/interface.cpp @@ -0,0 +1,50 @@ +#include + +#include +#include + +#include + +#define ERROR_RETURN_IF(cond, text) if (cond) { fmt::print(text); return false; } + +bool Interface::parseParams(int count, char **args) { + for (int a = 1; a < count; a++) { + if (strcmp(args[a], "-o") == 0) { + ERROR_RETURN_IF(a + 1 >= count, "Error, no output specified.") + ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.") + outputFilePath = args[a + 1]; + a++; + } else { + ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.") + inputFilePath = args[a]; + } + } + + ERROR_RETURN_IF(inputFilePath.empty(), "You must specify an input.") + ERROR_RETURN_IF(outputFilePath.empty(), "You must specify an output.") + + return true; +} + +#undef ERROR_RETURN_IF + +int Interface::exec(int count, char **args) { + if (!parseParams(count, args)) return 1; + auto spirvData = loadFileData(inputFilePath); + +#ifdef NDEBUG + try { +#endif + CompilerGXP compiler(spirvData); + std::vector gxpData = compiler.compileData(); + std::ofstream 
stream(outputFilePath, std::ios::binary); + // The compiled program is written as raw bytes, so the stream is opened in binary mode to keep text-mode newline translation from altering it. + if (!stream) { + fmt::print("Could not open output file {}.\n", outputFilePath); + return 1; + } + stream.write(reinterpret_cast(gxpData.data()), gxpData.size()); + stream.close(); +#ifdef NDEBUG + } catch (std::runtime_error &e) { + fmt::print("{}\n", e.what()); + // A failed compilation must not fall through to the success exit code below. + return 1; + } +#endif + + return 0; +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..63008ae --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,5 @@ +#include + +int main(int count, char **args) { + return Interface().exec(count, args); +} diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt new file mode 100644 index 0000000..d0a0a94 --- /dev/null +++ b/src/translator/CMakeLists.txt @@ -0,0 +1,7 @@ +add_library(translator + include/translator/translator.h + + src/codes.cpp + src/translator.cpp) +target_include_directories(translator PUBLIC include) +target_link_libraries(translator PUBLIC util gxp) diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h new file mode 100644 index 0000000..c9bb1b8 --- /dev/null +++ b/src/translator/include/translator/translator.h @@ -0,0 +1,68 @@ +#pragma once + +#include +#include + +#include + +namespace gxp { class Block; } +class CompilerGXP; +class TranslatorArguments; + +typedef void(CompilerGXP::*TranslatorImplementation)(const TranslatorArguments &arguments); + +class TranslatorCode { +public: + spv::Op op; + std::string name; + TranslatorImplementation implementation; +}; + +class TranslatorArguments { +public: + gxp::Block &block; + const TranslatorCode &code; + const uint32_t *instruction; + uint32_t wordCount; + + TranslatorArguments( + gxp::Block &block, + const TranslatorCode &code, + const uint32_t *instruction, + uint32_t wordCount); +}; + +class CompilerGXP : public Compiler { + gxp::Builder builder; + + std::vector codes; + std::unordered_map idVaryings; + std::unordered_map idRegisters; + std::unordered_map varyingReferences; + + static usse::Type translateType(SPIRType::BaseType baseType); + static usse::DataType translateType(const SPIRType 
&type); + static gxp::ParameterSemantic translateDecorations(spv::BuiltIn builtIn); + static gxp::ProgramVarying translateVarying(spv::BuiltIn builtIn); + + void createBlock(const SPIRBlock &block); + void createFunction(const SPIRFunction &function); + void createShaderResources(); + void createTranslators(); + + // SPRIV Translation OPs + void unimplemented(const TranslatorArguments &arguments); + void undefined(const TranslatorArguments &arguments); + + void opLoad(const TranslatorArguments &arguments); + void opStore(const TranslatorArguments &arguments); + void opConvertUToF(const TranslatorArguments &arguments); + void opCompositeExtract(const TranslatorArguments &arguments); + void opCompositeConstruct(const TranslatorArguments &arguments); + void opAccessChain(const TranslatorArguments &arguments); +public: + + std::vector compileData(); + + explicit CompilerGXP(const std::vector &data); +}; diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp new file mode 100644 index 0000000..2b4dc3a --- /dev/null +++ b/src/translator/src/codes.cpp @@ -0,0 +1,577 @@ +#include + +#include + +static std::string getString(const uint32_t *program, size_t &length) { + auto *cString = reinterpret_cast(program); + size_t size = strlen(cString) + 1; + size_t remainder = size % sizeof(uint32_t); + + length = size / sizeof(uint32_t) + (remainder == 0 ? 0 : 1); + return std::string(cString); +} + +void CompilerGXP::unimplemented(const TranslatorArguments &arguments) { + throw std::runtime_error(fmt::format("{} is not implemented.", arguments.code.name)); +} + +void CompilerGXP::undefined(const TranslatorArguments &arguments) { + throw std::runtime_error(fmt::format("Undefined instruction for code {}.", static_cast(arguments.code.op))); +} + +void CompilerGXP::opLoad(const TranslatorArguments &arguments) { + spv::Id type = arguments.instruction[0]; // TODO: Type is important. 
+ spv::Id result = arguments.instruction[1]; + spv::Id pointer = arguments.instruction[2]; + + auto reg = idRegisters.find(pointer); + if (reg == idRegisters.end()) + throw std::runtime_error(fmt::format("Source ID {} does not have an associated register.", pointer)); + + idRegisters[result] = reg->second; +} + +void CompilerGXP::opStore(const TranslatorArguments &arguments) { + spv::Id destination = arguments.instruction[0]; + spv::Id source = arguments.instruction[1]; + + auto sourceRegister = idRegisters.find(source); + if (sourceRegister == idRegisters.end()) + throw std::runtime_error(fmt::format("Source ID {} was not loaded with a register reference.", source)); + + usse::RegisterReference destinationRegister = varyingReferences[idVaryings[destination]]; + + auto *var = maybe_get(destination); + if (var) { + SPIRType type = get_type(var->basetype); + + usse::DataType dataType = translateType(type); + + arguments.block.createMov(sourceRegister->second, destinationRegister); + } else { + arguments.block.createMov(sourceRegister->second, idRegisters[destination]); + } +} + +void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { + spv::Id type = arguments.instruction[0]; // TODO: Type is important. 
+ spv::Id destination = arguments.instruction[1]; + spv::Id source = arguments.instruction[2]; + + usse::RegisterReference srcReg = idRegisters[source]; + usse::RegisterReference destReg = arguments.block.parent.allocateRegister( + usse::RegisterBank::Primary, { usse::Type::Float32, 4, 1 }); + + arguments.block.createPack(srcReg, destReg); + + idRegisters[destination] = destReg; +} + +void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id source = arguments.instruction[2]; + uint32_t index = arguments.instruction[3]; + + auto reg = idRegisters.find(source); + if (reg == idRegisters.end()) + throw std::runtime_error(fmt::format("Source ID {} was not loaded with a register reference.", source)); + + idRegisters[result] = reg->second.getComponents(index, 1); +} + +void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + + SPIRType type = get_type(typeId); + + usse::RegisterReference output = arguments.block.parent.allocateRegister(usse::RegisterBank::Primary, + { translateType(type.basetype), type.vecsize, 1 }); + + for (size_t a = 0; a < type.vecsize; a++) { + usse::RegisterReference source; + + spv::Id vecId = arguments.instruction[2 + a]; + + // This is very wrong. Rework this! + auto reg = idRegisters.find(vecId); + if (reg != idRegisters.end()) { + source = reg->second; + } else if (type.basetype == SPIRType::Float) { + SPIRConstant spvConstant = get(vecId); + + float packConstant = spvConstant.m.c[0].r[0].f32; + + // Especially here, when it returns it should try the next option or add it to literals. + // Continue the if case here. 
+ int32_t constantIndex = usse::getFPConstantIndex(packConstant); + if (constantIndex != -1) { + source = usse::RegisterReference({ usse::Type::Float32, 1, 1 }, + usse::RegisterBank::FloatConstant, constantIndex, 1); + source.lockSwizzle = true; + usse::SwizzleVec4 swizzleTemp = usse::getSwizzleVec4All(usse::SwizzleChannel::X); + source.swizzle = std::vector(swizzleTemp.begin(), swizzleTemp.end()); + } else { + // The constant is not in the hardware constant table and no literal path exists yet; fail loudly instead of moving from a default-constructed register reference. + throw std::runtime_error(fmt::format("Float constant {} (ID {}) is not an available hardware constant.", packConstant, vecId)); + } + } else { + // Neither a tracked register nor a float constant: there is nothing valid to move from. + throw std::runtime_error(fmt::format("Constituent ID {} does not have an associated register.", vecId)); + } + + arguments.block.createMov(source, output.getComponents(a, 1)); + } + + idRegisters[result] = output; +} + +void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id base = arguments.instruction[2]; + spv::Id index = arguments.instruction[3]; // Multiple indices, apparently. + + // This right now is only supported to accessing gl_PerVertex structs. + + SPIRConstant constant = get(index); + uint32_t structIndex = constant.m.c[0].r[0].u32; + + SPIRVariable baseVariable = get(base); + SPIRType type = get_type(baseVariable.basetype); + if (type.basetype != SPIRType::Struct) + throw std::runtime_error("Access chain can only be created on gl_PerVertex struct."); + + SPIRType memberType = get_type(type.member_types[structIndex]); + spv::BuiltIn builtIn; + + if (is_member_builtin(type, structIndex, &builtIn)) { + gxp::ProgramVarying varying = translateVarying(builtIn); + auto varyingReference = varyingReferences.find(varying); + + if (varyingReference != varyingReferences.end()) { + idRegisters[result] = varyingReference->second; + } else { + throw std::runtime_error( + fmt::format("No varying registered with varying {}.", static_cast(varying))); + } + } else { + throw std::runtime_error("Access chain does not link to a varying."); + } +} + +TranslatorArguments::TranslatorArguments( + gxp::Block &block, + const TranslatorCode &code, + const uint32_t *instruction, + uint32_t wordCount) : + block(block), + code(code), + instruction(instruction), + 
wordCount(wordCount) + { } + +void CompilerGXP::createTranslators() { + // This could be a map but there are only so many undefineds so I couldn't bother. + codes = { + { spv::Op::OpNop, "OpNop", &CompilerGXP::unimplemented }, + { spv::Op::OpUndef, "OpUndef", &CompilerGXP::unimplemented }, + { spv::Op::OpSourceContinued, "OpSourceContinued", &CompilerGXP::unimplemented }, + { spv::Op::OpSource, "OpSource", &CompilerGXP::unimplemented }, + { spv::Op::OpSourceExtension, "OpSourceExtension", &CompilerGXP::unimplemented }, + { spv::Op::OpName, "OpName", &CompilerGXP::unimplemented }, + { spv::Op::OpMemberName, "OpMemberName", &CompilerGXP::unimplemented }, + { spv::Op::OpString, "OpString", &CompilerGXP::unimplemented }, + { spv::Op::OpLine, "OpLine", &CompilerGXP::unimplemented }, + { static_cast(9), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpExtension, "OpExtension", &CompilerGXP::unimplemented }, + { spv::Op::OpExtInstImport, "OpExtInstImport", &CompilerGXP::unimplemented }, + { spv::Op::OpExtInst, "OpExtInst", &CompilerGXP::unimplemented }, + { static_cast(13), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpMemoryModel, "OpMemoryModel", &CompilerGXP::unimplemented }, + { spv::Op::OpEntryPoint, "OpEntryPoint", &CompilerGXP::unimplemented }, + { spv::Op::OpExecutionMode, "OpExecutionMode", &CompilerGXP::unimplemented }, + { spv::Op::OpCapability, "OpCapability", &CompilerGXP::unimplemented }, + { static_cast(18), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpTypeVoid, "OpTypeVoid", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeBool, "OpTypeBool", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeInt, "OpTypeInt", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeFloat, "OpTypeFloat", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeVector, "OpTypeVector", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeMatrix, "OpTypeMatrix", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeImage, "OpTypeImage", &CompilerGXP::unimplemented 
}, + { spv::Op::OpTypeSampler, "OpTypeSampler", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeSampledImage, "OpTypeSampledImage", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeArray, "OpTypeArray", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeRuntimeArray, "OpTypeRuntimeArray", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeStruct, "OpTypeStruct", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeOpaque, "OpTypeOpaque", &CompilerGXP::unimplemented }, + { spv::Op::OpTypePointer, "OpTypePointer", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeFunction, "OpTypeFunction", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeEvent, "OpTypeEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeDeviceEvent, "OpTypeDeviceEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeReserveId, "OpTypeReserveId", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeQueue, "OpTypeQueue", &CompilerGXP::unimplemented }, + { spv::Op::OpTypePipe, "OpTypePipe", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeForwardPointer, "OpTypeForwardPointer", &CompilerGXP::unimplemented }, + { static_cast(40), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpConstantTrue, "OpConstantTrue", &CompilerGXP::unimplemented }, + { spv::Op::OpConstantFalse, "OpConstantFalse", &CompilerGXP::unimplemented }, + { spv::Op::OpConstant, "OpConstant", &CompilerGXP::unimplemented }, + { spv::Op::OpConstantComposite, "OpConstantComposite", &CompilerGXP::unimplemented }, + { spv::Op::OpConstantSampler, "OpConstantSampler", &CompilerGXP::unimplemented }, + { spv::Op::OpConstantNull, "OpConstantNull", &CompilerGXP::unimplemented }, + { static_cast(47), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpSpecConstantTrue, "OpSpecConstantTrue", &CompilerGXP::unimplemented }, + { spv::Op::OpSpecConstantFalse, "OpSpecConstantFalse", &CompilerGXP::unimplemented }, + { spv::Op::OpSpecConstant, "OpSpecConstant", &CompilerGXP::unimplemented }, + { spv::Op::OpSpecConstantComposite, 
"OpSpecConstantComposite", &CompilerGXP::unimplemented }, + { spv::Op::OpSpecConstantOp, "OpSpecConstantOp", &CompilerGXP::unimplemented }, + { static_cast(53), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpFunction, "OpFunction", &CompilerGXP::unimplemented }, + { spv::Op::OpFunctionParameter, "OpFunctionParameter", &CompilerGXP::unimplemented }, + { spv::Op::OpFunctionEnd, "OpFunctionEnd", &CompilerGXP::unimplemented }, + { spv::Op::OpFunctionCall, "OpFunctionCall", &CompilerGXP::unimplemented }, + { static_cast(58), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpVariable, "OpVariable", &CompilerGXP::unimplemented }, + { spv::Op::OpImageTexelPointer, "OpImageTexelPointer", &CompilerGXP::unimplemented }, + { spv::Op::OpLoad, "OpLoad", &CompilerGXP::opLoad }, + { spv::Op::OpStore, "OpStore", &CompilerGXP::opStore }, + { spv::Op::OpCopyMemory, "OpCopyMemory", &CompilerGXP::unimplemented }, + { spv::Op::OpCopyMemorySized, "OpCopyMemorySized", &CompilerGXP::unimplemented }, + { spv::Op::OpAccessChain, "OpAccessChain", &CompilerGXP::opAccessChain }, + { spv::Op::OpInBoundsAccessChain, "OpInBoundsAccessChain", &CompilerGXP::unimplemented }, + { spv::Op::OpPtrAccessChain, "OpPtrAccessChain", &CompilerGXP::unimplemented }, + { spv::Op::OpArrayLength, "OpArrayLength", &CompilerGXP::unimplemented }, + { spv::Op::OpGenericPtrMemSemantics, "OpGenericPtrMemSemantics", &CompilerGXP::unimplemented }, + { spv::Op::OpInBoundsPtrAccessChain, "OpInBoundsPtrAccessChain", &CompilerGXP::unimplemented }, + { spv::Op::OpDecorate, "OpDecorate", &CompilerGXP::unimplemented }, + { spv::Op::OpMemberDecorate, "OpMemberDecorate", &CompilerGXP::unimplemented }, + { spv::Op::OpDecorationGroup, "OpDecorationGroup", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupDecorate, "OpGroupDecorate", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupMemberDecorate, "OpGroupMemberDecorate", &CompilerGXP::unimplemented }, + { static_cast(76), "OpUndefined", &CompilerGXP::undefined }, 
+ { spv::Op::OpVectorExtractDynamic, "OpVectorExtractDynamic", &CompilerGXP::unimplemented }, + { spv::Op::OpVectorInsertDynamic, "OpVectorInsertDynamic", &CompilerGXP::unimplemented }, + { spv::Op::OpVectorShuffle, "OpVectorShuffle", &CompilerGXP::unimplemented }, + { spv::Op::OpCompositeConstruct, "OpCompositeConstruct", &CompilerGXP::opCompositeConstruct }, + { spv::Op::OpCompositeExtract, "OpCompositeExtract", &CompilerGXP::opCompositeExtract }, + { spv::Op::OpCompositeInsert, "OpCompositeInsert", &CompilerGXP::unimplemented }, + { spv::Op::OpCopyObject, "OpCopyObject", &CompilerGXP::unimplemented }, + { spv::Op::OpTranspose, "OpTranspose", &CompilerGXP::unimplemented }, + { static_cast(85), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpSampledImage, "OpSampledImage", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleImplicitLod, "OpImageSampleImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleExplicitLod, "OpImageSampleExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleDrefImplicitLod, "OpImageSampleDrefImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleDrefExplicitLod, "OpImageSampleDrefExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleProjImplicitLod, "OpImageSampleProjImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleProjExplicitLod, "OpImageSampleProjExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleProjDrefImplicitLod, "OpImageSampleProjDrefImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSampleProjDrefExplicitLod, "OpImageSampleProjDrefExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageFetch, "OpImageFetch", &CompilerGXP::unimplemented }, + { spv::Op::OpImageGather, "OpImageGather", &CompilerGXP::unimplemented }, + { spv::Op::OpImageDrefGather, "OpImageDrefGather", &CompilerGXP::unimplemented }, + { spv::Op::OpImageRead, "OpImageRead", &CompilerGXP::unimplemented }, + { 
spv::Op::OpImageWrite, "OpImageWrite", &CompilerGXP::unimplemented }, + { spv::Op::OpImage, "OpImage", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQueryFormat, "OpImageQueryFormat", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQueryOrder, "OpImageQueryOrder", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQuerySizeLod, "OpImageQuerySizeLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQuerySize, "OpImageQuerySize", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQueryLod, "OpImageQueryLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQueryLevels, "OpImageQueryLevels", &CompilerGXP::unimplemented }, + { spv::Op::OpImageQuerySamples, "OpImageQuerySamples", &CompilerGXP::unimplemented }, + { static_cast(108), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpConvertFToU, "OpConvertFToU", &CompilerGXP::unimplemented }, + { spv::Op::OpConvertFToS, "OpConvertFToS", &CompilerGXP::unimplemented }, + { spv::Op::OpConvertSToF, "OpConvertSToF", &CompilerGXP::unimplemented }, + { spv::Op::OpConvertUToF, "OpConvertUToF", &CompilerGXP::opConvertUToF }, + { spv::Op::OpUConvert, "OpUConvert", &CompilerGXP::unimplemented }, + { spv::Op::OpSConvert, "OpSConvert", &CompilerGXP::unimplemented }, + { spv::Op::OpFConvert, "OpFConvert", &CompilerGXP::unimplemented }, + { spv::Op::OpQuantizeToF16, "OpQuantizeToF16", &CompilerGXP::unimplemented }, + { spv::Op::OpConvertPtrToU, "OpConvertPtrToU", &CompilerGXP::unimplemented }, + { spv::Op::OpSatConvertSToU, "OpSatConvertSToU", &CompilerGXP::unimplemented }, + { spv::Op::OpSatConvertUToS, "OpSatConvertUToS", &CompilerGXP::unimplemented }, + { spv::Op::OpConvertUToPtr, "OpConvertUToPtr", &CompilerGXP::unimplemented }, + { spv::Op::OpPtrCastToGeneric, "OpPtrCastToGeneric", &CompilerGXP::unimplemented }, + { spv::Op::OpGenericCastToPtr, "OpGenericCastToPtr", &CompilerGXP::unimplemented }, + { spv::Op::OpGenericCastToPtrExplicit, "OpGenericCastToPtrExplicit", &CompilerGXP::unimplemented }, + { 
spv::Op::OpBitcast, "OpBitcast", &CompilerGXP::unimplemented }, + { static_cast(125), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpSNegate, "OpSNegate", &CompilerGXP::unimplemented }, + { spv::Op::OpFNegate, "OpFNegate", &CompilerGXP::unimplemented }, + { spv::Op::OpIAdd, "OpIAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpFAdd, "OpFAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpISub, "OpISub", &CompilerGXP::unimplemented }, + { spv::Op::OpFSub, "OpFSub", &CompilerGXP::unimplemented }, + { spv::Op::OpIMul, "OpIMul", &CompilerGXP::unimplemented }, + { spv::Op::OpFMul, "OpFMul", &CompilerGXP::unimplemented }, + { spv::Op::OpUDiv, "OpUDiv", &CompilerGXP::unimplemented }, + { spv::Op::OpSDiv, "OpSDiv", &CompilerGXP::unimplemented }, + { spv::Op::OpFDiv, "OpFDiv", &CompilerGXP::unimplemented }, + { spv::Op::OpUMod, "OpUMod", &CompilerGXP::unimplemented }, + { spv::Op::OpSRem, "OpSRem", &CompilerGXP::unimplemented }, + { spv::Op::OpSMod, "OpSMod", &CompilerGXP::unimplemented }, + { spv::Op::OpFRem, "OpFRem", &CompilerGXP::unimplemented }, + { spv::Op::OpFMod, "OpFMod", &CompilerGXP::unimplemented }, + { spv::Op::OpVectorTimesScalar, "OpVectorTimesScalar", &CompilerGXP::unimplemented }, + { spv::Op::OpMatrixTimesScalar, "OpMatrixTimesScalar", &CompilerGXP::unimplemented }, + { spv::Op::OpVectorTimesMatrix, "OpVectorTimesMatrix", &CompilerGXP::unimplemented }, + { spv::Op::OpMatrixTimesVector, "OpMatrixTimesVector", &CompilerGXP::unimplemented }, + { spv::Op::OpMatrixTimesMatrix, "OpMatrixTimesMatrix", &CompilerGXP::unimplemented }, + { spv::Op::OpOuterProduct, "OpOuterProduct", &CompilerGXP::unimplemented }, + { spv::Op::OpDot, "OpDot", &CompilerGXP::unimplemented }, + { spv::Op::OpIAddCarry, "OpIAddCarry", &CompilerGXP::unimplemented }, + { spv::Op::OpISubBorrow, "OpISubBorrow", &CompilerGXP::unimplemented }, + { spv::Op::OpUMulExtended, "OpUMulExtended", &CompilerGXP::unimplemented }, + { spv::Op::OpSMulExtended, "OpSMulExtended", 
&CompilerGXP::unimplemented }, + { static_cast(153), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpAny, "OpAny", &CompilerGXP::unimplemented }, + { spv::Op::OpAll, "OpAll", &CompilerGXP::unimplemented }, + { spv::Op::OpIsNan, "OpIsNan", &CompilerGXP::unimplemented }, + { spv::Op::OpIsInf, "OpIsInf", &CompilerGXP::unimplemented }, + { spv::Op::OpIsFinite, "OpIsFinite", &CompilerGXP::unimplemented }, + { spv::Op::OpIsNormal, "OpIsNormal", &CompilerGXP::unimplemented }, + { spv::Op::OpSignBitSet, "OpSignBitSet", &CompilerGXP::unimplemented }, + { spv::Op::OpLessOrGreater, "OpLessOrGreater", &CompilerGXP::unimplemented }, + { spv::Op::OpOrdered, "OpOrdered", &CompilerGXP::unimplemented }, + { spv::Op::OpUnordered, "OpUnordered", &CompilerGXP::unimplemented }, + { spv::Op::OpLogicalEqual, "OpLogicalEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpLogicalNotEqual, "OpLogicalNotEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpLogicalOr, "OpLogicalOr", &CompilerGXP::unimplemented }, + { spv::Op::OpLogicalAnd, "OpLogicalAnd", &CompilerGXP::unimplemented }, + { spv::Op::OpLogicalNot, "OpLogicalNot", &CompilerGXP::unimplemented }, + { spv::Op::OpSelect, "OpSelect", &CompilerGXP::unimplemented }, + { spv::Op::OpIEqual, "OpIEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpINotEqual, "OpINotEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpUGreaterThan, "OpUGreaterThan", &CompilerGXP::unimplemented }, + { spv::Op::OpSGreaterThan, "OpSGreaterThan", &CompilerGXP::unimplemented }, + { spv::Op::OpUGreaterThanEqual, "OpUGreaterThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpSGreaterThanEqual, "OpSGreaterThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpULessThan, "OpULessThan", &CompilerGXP::unimplemented }, + { spv::Op::OpSLessThan, "OpSLessThan", &CompilerGXP::unimplemented }, + { spv::Op::OpULessThanEqual, "OpULessThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpSLessThanEqual, "OpSLessThanEqual", &CompilerGXP::unimplemented 
}, + { spv::Op::OpFOrdEqual, "OpFOrdEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordEqual, "OpFUnordEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFOrdNotEqual, "OpFOrdNotEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordNotEqual, "OpFUnordNotEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFOrdLessThan, "OpFOrdLessThan", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordLessThan, "OpFUnordLessThan", &CompilerGXP::unimplemented }, + { spv::Op::OpFOrdGreaterThan, "OpFOrdGreaterThan", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordGreaterThan, "OpFUnordGreaterThan", &CompilerGXP::unimplemented }, + { spv::Op::OpFOrdLessThanEqual, "OpFOrdLessThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordLessThanEqual, "OpFUnordLessThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFOrdGreaterThanEqual, "OpFOrdGreaterThanEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpFUnordGreaterThanEqual, "OpFUnordGreaterThanEqual", &CompilerGXP::unimplemented }, + { static_cast(192), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(193), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpShiftRightLogical, "OpShiftRightLogical", &CompilerGXP::unimplemented }, + { spv::Op::OpShiftRightArithmetic, "OpShiftRightArithmetic", &CompilerGXP::unimplemented }, + { spv::Op::OpShiftLeftLogical, "OpShiftLeftLogical", &CompilerGXP::unimplemented }, + { spv::Op::OpBitwiseOr, "OpBitwiseOr", &CompilerGXP::unimplemented }, + { spv::Op::OpBitwiseXor, "OpBitwiseXor", &CompilerGXP::unimplemented }, + { spv::Op::OpBitwiseAnd, "OpBitwiseAnd", &CompilerGXP::unimplemented }, + { spv::Op::OpNot, "OpNot", &CompilerGXP::unimplemented }, + { spv::Op::OpBitFieldInsert, "OpBitFieldInsert", &CompilerGXP::unimplemented }, + { spv::Op::OpBitFieldSExtract, "OpBitFieldSExtract", &CompilerGXP::unimplemented }, + { spv::Op::OpBitFieldUExtract, "OpBitFieldUExtract", &CompilerGXP::unimplemented }, + { spv::Op::OpBitReverse, "OpBitReverse", 
&CompilerGXP::unimplemented }, + { spv::Op::OpBitCount, "OpBitCount", &CompilerGXP::unimplemented }, + { static_cast(206), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpDPdx, "OpDPdx", &CompilerGXP::unimplemented }, + { spv::Op::OpDPdy, "OpDPdy", &CompilerGXP::unimplemented }, + { spv::Op::OpFwidth, "OpFwidth", &CompilerGXP::unimplemented }, + { spv::Op::OpDPdxFine, "OpDPdxFine", &CompilerGXP::unimplemented }, + { spv::Op::OpDPdyFine, "OpDPdyFine", &CompilerGXP::unimplemented }, + { spv::Op::OpFwidthFine, "OpFwidthFine", &CompilerGXP::unimplemented }, + { spv::Op::OpDPdxCoarse, "OpDPdxCoarse", &CompilerGXP::unimplemented }, + { spv::Op::OpDPdyCoarse, "OpDPdyCoarse", &CompilerGXP::unimplemented }, + { spv::Op::OpFwidthCoarse, "OpFwidthCoarse", &CompilerGXP::unimplemented }, + { static_cast(216), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(217), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpEmitVertex, "OpEmitVertex", &CompilerGXP::unimplemented }, + { spv::Op::OpEndPrimitive, "OpEndPrimitive", &CompilerGXP::unimplemented }, + { spv::Op::OpEmitStreamVertex, "OpEmitStreamVertex", &CompilerGXP::unimplemented }, + { spv::Op::OpEndStreamPrimitive, "OpEndStreamPrimitive", &CompilerGXP::unimplemented }, + { static_cast(222), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(223), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpControlBarrier, "OpControlBarrier", &CompilerGXP::unimplemented }, + { spv::Op::OpMemoryBarrier, "OpMemoryBarrier", &CompilerGXP::unimplemented }, + { static_cast(226), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpAtomicLoad, "OpAtomicLoad", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicStore, "OpAtomicStore", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicExchange, "OpAtomicExchange", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicCompareExchange, "OpAtomicCompareExchange", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicCompareExchangeWeak, 
"OpAtomicCompareExchangeWeak", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicIIncrement, "OpAtomicIIncrement", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicIDecrement, "OpAtomicIDecrement", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicIAdd, "OpAtomicIAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicISub, "OpAtomicISub", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicSMin, "OpAtomicSMin", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicUMin, "OpAtomicUMin", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicSMax, "OpAtomicSMax", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicUMax, "OpAtomicUMax", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicAnd, "OpAtomicAnd", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicOr, "OpAtomicOr", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicXor, "OpAtomicXor", &CompilerGXP::unimplemented }, + { static_cast(243), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(244), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpPhi, "OpPhi", &CompilerGXP::unimplemented }, + { spv::Op::OpLoopMerge, "OpLoopMerge", &CompilerGXP::unimplemented }, + { spv::Op::OpSelectionMerge, "OpSelectionMerge", &CompilerGXP::unimplemented }, + { spv::Op::OpLabel, "OpLabel", &CompilerGXP::unimplemented }, + { spv::Op::OpBranch, "OpBranch", &CompilerGXP::unimplemented }, + { spv::Op::OpBranchConditional, "OpBranchConditional", &CompilerGXP::unimplemented }, + { spv::Op::OpSwitch, "OpSwitch", &CompilerGXP::unimplemented }, + { spv::Op::OpKill, "OpKill", &CompilerGXP::unimplemented }, + { spv::Op::OpReturn, "OpReturn", &CompilerGXP::unimplemented }, + { spv::Op::OpReturnValue, "OpReturnValue", &CompilerGXP::unimplemented }, + { spv::Op::OpUnreachable, "OpUnreachable", &CompilerGXP::unimplemented }, + { spv::Op::OpLifetimeStart, "OpLifetimeStart", &CompilerGXP::unimplemented }, + { spv::Op::OpLifetimeStop, "OpLifetimeStop", &CompilerGXP::unimplemented }, + { static_cast(258), "OpUndefined", 
&CompilerGXP::undefined }, + { spv::Op::OpGroupAsyncCopy, "OpGroupAsyncCopy", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupWaitEvents, "OpGroupWaitEvents", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupAll, "OpGroupAll", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupAny, "OpGroupAny", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupBroadcast, "OpGroupBroadcast", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupIAdd, "OpGroupIAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupFAdd, "OpGroupFAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupFMin, "OpGroupFMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupUMin, "OpGroupUMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupSMin, "OpGroupSMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupFMax, "OpGroupFMax", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupUMax, "OpGroupUMax", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupSMax, "OpGroupSMax", &CompilerGXP::unimplemented }, + { static_cast(272), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(273), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpReadPipe, "OpReadPipe", &CompilerGXP::unimplemented }, + { spv::Op::OpWritePipe, "OpWritePipe", &CompilerGXP::unimplemented }, + { spv::Op::OpReservedReadPipe, "OpReservedReadPipe", &CompilerGXP::unimplemented }, + { spv::Op::OpReservedWritePipe, "OpReservedWritePipe", &CompilerGXP::unimplemented }, + { spv::Op::OpReserveReadPipePackets, "OpReserveReadPipePackets", &CompilerGXP::unimplemented }, + { spv::Op::OpReserveWritePipePackets, "OpReserveWritePipePackets", &CompilerGXP::unimplemented }, + { spv::Op::OpCommitReadPipe, "OpCommitReadPipe", &CompilerGXP::unimplemented }, + { spv::Op::OpCommitWritePipe, "OpCommitWritePipe", &CompilerGXP::unimplemented }, + { spv::Op::OpIsValidReserveId, "OpIsValidReserveId", &CompilerGXP::unimplemented }, + { spv::Op::OpGetNumPipePackets, "OpGetNumPipePackets", &CompilerGXP::unimplemented }, + { 
spv::Op::OpGetMaxPipePackets, "OpGetMaxPipePackets", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupReserveReadPipePackets, "OpGroupReserveReadPipePackets", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupReserveWritePipePackets, "OpGroupReserveWritePipePackets", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupCommitReadPipe, "OpGroupCommitReadPipe", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupCommitWritePipe, "OpGroupCommitWritePipe", &CompilerGXP::unimplemented }, + { static_cast(289), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(290), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpEnqueueMarker, "OpEnqueueMarker", &CompilerGXP::unimplemented }, + { spv::Op::OpEnqueueKernel, "OpEnqueueKernel", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelNDrangeSubGroupCount, "OpGetKernelNDrangeSubGroupCount", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelNDrangeMaxSubGroupSize, "OpGetKernelNDrangeMaxSubGroupSize", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelWorkGroupSize, "OpGetKernelWorkGroupSize", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelPreferredWorkGroupSizeMultiple, "OpGetKernelPreferredWorkGroupSizeMultiple", &CompilerGXP::unimplemented }, + { spv::Op::OpRetainEvent, "OpRetainEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpReleaseEvent, "OpReleaseEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpCreateUserEvent, "OpCreateUserEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpIsValidEvent, "OpIsValidEvent", &CompilerGXP::unimplemented }, + { spv::Op::OpSetUserEventStatus, "OpSetUserEventStatus", &CompilerGXP::unimplemented }, + { spv::Op::OpCaptureEventProfilingInfo, "OpCaptureEventProfilingInfo", &CompilerGXP::unimplemented }, + { spv::Op::OpGetDefaultQueue, "OpGetDefaultQueue", &CompilerGXP::unimplemented }, + { spv::Op::OpBuildNDRange, "OpBuildNDRange", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleImplicitLod, "OpImageSparseSampleImplicitLod", 
&CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleExplicitLod, "OpImageSparseSampleExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleDrefImplicitLod, "OpImageSparseSampleDrefImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleDrefExplicitLod, "OpImageSparseSampleDrefExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleProjImplicitLod, "OpImageSparseSampleProjImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleProjExplicitLod, "OpImageSparseSampleProjExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleProjDrefImplicitLod, "OpImageSparseSampleProjDrefImplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseSampleProjDrefExplicitLod, "OpImageSparseSampleProjDrefExplicitLod", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseFetch, "OpImageSparseFetch", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseGather, "OpImageSparseGather", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseDrefGather, "OpImageSparseDrefGather", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseTexelsResident, "OpImageSparseTexelsResident", &CompilerGXP::unimplemented }, + { spv::Op::OpNoLine, "OpNoLine", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicFlagTestAndSet, "OpAtomicFlagTestAndSet", &CompilerGXP::unimplemented }, + { spv::Op::OpAtomicFlagClear, "OpAtomicFlagClear", &CompilerGXP::unimplemented }, + { spv::Op::OpImageSparseRead, "OpImageSparseRead", &CompilerGXP::unimplemented }, + { spv::Op::OpSizeOf, "OpSizeOf", &CompilerGXP::unimplemented }, + { spv::Op::OpTypePipeStorage, "OpTypePipeStorage", &CompilerGXP::unimplemented }, + { spv::Op::OpConstantPipeStorage, "OpConstantPipeStorage", &CompilerGXP::unimplemented }, + { spv::Op::OpCreatePipeFromPipeStorage, "OpCreatePipeFromPipeStorage", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelLocalSizeForSubgroupCount, 
"OpGetKernelLocalSizeForSubgroupCount", &CompilerGXP::unimplemented }, + { spv::Op::OpGetKernelMaxNumSubgroups, "OpGetKernelMaxNumSubgroups", &CompilerGXP::unimplemented }, + { spv::Op::OpTypeNamedBarrier, "OpTypeNamedBarrier", &CompilerGXP::unimplemented }, + { spv::Op::OpNamedBarrierInitialize, "OpNamedBarrierInitialize", &CompilerGXP::unimplemented }, + { spv::Op::OpMemoryNamedBarrier, "OpMemoryNamedBarrier", &CompilerGXP::unimplemented }, + { spv::Op::OpModuleProcessed, "OpModuleProcessed", &CompilerGXP::unimplemented }, + { spv::Op::OpExecutionModeId, "OpExecutionModeId", &CompilerGXP::unimplemented }, + { spv::Op::OpDecorateId, "OpDecorateId", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformElect, "OpGroupNonUniformElect", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformAll, "OpGroupNonUniformAll", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformAny, "OpGroupNonUniformAny", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformAllEqual, "OpGroupNonUniformAllEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBroadcast, "OpGroupNonUniformBroadcast", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBroadcastFirst, "OpGroupNonUniformBroadcastFirst", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBallot, "OpGroupNonUniformBallot", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformInverseBallot, "OpGroupNonUniformInverseBallot", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBallotBitExtract, "OpGroupNonUniformBallotBitExtract", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBallotBitCount, "OpGroupNonUniformBallotBitCount", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBallotFindLSB, "OpGroupNonUniformBallotFindLSB", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBallotFindMSB, "OpGroupNonUniformBallotFindMSB", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformShuffle, "OpGroupNonUniformShuffle", 
&CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformShuffleXor, "OpGroupNonUniformShuffleXor", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformShuffleUp, "OpGroupNonUniformShuffleUp", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformShuffleDown, "OpGroupNonUniformShuffleDown", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformIAdd, "OpGroupNonUniformIAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformFAdd, "OpGroupNonUniformFAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformIMul, "OpGroupNonUniformIMul", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformFMul, "OpGroupNonUniformFMul", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformSMin, "OpGroupNonUniformSMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformUMin, "OpGroupNonUniformUMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformFMin, "OpGroupNonUniformFMin", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformSMax, "OpGroupNonUniformSMax", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformUMax, "OpGroupNonUniformUMax", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformFMax, "OpGroupNonUniformFMax", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBitwiseAnd, "OpGroupNonUniformBitwiseAnd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBitwiseOr, "OpGroupNonUniformBitwiseOr", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformBitwiseXor, "OpGroupNonUniformBitwiseXor", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformLogicalAnd, "OpGroupNonUniformLogicalAnd", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformLogicalOr, "OpGroupNonUniformLogicalOr", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformLogicalXor, "OpGroupNonUniformLogicalXor", &CompilerGXP::unimplemented }, + { spv::Op::OpGroupNonUniformQuadBroadcast, "OpGroupNonUniformQuadBroadcast", &CompilerGXP::unimplemented }, + { 
spv::Op::OpGroupNonUniformQuadSwap, "OpGroupNonUniformQuadSwap", &CompilerGXP::unimplemented }, + { static_cast(367), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(368), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(369), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(370), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(371), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(372), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(373), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(374), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(375), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(376), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(377), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(378), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(379), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(380), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(381), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(382), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(383), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(384), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(385), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(386), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(387), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(388), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(389), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(390), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(391), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(392), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(393), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(394), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(395), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(396), "OpUndefined", 
&CompilerGXP::undefined }, + { static_cast(397), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(398), "OpUndefined", &CompilerGXP::undefined }, + { static_cast(399), "OpUndefined", &CompilerGXP::undefined }, + { spv::Op::OpCopyLogical, "OpCopyLogical", &CompilerGXP::unimplemented }, + { spv::Op::OpPtrEqual, "OpPtrEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpPtrNotEqual, "OpPtrNotEqual", &CompilerGXP::unimplemented }, + { spv::Op::OpPtrDiff, "OpPtrDiff", &CompilerGXP::unimplemented }, + }; +} diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp new file mode 100644 index 0000000..49c4394 --- /dev/null +++ b/src/translator/src/translator.cpp @@ -0,0 +1,245 @@ +#include + +#include + +#include + +// Position, PointSize, Clip0 and Clip1 are removed so they are not allocated over. +const std::vector allVaryings = { + gxp::ProgramVarying::Color0, + gxp::ProgramVarying::Color1, + gxp::ProgramVarying::Clip2, + gxp::ProgramVarying::Clip3, + gxp::ProgramVarying::Clip4, + gxp::ProgramVarying::Clip5, + gxp::ProgramVarying::Clip6, + gxp::ProgramVarying::Clip7, + gxp::ProgramVarying::Fog, +}; + +/* TexCoord0 through TexCoord9: the pool that allocate_varying (in createShaderResources) draws from whenever it does not take a slot from allVaryings. */ const std::vector allTexCoords = { + gxp::ProgramVarying::TexCoord0, + gxp::ProgramVarying::TexCoord1, + gxp::ProgramVarying::TexCoord2, + gxp::ProgramVarying::TexCoord3, + gxp::ProgramVarying::TexCoord4, + gxp::ProgramVarying::TexCoord5, + gxp::ProgramVarying::TexCoord6, + gxp::ProgramVarying::TexCoord7, + gxp::ProgramVarying::TexCoord8, + gxp::ProgramVarying::TexCoord9, +}; + +/* Maps a SPIRType base type to the matching usse::Type. Boolean and UByte both map to Unsigned8; any base type without a case throws std::runtime_error. */ usse::Type CompilerGXP::translateType(SPIRType::BaseType baseType) { + switch (baseType) { + case SPIRType::BaseType::Boolean: + return usse::Type::Unsigned8; + case SPIRType::BaseType::SByte: + return usse::Type::Signed8; + case SPIRType::BaseType::UByte: + return usse::Type::Unsigned8; + case SPIRType::BaseType::Short: + return usse::Type::Signed16; + case SPIRType::BaseType::UShort: + return usse::Type::Unsigned16; + case SPIRType::BaseType::Int: + return 
usse::Type::Signed32; + case SPIRType::BaseType::UInt: + return usse::Type::Unsigned32; + case SPIRType::BaseType::Half: + return usse::Type::Float16; + case SPIRType::BaseType::Float: + return usse::Type::Float32; + default: + throw std::runtime_error(fmt::format("Unknown base type {}.", baseType)); + } +} + +/* Builds a usse::DataType from a SPIRType: type from basetype, components from vecsize, arraySize from type.array.size(). NOTE(review): type.array.size() is the length of the array member, presumably the number of array dimensions and not an element count, while createShaderResources sets arraySize = 1 directly - confirm which is intended. */ usse::DataType CompilerGXP::translateType(const SPIRType &type) { + usse::DataType dataType = { }; + + dataType.type = translateType(type.basetype); + dataType.components = type.vecsize; + dataType.arraySize = type.array.size(); + + return dataType; +} + +/* Maps the Position and PointSize built-ins to their gxp::ParameterSemantic; every other built-in yields ParameterSemantic::None. */ gxp::ParameterSemantic CompilerGXP::translateDecorations(spv::BuiltIn builtIn) { + switch (builtIn) { + case spv::BuiltInPosition: + return gxp::ParameterSemantic::Position; + case spv::BuiltInPointSize: + return gxp::ParameterSemantic::PointSize; + default: + return gxp::ParameterSemantic::None; + } +} + +/* Maps a built-in to its fixed gxp::ProgramVarying: Position, PointSize, ClipDistance to Clip0 and CullDistance to Clip1; anything else yields ProgramVarying::None. */ gxp::ProgramVarying CompilerGXP::translateVarying(spv::BuiltIn builtIn) { + switch (builtIn) { + case spv::BuiltInPosition: + return gxp::ProgramVarying::Position; + case spv::BuiltInPointSize: + return gxp::ProgramVarying::PointSize; + case spv::BuiltInClipDistance: + return gxp::ProgramVarying::Clip0; + case spv::BuiltInCullDistance: + return gxp::ProgramVarying::Clip1; + default: + return gxp::ProgramVarying::None; + } +} + +/* Translates one SPIR-V block: creates a primary gxp block, then for each instruction in block.ops looks up codes[instruction.op] and calls that entry's implementation with the instruction's operands (pointer into ir.spirv at instruction.offset, plus instruction.count). NOTE(review): codes is indexed by the opcode with no range check visible here - confirm the table covers every opcode that can reach this loop. */ void CompilerGXP::createBlock(const SPIRBlock &block) { + gxp::Block *gxpBlock = builder.createPrimaryBlock(); + + for (Instruction instruction : block.ops) { + const auto &code = codes[instruction.op]; + + const TranslatorArguments arguments( + *gxpBlock, + code, + &ir.spirv[instruction.offset], + instruction.count + ); + + (this->*code.implementation)(arguments); + } +} + +/* Calls createBlock for every block id listed in function.blocks, in order. */ void CompilerGXP::createFunction(const SPIRFunction &function) { + for (uint32_t blockId : function.blocks) { + auto &block = get(blockId); + createBlock(block); + } +} + +/* Registers each stage input as an Attribute parameter, assigns a varying to each stage output and to built-in outputs, then hands the collected varyings and texcoords to builder.registerVaryings. */ void CompilerGXP::createShaderResources() { + ShaderResources resources = get_shader_resources(); + + for (const auto &input : 
resources.stage_inputs) { + /* Each stage input becomes an Attribute parameter; its register reference is remembered in idRegisters under the input id. */ const SPIRType &type = get_type(input.type_id); + + gxp::Parameter parameter; + parameter.name = input.name; + parameter.category = gxp::ParameterCategory::Attribute; + parameter.type.type = translateType(type.basetype); + parameter.type.arraySize = 1; + parameter.type.components = type.vecsize; + idRegisters[input.id] = builder.registerParameter(parameter); + } + + for (const auto &uniform : resources.uniform_buffers) { + const SPIRType &type = get_type(uniform.type_id); + + // Fill In + } + + std::vector varyings; + std::vector texCoords; + + std::vector availableVaryings = allVaryings; + std::vector availableTexCoords = allTexCoords; + + /* Picks the next free slot: a size of 4 takes the first remaining entry of availableVaryings; any other size, or an empty availableVaryings, takes the first remaining TexCoord; returns ProgramVarying::None when nothing is left. */ const auto &allocate_varying = [&availableVaryings, &availableTexCoords](uint32_t size) { + gxp::ProgramVarying selected = gxp::ProgramVarying::None; + + if (size == 4 && !availableVaryings.empty()) { + selected = availableVaryings[0]; + availableVaryings.erase(availableVaryings.begin()); + } else if (!availableTexCoords.empty()) { + selected = availableTexCoords[0]; + availableTexCoords.erase(availableTexCoords.begin()); + } + + return selected; + }; + + /* Every stage output gets a slot based on its vecsize and is recorded in idVaryings; TexCoord slots also record their component count. NOTE(review): the exception text below misspells "available". */ for (const auto &output : resources.stage_outputs) { + const SPIRVariable &variable = get(output.id); + const SPIRType &type = get_type(output.type_id); + + gxp::ProgramVarying varying = allocate_varying(type.vecsize); + if (varying == gxp::ProgramVarying::None) + throw std::runtime_error("No availible space for varying."); + + if (gxp::isTexCoordVarying(varying)) { + gxp::ProgramTexCoordInfo info = { }; + info.varying = varying; + info.componentCount = type.vecsize; + texCoords.push_back(info); + } else + varyings.push_back(varying); + + idVaryings[output.id] = varying; + } + + // TODO: Better solution that works with more layouts. 
+ /* Second pass over the active interface variables with Output storage: built-ins (struct members, or the variable itself) are translated with translateVarying and appended to varyings; only the non-struct case also records the variable in idVaryings. */ auto variables = get_active_interface_variables(); + for (const auto &variableId : variables) { + const SPIRVariable &variable = get(variableId); + const SPIRType &type = get_type(variable.basetype); + + if (variable.storage != spv::StorageClassOutput) continue; + + if (type.basetype == SPIRType::Struct) { + // TODO: struct member should be able to identify itself via idVaryings? OpAccessChain might be it. + for (size_t a = 0; a < type.member_types.size(); a++) { + uint32_t id = type.member_types[a]; + SPIRType memberType = get_type(id); + + spv::BuiltIn builtIn; + if (is_member_builtin(type, a, &builtIn)) { + gxp::ProgramVarying varying = translateVarying(builtIn); + + if (varying != gxp::ProgramVarying::None) + varyings.push_back(varying); + } + } + } else { + gxp::ProgramVarying varying = translateVarying(ir.meta[variableId].decoration.builtin_type); + + if (varying != gxp::ProgramVarying::None) { + varyings.push_back(varying); + idVaryings[variableId] = varying; + } + } + } + + varyingReferences = builder.registerVaryings(varyings, texCoords); +} + +/* Compiles the module: requires exactly one entry point, sets the builder shader type from its execution model (vertex or fragment, anything else throws std::runtime_error), registers shader resources, translates the entry function and returns builder.build(). */ std::vector CompilerGXP::compileData() { + auto entryPoints = get_entry_points_and_stages(); + if (entryPoints.size() != 1) + throw std::runtime_error("Number of entry points must be 1."); + switch (entryPoints[0].execution_model) { + case spv::ExecutionModelVertex: + builder.setType(gxp::ShaderType::Vertex); + break; + case spv::ExecutionModelFragment: + builder.setType(gxp::ShaderType::Fragment); + break; + default: + throw std::runtime_error("Entry point must be of type vertex or fragment."); + } + + SPIREntryPoint entryPoint = get_entry_point(entryPoints[0].name, entryPoints[0].execution_model); + SPIRFunction entryFunction = get(entryPoint.self); + + createShaderResources(); + + // addFunction should recursively call the other functions. 
+// try { + createFunction(entryFunction); +// } catch (std::runtime_error &e) { +// fmt::print("{}\n", e.what()); +// } + + return builder.build(); +} + +/* Passes data to the Compiler base constructor, then calls createTranslators(). */ CompilerGXP::CompilerGXP(const std::vector &data) : Compiler(data) { + createTranslators(); +} diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt new file mode 100644 index 0000000..bfe9969 --- /dev/null +++ b/src/util/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(util INTERFACE) + +target_include_directories(util INTERFACE include) +target_link_libraries(util INTERFACE fmt spirv-cross-reflect spirv-cross-cpp) + +# util is just headers for now. I want to expand the use for this module soon. diff --git a/src/util/include/util/spirv.h b/src/util/include/util/spirv.h new file mode 100644 index 0000000..dabd3d7 --- /dev/null +++ b/src/util/include/util/spirv.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +using namespace spirv_cross; diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h new file mode 100644 index 0000000..e1ad06f --- /dev/null +++ b/src/util/include/util/util.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +/* Reads the whole file at path into a vector of T: the stream is opened in binary mode positioned at the end (std::ios::ate) so tellg() gives the size, then it seeks back to the start and reads size bytes into the vector. The size must be a multiple of sizeof(T) (checked with assert only). NOTE(review): nothing checks that the stream opened or that tellg() succeeded before its result is stored in a size_t - confirm callers only pass readable files. */ template +std::vector loadFileData(const std::string &path) { + std::ifstream stream(path, std::ios::binary | std::ios::ate); + size_t size = stream.tellg(); + assert(size % sizeof(T) == 0); + std::vector data(size / sizeof(T)); + stream.seekg(0, std::ios::beg); + stream.read(reinterpret_cast(data.data()), size); + stream.close(); + return data; +} From e10c410635df8d12ca9b22b14a7add87c57e215b Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 9 Sep 2019 22:19:13 -0400 Subject: [PATCH 02/19] Matrix support --- src/gxp/include/gxp/builder.h | 15 ++- src/gxp/include/gxp/usse.h | 10 +- src/gxp/src/builder.cpp | 88 ++++++++++++++---- src/gxp/src/usse.cpp | 76 +++++++++++++-- .../include/translator/translator.h | 1 + src/translator/src/codes.cpp | 93 ++++++++++++------- src/translator/src/translator.cpp | 21 +++-- 
src/util/include/util/util.h | 9 ++ 8 files changed, 235 insertions(+), 78 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index d845e70..68876e8 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -24,8 +24,16 @@ namespace gxp { Builder &parent; void createNop(); - void createMov(usse::RegisterReference source, usse::RegisterReference destination); - void createPack(usse::RegisterReference source, usse::RegisterReference destination); + void createMov( + usse::RegisterReference source, + usse::RegisterReference destination); + void createPack( + usse::RegisterReference source, + usse::RegisterReference destination); + void createDot( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); }; class Parameter { @@ -50,6 +58,8 @@ namespace gxp { uint32_t paRegPointer = 0; uint32_t saRegPointer = 0; uint32_t oRegPointer = 0; + uint32_t tRegPointer = 0; + uint32_t iRegPointer = 0; std::vector primaryBlocks; std::vector secondaryBlocks; @@ -61,6 +71,7 @@ namespace gxp { Block *createSecondaryBlock(); usse::RegisterReference allocateRegister(usse::RegisterBank bank, usse::DataType type); + void freeRegister(usse::RegisterReference reg); usse::RegisterReference registerParameter(const Parameter ¶meter); std::unordered_map registerVaryings( diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 05caa18..11db340 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -8,11 +8,11 @@ namespace usse { enum class RegisterBank { - Temp, + Temporary, Primary, Output, Secondary, - FloatInternal, + Internal, Special, Global, FloatConstant, @@ -72,7 +72,10 @@ namespace usse { RegisterReference operator+(uint32_t value); uint32_t getSwizzleMask(); + int32_t getSwizzleIndex(bool extended = false); + RegisterReference getHalf(uint32_t half); RegisterReference getComponents(uint32_t component, uint32_t count); + 
RegisterReference getElement(uint32_t element); RegisterReference() = default; RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size); @@ -85,7 +88,8 @@ namespace usse { uint8_t extension = 0; uint8_t number = 0; - bool needsDiv(); + bool isHalf(Type type); + uint32_t getIndex(RegisterReference reference, uint32_t bits = 7); static BankLayout destLayout(RegisterBank bank); static BankLayout src0Layout(RegisterBank bank); diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index c79c6b7..5e70c1c 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -20,15 +20,6 @@ namespace gxp { std::vector data; uint32_t destMask = destination.getSwizzleMask(); -// usse::SwizzleVec4 swizzleDescription = usse::getSwizzleVec4DontCare(); -// uint32_t swizzleIndex = 0; -// for (uint32_t a = 0; a < 4; a++) { -// if (destMask & (1u << a)) { -// if (swizzleIndex >= source.swizzle.size()) -// throw std::runtime_error("Swizzle out of bounds."); -// swizzleDescription[a] = source.swizzle[swizzleIndex++]; -// } -// } for (uint32_t a = 0; a < 2; a++) { uint8_t mask = (destMask & (0b11u << (a * 2))) >> (a * 2); @@ -142,16 +133,16 @@ namespace gxp { srcBankLayout.number, // src1_bank_sel 0, // src2_bank_sel mov.destMask, // dest_mask - mov.destination.index / (destBankLayout.needsDiv() ? 2 : 1), // dest_n + destBankLayout.getIndex(mov.destination), // dest_n 0, // src0_n - mov.source.index / (srcBankLayout.needsDiv() ? 
2 : 1), // src1_n + srcBankLayout.getIndex(mov.source), // src1_n 0 // src2_n )); } } void Block::createPack(usse::RegisterReference source, usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::destLayout(source.bank); + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); if (destination.type.type == usse::Type::Signed32 || destination.type.type == usse::Type::Unsigned32 @@ -188,15 +179,54 @@ namespace gxp { destBankLayout.number, // dest_bank_sel srcBankLayout.number, // src1_bank_sel srcBankLayout.number, // src2_bank_sel - destination.index, // dest_n + destBankLayout.getIndex(destination), // dest_n static_cast(destination.swizzle[3]) & 0b11u, // comp_sel_3 - 1, // scale + false, // scale static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 - source.index, // src1_n + srcBankLayout.getIndex(source.getHalf(0)), // src1_n static_cast(destination.swizzle[0]) & 0b10u >> 1u, // comp0_sel_bit1 - source.index + source.size / 2, // src2_n - static_cast(destination.swizzle[3]) & 0b01u // comp_sel_0_bit0 + srcBankLayout.getIndex(source.getHalf(1)), // src2_n + static_cast(destination.swizzle[0]) & 0b01u // comp_sel_0_bit0 + )); + } + + + void Block::createDot( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + assert(second.bank == usse::RegisterBank::Internal); + + instructions.push_back(usse::makeVDP( + 0, // pred + 0, // skipinv + 0, // clip_plane_enable + first.type.components == 4, // opcode2 + destBankLayout.extension, // dest_use_bank_ext + 0, // end + firstBankLayout.extension, // src1_bank_ext + 3, /* Seems to be the normal value... 
*/ // increment_mode + 0, // gpi0_abs + 0, // repeat_count + 0, // nosched + destination.getSwizzleMask(), // write_mask + 0, // src1_neg + 0, // src1_abs + 0, // clip_plane_n + destBankLayout.number, // dest_bank + firstBankLayout.number, // src1_bank + second.index, // gpi0_n + destBankLayout.getIndex(destination), // dest_n + second.getSwizzleIndex(), // gpi0_swiz + static_cast(first.swizzle[3]), // src1_swiz_w + static_cast(first.swizzle[2]), // src1_swiz_z + static_cast(first.swizzle[1]), // src1_swiz_y + static_cast(first.swizzle[0]), // src1_swiz_x + firstBankLayout.getIndex(first) // src1_n )); } @@ -229,7 +259,7 @@ namespace gxp { usse::RegisterReference Builder::allocateRegister(usse::RegisterBank bank, usse::DataType type) { uint32_t index = 0; - uint32_t size = usse::getTypeSize(type.type) * type.components / 4; + uint32_t size = usse::getTypeSize(type.type) * type.components * type.arraySize / 4; switch (bank) { case usse::RegisterBank::Primary: @@ -244,21 +274,37 @@ namespace gxp { index = oRegPointer; oRegPointer += size; break; + case usse::RegisterBank::Temporary: + index = tRegPointer; + tRegPointer += size; + break; + case usse::RegisterBank::Internal: + index = iRegPointer; + iRegPointer += size; + break; default: throw std::runtime_error("Missing allocation method for bank."); } -// fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", -// usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, reg.size, reg.index); + fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", + usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); return usse::RegisterReference(type, bank, index, size); } + void Builder::freeRegister(usse::RegisterReference reg) { + if (reg.bank == usse::RegisterBank::Internal && reg.index + reg.size == iRegPointer) { + iRegPointer -= reg.size; + } + } + usse::RegisterReference 
Builder::registerParameter(const Parameter ¶meter) { size_t index = parameters.size(); parameters.push_back(parameter); usse::RegisterReference reg = allocateRegister(parameters[index].getBank(), parameter.type); + parameters[index].resourceIndex = reg.index; + parameters[index].containerIndex = 0; return reg; } @@ -396,6 +442,8 @@ namespace gxp { // Code header.primaryRegCount = paRegPointer; header.secondaryRegCount = saRegPointer; + header.tempRegCount1 = tRegPointer; + header.tempRegCount2 = tRegPointer; // Difference between both reg counts? { header.secondaryProgramOffset = data.size() - OFFSET_OF(header, secondaryProgramOffset); for (const Block &block : secondaryBlocks) { diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index 6c1b959..d5b1b71 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -74,7 +74,7 @@ namespace usse { }; const uint32_t swizzleStandardSize = 16; - std::array swizzleVector4[2][swizzleStandardSize] = { + SwizzleVec4 swizzleVector4[2][swizzleStandardSize] = { { { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X }, { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y }, @@ -113,7 +113,7 @@ namespace usse { }, }; - std::array swizzleVector3[2][swizzleStandardSize] = { + SwizzleVec3 swizzleVector3[2][swizzleStandardSize] = { { { SwizzleChannel::X, SwizzleChannel::X, SwizzleChannel::X }, { SwizzleChannel::Y, SwizzleChannel::Y, SwizzleChannel::Y }, @@ -151,8 +151,27 @@ namespace usse { }, }; - bool BankLayout::needsDiv() { - return bank != usse::RegisterBank::FloatConstant; + bool BankLayout::isHalf(Type type) { + return ( + type == Type::Float32 || + type == Type::Float16 || + type == Type::Fixed10) && + + bank != usse::RegisterBank::FloatConstant && + bank != usse::RegisterBank::Immediate; + } + + uint32_t BankLayout::getIndex(RegisterReference reference, uint32_t bits) { + uint32_t index = reference.index; + bool doubleReg = isHalf(reference.type.type); + + if 
(doubleReg) + index /= 2; + // Top Bit, is this wrong? Looks more complex in V3K source. + if (bank == RegisterBank::Internal) + index += 120 + (doubleReg ? 4 : 0); + + return index; } BankLayout BankLayout::destLayout(RegisterBank bank) { @@ -160,7 +179,8 @@ namespace usse { case RegisterBank::Primary: return { bank, 0, 2 }; case RegisterBank::Secondary: return { bank, 1, 0 }; case RegisterBank::Output: return { bank, 0, 1 }; - case RegisterBank::Temp: return { bank, 0, 0 }; + case RegisterBank::Internal: + case RegisterBank::Temporary: return {bank, 0, 0 }; case RegisterBank::Special: return { bank, 1, 1 }; case RegisterBank::Index: return { bank, 1, 2 }; case RegisterBank::Indexed1: return { bank, 0, 3 }; @@ -174,14 +194,14 @@ namespace usse { case RegisterBank::Primary: return { bank, 0, 1 }; case RegisterBank::Secondary: return { bank, 1, 1 }; case RegisterBank::Output: return { bank, 1, 0 }; - case RegisterBank::Temp: return { bank, 0, 0 }; + case RegisterBank::Temporary: return {bank, 0, 0 }; default: throw std::runtime_error("Unsupported src0 bank."); } } BankLayout BankLayout::srcLayout(RegisterBank bank) { switch (bank) { - case RegisterBank::Temp: return { bank, 0, 0 }; + case RegisterBank::Temporary: return {bank, 0, 0 }; case RegisterBank::Primary: return { bank, 0, 2 }; case RegisterBank::Output: return { bank, 0, 1 }; case RegisterBank::Secondary: return { bank, 0, 3 }; @@ -228,6 +248,32 @@ namespace usse { return mask; } + int32_t RegisterReference::getSwizzleIndex(bool extended) { + switch (type.components) { + case 4: { + usse::SwizzleVec4 vec; + std::copy(swizzle.begin(), swizzle.end(), vec.begin()); + return usse::getSwizzleVec4Index(vec, extended); + } + case 3: { + usse::SwizzleVec3 vec; + std::copy(swizzle.begin(), swizzle.end(), vec.begin()); + return usse::getSwizzleVec3Index(vec, extended); + } + case 1: + return usse::getSwizzleScalarIndex(swizzle[0]); + default: + throw std::runtime_error("Invalid component count for swizzle."); + } + } 
+ + RegisterReference RegisterReference::getHalf(uint32_t half) { + assert(type.components % 2 == 0); + uint32_t width = type.components / 2; + + return getComponents(width * half, width); + } + usse::RegisterReference RegisterReference::getComponents(uint32_t component, uint32_t count) { // if (component + count > type.components) // throw std::runtime_error(fmt::format( @@ -258,6 +304,18 @@ namespace usse { return ref; } + RegisterReference RegisterReference::getElement(uint32_t element) { + if (element >= type.arraySize) + throw std::runtime_error("Register reference array out of bounds."); + usse::RegisterReference reg = *this; + + reg.type.arraySize = 1; + reg.size = size / type.arraySize; + reg.index += reg.size * element; + + return reg; + } + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size) : type(type), bank(bank), index(index), size(size) { for (uint32_t a = 0; a < type.components; a++) { @@ -300,11 +358,11 @@ namespace usse { std::string getBankName(RegisterBank bank) { switch (bank) { - case RegisterBank::Temp: return "Temp"; + case RegisterBank::Temporary: return "Temp"; case RegisterBank::Primary: return "Primary"; case RegisterBank::Output: return "Output"; case RegisterBank::Secondary: return "Secondary"; - case RegisterBank::FloatInternal: return "Float Internal"; + case RegisterBank::Internal: return "Float Internal"; case RegisterBank::Special: return "Special"; case RegisterBank::Global: return "Global"; case RegisterBank::FloatConstant: return "Float Constant"; diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index c9bb1b8..4e85733 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -56,6 +56,7 @@ class CompilerGXP : public Compiler { void opLoad(const TranslatorArguments &arguments); void opStore(const TranslatorArguments &arguments); + void opMatrixTimesVector(const 
TranslatorArguments &arguments); void opConvertUToF(const TranslatorArguments &arguments); void opCompositeExtract(const TranslatorArguments &arguments); void opCompositeConstruct(const TranslatorArguments &arguments); diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 2b4dc3a..be97724 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -1,5 +1,7 @@ #include +#include + #include static std::string getString(const uint32_t *program, size_t &length) { @@ -20,47 +22,64 @@ void CompilerGXP::undefined(const TranslatorArguments &arguments) { } void CompilerGXP::opLoad(const TranslatorArguments &arguments) { - spv::Id type = arguments.instruction[0]; // TODO: Type is important. + spv::Id type = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id pointer = arguments.instruction[2]; - auto reg = idRegisters.find(pointer); - if (reg == idRegisters.end()) - throw std::runtime_error(fmt::format("Source ID {} does not have an associated register.", pointer)); - - idRegisters[result] = reg->second; + // This is a redirect, but it should really load into temp. + // Maybe let the user chose if there want to assume redirect or copy until we can introduce analysis. 
+ idRegisters[result] = getOrThrow(idRegisters, pointer); } void CompilerGXP::opStore(const TranslatorArguments &arguments) { spv::Id destination = arguments.instruction[0]; spv::Id source = arguments.instruction[1]; - auto sourceRegister = idRegisters.find(source); - if (sourceRegister == idRegisters.end()) - throw std::runtime_error(fmt::format("Source ID {} was not loaded with a register reference.", source)); + usse::RegisterReference sourceRegister = getOrThrow(idRegisters, source); + usse::RegisterReference destinationRegister; + if (idVaryings.find(destination) != idVaryings.end()) + destinationRegister = getOrThrow(varyingReferences, getOrThrow(idVaryings, destination)); + else + destinationRegister = getOrThrow(idRegisters, destination); - usse::RegisterReference destinationRegister = varyingReferences[idVaryings[destination]]; + arguments.block.createMov(sourceRegister, destinationRegister); +} - auto *var = maybe_get(destination); - if (var) { - SPIRType type = get_type(var->basetype); +void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id matrix = arguments.instruction[2]; + spv::Id vector = arguments.instruction[3]; - usse::DataType dataType = translateType(type); + usse::RegisterReference matrixRegister = getOrThrow(idRegisters, matrix); + usse::RegisterReference vectorRegister = getOrThrow(idRegisters, vector); - arguments.block.createMov(sourceRegister->second, destinationRegister); - } else { - arguments.block.createMov(sourceRegister->second, idRegisters[destination]); + assert(matrixRegister.type.type == vectorRegister.type.type); + assert(matrixRegister.type.arraySize == vectorRegister.type.components); + + usse::RegisterReference internal = arguments.block.parent.allocateRegister( + usse::RegisterBank::Internal, vectorRegister.type); + + usse::RegisterReference temp = arguments.block.parent.allocateRegister( + 
usse::RegisterBank::Temporary, vectorRegister.type); + + for (uint32_t a = 0; a < vectorRegister.type.components; a++) { + arguments.block.createPack(matrixRegister.getElement(a), internal); + arguments.block.createDot(vectorRegister, internal, temp.getComponents(a, 1)); } + + arguments.block.parent.freeRegister(internal); + idRegisters[result] = temp; } void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { - spv::Id type = arguments.instruction[0]; // TODO: Type is important. + spv::Id type = arguments.instruction[0]; spv::Id destination = arguments.instruction[1]; spv::Id source = arguments.instruction[2]; - usse::RegisterReference srcReg = idRegisters[source]; + usse::RegisterReference srcReg = getOrThrow(idRegisters, source); usse::RegisterReference destReg = arguments.block.parent.allocateRegister( - usse::RegisterBank::Primary, { usse::Type::Float32, 4, 1 }); + usse::RegisterBank::Temporary, { usse::Type::Float32, 4, 1 }); arguments.block.createPack(srcReg, destReg); @@ -86,7 +105,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { SPIRType type = get_type(typeId); - usse::RegisterReference output = arguments.block.parent.allocateRegister(usse::RegisterBank::Primary, + usse::RegisterReference output = arguments.block.parent.allocateRegister(usse::RegisterBank::Temporary, { translateType(type.basetype), type.vecsize, 1 }); for (size_t a = 0; a < type.vecsize; a++) { @@ -130,28 +149,30 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { // This right now is only supported to accessing gl_PerVertex structs. 
SPIRConstant constant = get(index); - uint32_t structIndex = constant.m.c[0].r[0].u32; + uint32_t value = constant.m.c[0].r[0].u32; SPIRVariable baseVariable = get(base); SPIRType type = get_type(baseVariable.basetype); - if (type.basetype != SPIRType::Struct) - throw std::runtime_error("Access chain can only be created on gl_PerVertex struct."); - SPIRType memberType = get_type(type.member_types[structIndex]); - spv::BuiltIn builtIn; + if (type.basetype == SPIRType::Struct) { + SPIRType memberType = get_type(type.member_types[value]); + spv::BuiltIn builtIn; - if (is_member_builtin(type, structIndex, &builtIn)) { - gxp::ProgramVarying varying = translateVarying(builtIn); - auto varyingReference = varyingReferences.find(varying); - - if (varyingReference != varyingReferences.end()) { - idRegisters[result] = varyingReference->second; + if (is_member_builtin(type, value, &builtIn)) { + idRegisters[result] = getOrThrow(varyingReferences, translateVarying(builtIn)); } else { - throw std::runtime_error( - fmt::format("No varying registered with varying {}.", static_cast(varying))); + if (value != 0) + throw std::runtime_error("Must link to a single element."); + idRegisters[result] = getOrThrow(idRegisters, base); } } else { - throw std::runtime_error("Access chain does not link to a varying."); + if (type.columns > 1) { + idRegisters[result] = getOrThrow(idRegisters, base).getElement(value); + } else if (type.vecsize > 1) { + idRegisters[result] = getOrThrow(idRegisters, base).getComponents(value, 1); + } else { + throw std::runtime_error("Access Chain to a non composite type."); + } } } @@ -314,7 +335,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpVectorTimesScalar, "OpVectorTimesScalar", &CompilerGXP::unimplemented }, { spv::Op::OpMatrixTimesScalar, "OpMatrixTimesScalar", &CompilerGXP::unimplemented }, { spv::Op::OpVectorTimesMatrix, "OpVectorTimesMatrix", &CompilerGXP::unimplemented }, - { spv::Op::OpMatrixTimesVector, "OpMatrixTimesVector", 
&CompilerGXP::unimplemented }, + { spv::Op::OpMatrixTimesVector, "OpMatrixTimesVector", &CompilerGXP::opMatrixTimesVector }, { spv::Op::OpMatrixTimesMatrix, "OpMatrixTimesMatrix", &CompilerGXP::unimplemented }, { spv::Op::OpOuterProduct, "OpOuterProduct", &CompilerGXP::unimplemented }, { spv::Op::OpDot, "OpDot", &CompilerGXP::unimplemented }, diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index 49c4394..964ac2c 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -60,7 +60,7 @@ usse::DataType CompilerGXP::translateType(const SPIRType &type) { dataType.type = translateType(type.basetype); dataType.components = type.vecsize; - dataType.arraySize = type.array.size(); + dataType.arraySize = type.columns; return dataType; } @@ -124,16 +124,21 @@ void CompilerGXP::createShaderResources() { gxp::Parameter parameter; parameter.name = input.name; parameter.category = gxp::ParameterCategory::Attribute; - parameter.type.type = translateType(type.basetype); - parameter.type.arraySize = 1; - parameter.type.components = type.vecsize; + parameter.type = translateType(type); idRegisters[input.id] = builder.registerParameter(parameter); } for (const auto &uniform : resources.uniform_buffers) { const SPIRType &type = get_type(uniform.type_id); - // Fill In + if (type.member_types.size() != 1) + throw std::runtime_error("Uniform blocks are not supported."); + + gxp::Parameter parameter; + parameter.name = uniform.name; + parameter.category = gxp::ParameterCategory::Uniform; + parameter.type = translateType(get_type(type.member_types[0])); + idRegisters[uniform.id] = builder.registerParameter(parameter); } std::vector varyings; @@ -142,7 +147,7 @@ void CompilerGXP::createShaderResources() { std::vector availableVaryings = allVaryings; std::vector availableTexCoords = allTexCoords; - const auto &allocate_varying = [&availableVaryings, &availableTexCoords](uint32_t size) { + const auto &allocateVarying = 
[&availableVaryings, &availableTexCoords](uint32_t size) { gxp::ProgramVarying selected = gxp::ProgramVarying::None; if (size == 4 && !availableVaryings.empty()) { @@ -160,9 +165,9 @@ void CompilerGXP::createShaderResources() { const SPIRVariable &variable = get(output.id); const SPIRType &type = get_type(output.type_id); - gxp::ProgramVarying varying = allocate_varying(type.vecsize); + gxp::ProgramVarying varying = allocateVarying(type.vecsize); if (varying == gxp::ProgramVarying::None) - throw std::runtime_error("No availible space for varying."); + throw std::runtime_error("No available space for varying."); if (gxp::isTexCoordVarying(varying)) { gxp::ProgramTexCoordInfo info = { }; diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h index e1ad06f..5f22ae4 100644 --- a/src/util/include/util/util.h +++ b/src/util/include/util/util.h @@ -14,3 +14,12 @@ std::vector loadFileData(const std::string &path) { stream.close(); return data; } + +template +typename T::mapped_type &getOrThrow(T map, typename T::key_type key) { + auto reference = map.find(key); + if (reference == map.end()) + throw std::runtime_error("Missing key in map."); + + return reference->second; +} From c26d9cc905538fe0aa87b2eb75d8e53d7202421f Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Tue, 10 Sep 2019 08:22:15 -0400 Subject: [PATCH 03/19] Fix Vec3 and Outputs --- src/gxp/include/gxp/gxp.h | 1 + src/gxp/include/gxp/usse.h | 2 +- src/gxp/src/builder.cpp | 6 ------ src/gxp/src/gxp.cpp | 31 +++++++++++++++++++++++++++++++ src/gxp/src/usse.cpp | 12 +++++++++--- src/translator/src/codes.cpp | 5 +++-- src/translator/src/translator.cpp | 8 ++++++++ 7 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h index d30f492..0c08a15 100644 --- a/src/gxp/include/gxp/gxp.h +++ b/src/gxp/include/gxp/gxp.h @@ -105,6 +105,7 @@ namespace gxp { Clip7, }; + std::string getVaryingName(ProgramVarying varying); 
bool iClipVarying(ProgramVarying varying); bool isTexCoordVarying(ProgramVarying varying); uint32_t getVaryingBits(ProgramVarying varying); diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 11db340..d401a9a 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -78,7 +78,7 @@ namespace usse { RegisterReference getElement(uint32_t element); RegisterReference() = default; - RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size); + RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex, uint32_t size); }; class BankLayout { diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 5e70c1c..1ef0873 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -347,8 +347,6 @@ namespace gxp { if (varying == ProgramVarying::Position || varyings.vertex_outputs1 & getVaryingBits(varying)) { references[varying] = allocateRegister(usse::RegisterBank::Output, { usse::Type::Float32, 4, 1 }); - - oRegPointer += 4; } } @@ -362,8 +360,6 @@ namespace gxp { if (texCoordInfo != texCoords.end()) { references[varying] = allocateRegister(usse::RegisterBank::Output, { usse::Type::Float32, texCoordInfo->componentCount, 1 }); - - oRegPointer += texCoordInfo->componentCount; } } @@ -376,8 +372,6 @@ namespace gxp { references[varying] = allocateRegister(usse::RegisterBank::Output, { usse::Type::Float32, varyingSize, 1 }); - - oRegPointer += varyingSize; } } diff --git a/src/gxp/src/gxp.cpp b/src/gxp/src/gxp.cpp index d4c035e..4cd051e 100644 --- a/src/gxp/src/gxp.cpp +++ b/src/gxp/src/gxp.cpp @@ -38,6 +38,37 @@ namespace gxp { } } + std::string getVaryingName(ProgramVarying varying) { + switch (varying) { + case ProgramVarying::Position: return "Position"; + case ProgramVarying::Fog: return "Fog"; + case ProgramVarying::Color0: return "Color0"; + case ProgramVarying::Color1: return "Color1"; + case ProgramVarying::TexCoord0: return "TexCoord0"; + case ProgramVarying::TexCoord1: return 
"TexCoord1"; + case ProgramVarying::TexCoord2: return "TexCoord2"; + case ProgramVarying::TexCoord3: return "TexCoord3"; + case ProgramVarying::TexCoord4: return "TexCoord4"; + case ProgramVarying::TexCoord5: return "TexCoord5"; + case ProgramVarying::TexCoord6: return "TexCoord6"; + case ProgramVarying::TexCoord7: return "TexCoord7"; + case ProgramVarying::TexCoord8: return "TexCoord8"; + case ProgramVarying::TexCoord9: return "TexCoord9"; + case ProgramVarying::PointSize: return "PointSize"; + case ProgramVarying::Clip0: return "Clip0"; + case ProgramVarying::Clip1: return "Clip1"; + case ProgramVarying::Clip2: return "Clip2"; + case ProgramVarying::Clip3: return "Clip3"; + case ProgramVarying::Clip4: return "Clip4"; + case ProgramVarying::Clip5: return "Clip5"; + case ProgramVarying::Clip6: return "Clip6"; + case ProgramVarying::Clip7: return "Clip7"; + case ProgramVarying::None: + default: + return "Invalid"; + } + } + bool iClipVarying(ProgramVarying varying) { auto varyingNum = static_cast(varying); return varyingNum >= static_cast(ProgramVarying::Clip0) diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index d5b1b71..aad3f51 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -316,11 +316,17 @@ namespace usse { return reg; } - RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t index, uint32_t size) - : type(type), bank(bank), index(index), size(size) { + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex, uint32_t size) + : type(type), bank(bank), size(size) { + bool swizzleUp = false; + if (regIndex % 2 == 1) { + regIndex--; + swizzleUp = true; + } for (uint32_t a = 0; a < type.components; a++) { - swizzle.push_back(static_cast(a)); + swizzle.push_back(static_cast(a + swizzleUp)); } + index = regIndex; } std::string getTypeName(Type type) { diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index be97724..b76743f 100644 --- 
a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -63,9 +63,10 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { usse::RegisterReference temp = arguments.block.parent.allocateRegister( usse::RegisterBank::Temporary, vectorRegister.type); + arguments.block.createPack(vectorRegister, internal); + for (uint32_t a = 0; a < vectorRegister.type.components; a++) { - arguments.block.createPack(matrixRegister.getElement(a), internal); - arguments.block.createDot(vectorRegister, internal, temp.getComponents(a, 1)); + arguments.block.createDot(matrixRegister.getElement(a), internal, temp.getComponents(a, 1)); } arguments.block.parent.freeRegister(internal); diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index 964ac2c..0bee121 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -4,10 +4,16 @@ #include +#define VERTEX_POSITION_OUTPUT_ONLY + // Position, PointSize, Clip0 and Clip1 are removed so they are not allocated over. 
const std::vector allVaryings = { gxp::ProgramVarying::Color0, gxp::ProgramVarying::Color1, +#ifdef VERTEX_POSITION_OUTPUT_ONLY + gxp::ProgramVarying::Clip0, + gxp::ProgramVarying::Clip1, +#endif gxp::ProgramVarying::Clip2, gxp::ProgramVarying::Clip3, gxp::ProgramVarying::Clip4, @@ -80,12 +86,14 @@ gxp::ProgramVarying CompilerGXP::translateVarying(spv::BuiltIn builtIn) { switch (builtIn) { case spv::BuiltInPosition: return gxp::ProgramVarying::Position; +#ifndef VERTEX_POSITION_OUTPUT_ONLY case spv::BuiltInPointSize: return gxp::ProgramVarying::PointSize; case spv::BuiltInClipDistance: return gxp::ProgramVarying::Clip0; case spv::BuiltInCullDistance: return gxp::ProgramVarying::Clip1; +#endif default: return gxp::ProgramVarying::None; } From eab4d67143cb86e0239995ca88160745f5c20b7d Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Wed, 11 Sep 2019 11:44:39 -0400 Subject: [PATCH 04/19] Fragment shader support --- src/gxp/include/gxp/builder.h | 11 +- src/gxp/include/gxp/gxp.h | 48 +++++--- src/gxp/include/gxp/usse.h | 1 - src/gxp/src/builder.cpp | 80 +++++++++---- src/gxp/src/gxp.cpp | 86 +++++++------- .../include/translator/translator.h | 8 +- src/translator/src/codes.cpp | 2 - src/translator/src/translator.cpp | 112 ++++++++++++------ 8 files changed, 231 insertions(+), 117 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index 68876e8..9d44710 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -64,8 +64,10 @@ namespace gxp { std::vector primaryBlocks; std::vector secondaryBlocks; std::vector parameters; + std::vector fragmentInputs; public: void setType(ShaderType type); + ShaderType getType(); Block *createPrimaryBlock(); Block *createSecondaryBlock(); @@ -74,8 +76,13 @@ namespace gxp { void freeRegister(usse::RegisterReference reg); usse::RegisterReference registerParameter(const Parameter ¶meter); - std::unordered_map registerVaryings( - const std::vector &outputs, 
const std::vector &texCoords); + + std::unordered_map registerVertexVaryings( + const std::vector &outputs, const std::vector &texCoords); + std::unordered_map registerFragmentVaryings( + const std::vector &inputs /*, samplers...*/); + + usse::RegisterReference createFragmentOutput(usse::Type type, uint32_t components); std::vector build(); diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h index 0c08a15..b590cc8 100644 --- a/src/gxp/include/gxp/gxp.h +++ b/src/gxp/include/gxp/gxp.h @@ -65,7 +65,7 @@ namespace gxp { Color0 = 0x0800, }; - enum class ProgramTexCoordMasks : uint32_t { + enum class ProgramTexCoordVertexMasks : uint32_t { TexCoord0 = 0b111u << (3u * 0u), TexCoord1 = 0b111u << (3u * 1u), TexCoord2 = 0b111u << (3u * 2u), @@ -78,6 +78,23 @@ namespace gxp { TexCoord9 = 0b111u << (3u * 9u), }; + enum class ProgramVaryingFragmentBits : uint32_t { + Position = 0xD000, + Fog = 0xC000, + Color0 = 0xA000, + Color1 = 0xB000, + TexCoord0 = 0x0000, + TexCoord1 = 0x1000, + TexCoord2 = 0x2000, + TexCoord3 = 0x3000, + TexCoord4 = 0x4000, + TexCoord5 = 0x5000, + TexCoord6 = 0x6000, + TexCoord7 = 0x7000, + TexCoord8 = 0x8000, + TexCoord9 = 0x9000, + }; + enum class ProgramVarying { None, Position, @@ -108,32 +125,26 @@ namespace gxp { std::string getVaryingName(ProgramVarying varying); bool iClipVarying(ProgramVarying varying); bool isTexCoordVarying(ProgramVarying varying); - uint32_t getVaryingBits(ProgramVarying varying); + uint32_t getVertexVaryingBits(ProgramVarying varying); + uint32_t getFragmentVaryingBits(ProgramVarying varying); - class ProgramTexCoordInfo { + class ProgramVectorInfo { public: ProgramVarying varying; - uint32_t componentCount; + uint32_t components; }; - class ParameterConfig { - uint16_t config = 0; - public: - void setCategory(ParameterCategory category); - void setType(ParameterType type); - void setComponentCount(uint32_t componentCount); - void setContainerIndex(uint32_t containerIndex); - - ParameterCategory getCategory(); 
- ParameterType getType(); - uint32_t getComponentCount(); - uint32_t getContainerIndex(); + struct ProgramFragmentInputInfo { + std::uint32_t attribute_info = 0; + std::uint32_t resource_index = 0; + std::uint32_t size = 0; + std::uint32_t component_info = 0; }; class ProgramParameterInfo { public: int32_t nameOffset = 0; - ParameterConfig config; + uint16_t config = 0; uint16_t semantic = 0; uint32_t arraySize = 0; uint32_t resourceIndex = 0; @@ -217,4 +228,7 @@ namespace gxp { uint32_t containerCount = 0; uint32_t containerOffset = 0; }; + + uint16_t createParameterConfig(ParameterCategory category, ParameterType type, + uint32_t components, uint32_t containerIndex); } diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index d401a9a..2587f53 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -6,7 +6,6 @@ #include namespace usse { - enum class RegisterBank { Temporary, Primary, diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 1ef0873..612dd2c 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -101,6 +101,10 @@ namespace gxp { header.type = static_cast(type); } + ShaderType Builder::getType() { + return static_cast(header.type); + } + void Block::createNop() { instructions.push_back(usse::makeNOP()); } @@ -309,9 +313,8 @@ namespace gxp { return reg; } - std::unordered_map Builder::registerVaryings( - const std::vector &outputs, const std::vector &texCoords) { - // TODO: This is only for vertex. Either make this method work for fragment or make another method for fragment. 
+ std::unordered_map Builder::registerVertexVaryings( + const std::vector &outputs, const std::vector &texCoords) { varyings.varyings_count = outputs.size() + texCoords.size(); varyings.vertex_outputs1 = 0; @@ -322,10 +325,10 @@ namespace gxp { if (isTexCoordVarying(output)) throw std::runtime_error("TexCoord passed as regular output to createVaryings."); - varyings.vertex_outputs1 |= getVaryingBits(output); + varyings.vertex_outputs1 |= getVertexVaryingBits(output); } - for (ProgramTexCoordInfo texCoord : texCoords) { + for (ProgramVectorInfo texCoord : texCoords) { if (!isTexCoordVarying(texCoord.varying)) throw std::runtime_error("Non-TexCoord passed as TexCoord to createVaryings."); @@ -333,7 +336,7 @@ namespace gxp { - static_cast(ProgramVarying::TexCoord0); uint32_t texCoordBits = 0; - texCoordBits |= (texCoord.componentCount - 1) & 0b11u; + texCoordBits |= (texCoord.components - 1) & 0b11u; varyings.vertex_outputs2 |= texCoordBits << (texCoordIndex * 3u); } @@ -344,7 +347,7 @@ namespace gxp { a < static_cast(ProgramVarying::TexCoord0); a++) { auto varying = static_cast(a); - if (varying == ProgramVarying::Position || varyings.vertex_outputs1 & getVaryingBits(varying)) { + if (varying == ProgramVarying::Position || varyings.vertex_outputs1 & getVertexVaryingBits(varying)) { references[varying] = allocateRegister(usse::RegisterBank::Output, { usse::Type::Float32, 4, 1 }); } @@ -355,11 +358,11 @@ namespace gxp { auto varying = static_cast(a); auto texCoordInfo = std::find_if(texCoords.begin(), texCoords.end(), - [varying](const ProgramTexCoordInfo &info) { return info.varying == varying; }); + [varying](const ProgramVectorInfo &info) { return info.varying == varying; }); if (texCoordInfo != texCoords.end()) { references[varying] = allocateRegister(usse::RegisterBank::Output, - { usse::Type::Float32, texCoordInfo->componentCount, 1 }); + {usse::Type::Float32, texCoordInfo->components, 1 }); } } @@ -367,7 +370,7 @@ namespace gxp { a <= 
static_cast(ProgramVarying::Clip7); a++) { auto varying = static_cast(a); - if (varyings.vertex_outputs1 & getVaryingBits(varying)) { + if (varyings.vertex_outputs1 & getVertexVaryingBits(varying)) { uint32_t varyingSize = varying == ProgramVarying::PointSize ? 1 : 4; references[varying] = allocateRegister(usse::RegisterBank::Output, @@ -378,8 +381,43 @@ namespace gxp { return references; } + std::unordered_map Builder::registerFragmentVaryings( + const std::vector &inputs) { + std::unordered_map references; + + for (ProgramVectorInfo varying : inputs) { + usse::DataType type = {usse::Type::Float32, varying.components, 1 }; + usse::RegisterReference reference = allocateRegister(usse::RegisterBank::Primary, type); + + // What the heck is going on with fragment inputs!?!? + ProgramFragmentInputInfo input; + input.size = (reference.size - 1) << 4u; + input.component_info = 0b11u << 4u; // 0b11 = Float, 0b10 = Half? + input.resource_index = reference.index; + + input.attribute_info |= getFragmentVaryingBits(varying.varying); // Id + input.attribute_info |= 0x10A000u; // 0x20000000 = Half, 0x10000000 = Fixed, 0x10A000 = Float... + input.attribute_info |= (reference.type.components - 1) << 22u; // Component Count + + // Samplers are not yet supported. 
+ + references[varying.varying] = reference; + } + + return references; + } + + usse::RegisterReference Builder::createFragmentOutput(usse::Type type, uint32_t components) { + varyings.output_comp_count = components; + varyings.output_param_type = static_cast(getParameterTypeFromUSSEType(type)); + + return usse::RegisterReference({ type, components, 1 }, + usse::RegisterBank::Primary, 0, usse::getTypeSize(type) * components / 4); + } + std::vector Builder::build() { - std::vector data(sizeof(ProgramHeader)); + std::vector data(sizeof(ProgramHeader) + sizeof(ProgramVaryings)); + header.varyingsOffset = sizeof(ProgramHeader) - OFFSET_OF(header, varyingsOffset); // Strings class StringEntry { @@ -407,10 +445,8 @@ namespace gxp { parameter.resourceIndex = param.resourceIndex; parameter.arraySize = param.type.arraySize; parameter.semantic = static_cast(param.semantic); - parameter.config.setType(getParameterTypeFromUSSEType(param.type.type)); - parameter.config.setCategory(param.category); - parameter.config.setComponentCount(param.type.components); - parameter.config.setContainerIndex(param.containerIndex); + parameter.config = createParameterConfig(param.category, getParameterTypeFromUSSEType(param.type.type), + param.type.components, param.containerIndex); auto stringEntry = std::find_if(stringDB.begin(), stringDB.end(), [param](const StringEntry &entry) { return entry.text == param.name; @@ -426,11 +462,14 @@ namespace gxp { } // Varyings - header.varyingsOffset = data.size() - OFFSET_OF(header, varyingsOffset); - { - std::vector varyingsData(sizeof(varyings)); - std::memcpy(varyingsData.data(), &varyings, sizeof(varyings)); - data.insert(data.end(), varyingsData.begin(), varyingsData.end()); + if (getType() == ShaderType::Fragment && !fragmentInputs.empty()) { + varyings.varyings_count = fragmentInputs.size(); + varyings.vertex_outputs1 = data.size() - + (sizeof(ProgramHeader) + sizeof(ProgramVaryings) - OFFSET_OF(varyings, vertex_outputs1)); + 
data.insert(data.end(), + reinterpret_cast(fragmentInputs.data()), + reinterpret_cast(fragmentInputs.data()) + + fragmentInputs.size() * sizeof(ProgramFragmentInputInfo)); } // Code @@ -508,6 +547,7 @@ namespace gxp { } std::memcpy(data.data(), &header, sizeof(ProgramHeader)); + std::memcpy(data.data() + sizeof(ProgramHeader), &varyings, sizeof(ProgramVaryings)); return data; } diff --git a/src/gxp/src/gxp.cpp b/src/gxp/src/gxp.cpp index 4cd051e..9a2df74 100644 --- a/src/gxp/src/gxp.cpp +++ b/src/gxp/src/gxp.cpp @@ -80,7 +80,7 @@ namespace gxp { && varyingNum <= static_cast(ProgramVarying::TexCoord9); } - uint32_t getVaryingBits(ProgramVarying varying) { + uint32_t getVertexVaryingBits(ProgramVarying varying) { switch (varying) { case ProgramVarying::Fog: return static_cast(ProgramVaryingVertexBits::Fog); case ProgramVarying::Color0: return static_cast(ProgramVaryingVertexBits::Color0); @@ -95,54 +95,58 @@ namespace gxp { case ProgramVarying::Clip6: return static_cast(ProgramVaryingVertexBits::Clip6); case ProgramVarying::Clip7: return static_cast(ProgramVaryingVertexBits::Clip7); - case ProgramVarying::TexCoord0: return static_cast(ProgramTexCoordMasks::TexCoord0); - case ProgramVarying::TexCoord1: return static_cast(ProgramTexCoordMasks::TexCoord1); - case ProgramVarying::TexCoord2: return static_cast(ProgramTexCoordMasks::TexCoord2); - case ProgramVarying::TexCoord3: return static_cast(ProgramTexCoordMasks::TexCoord3); - case ProgramVarying::TexCoord4: return static_cast(ProgramTexCoordMasks::TexCoord4); - case ProgramVarying::TexCoord5: return static_cast(ProgramTexCoordMasks::TexCoord5); - case ProgramVarying::TexCoord6: return static_cast(ProgramTexCoordMasks::TexCoord6); - case ProgramVarying::TexCoord7: return static_cast(ProgramTexCoordMasks::TexCoord7); - case ProgramVarying::TexCoord8: return static_cast(ProgramTexCoordMasks::TexCoord8); - case ProgramVarying::TexCoord9: return static_cast(ProgramTexCoordMasks::TexCoord9); + case 
ProgramVarying::TexCoord0: return static_cast(ProgramTexCoordVertexMasks::TexCoord0); + case ProgramVarying::TexCoord1: return static_cast(ProgramTexCoordVertexMasks::TexCoord1); + case ProgramVarying::TexCoord2: return static_cast(ProgramTexCoordVertexMasks::TexCoord2); + case ProgramVarying::TexCoord3: return static_cast(ProgramTexCoordVertexMasks::TexCoord3); + case ProgramVarying::TexCoord4: return static_cast(ProgramTexCoordVertexMasks::TexCoord4); + case ProgramVarying::TexCoord5: return static_cast(ProgramTexCoordVertexMasks::TexCoord5); + case ProgramVarying::TexCoord6: return static_cast(ProgramTexCoordVertexMasks::TexCoord6); + case ProgramVarying::TexCoord7: return static_cast(ProgramTexCoordVertexMasks::TexCoord7); + case ProgramVarying::TexCoord8: return static_cast(ProgramTexCoordVertexMasks::TexCoord8); + case ProgramVarying::TexCoord9: return static_cast(ProgramTexCoordVertexMasks::TexCoord9); default: return 0; } } - - void ParameterConfig::setCategory(ParameterCategory category) { - config &= ~0b1111u; - config |= static_cast(category) & 0b1111u; - } - void ParameterConfig::setType(ParameterType type) { - config &= ~(0b1111u << 4u); - config |= (static_cast(type) & 0b1111u) << 4u; - } + uint32_t getFragmentVaryingBits(ProgramVarying varying) { + switch (varying) { + case ProgramVarying::Position: return static_cast(ProgramVaryingFragmentBits::Position); + case ProgramVarying::Fog: return static_cast(ProgramVaryingFragmentBits::Fog); + case ProgramVarying::Color0: return static_cast(ProgramVaryingFragmentBits::Color0); + case ProgramVarying::Color1: return static_cast(ProgramVaryingFragmentBits::Color1); + case ProgramVarying::TexCoord0: return static_cast(ProgramVaryingFragmentBits::TexCoord0); + case ProgramVarying::TexCoord1: return static_cast(ProgramVaryingFragmentBits::TexCoord1); + case ProgramVarying::TexCoord2: return static_cast(ProgramVaryingFragmentBits::TexCoord2); + case ProgramVarying::TexCoord3: return 
static_cast(ProgramVaryingFragmentBits::TexCoord3); + case ProgramVarying::TexCoord4: return static_cast(ProgramVaryingFragmentBits::TexCoord4); + case ProgramVarying::TexCoord5: return static_cast(ProgramVaryingFragmentBits::TexCoord5); + case ProgramVarying::TexCoord6: return static_cast(ProgramVaryingFragmentBits::TexCoord6); + case ProgramVarying::TexCoord7: return static_cast(ProgramVaryingFragmentBits::TexCoord7); + case ProgramVarying::TexCoord8: return static_cast(ProgramVaryingFragmentBits::TexCoord8); + case ProgramVarying::TexCoord9: return static_cast(ProgramVaryingFragmentBits::TexCoord9); +// case ProgramVarying::PointSize: return static_cast(ProgramVaryingFragmentBits::PointSize); +// case ProgramVarying::Clip0: return static_cast(ProgramVaryingFragmentBits::Clip0); +// case ProgramVarying::Clip1: return static_cast(ProgramVaryingFragmentBits::Clip1); +// case ProgramVarying::Clip2: return static_cast(ProgramVaryingFragmentBits::Clip2); +// case ProgramVarying::Clip3: return static_cast(ProgramVaryingFragmentBits::Clip3); +// case ProgramVarying::Clip4: return static_cast(ProgramVaryingFragmentBits::Clip4); +// case ProgramVarying::Clip5: return static_cast(ProgramVaryingFragmentBits::Clip5); +// case ProgramVarying::Clip6: return static_cast(ProgramVaryingFragmentBits::Clip6); +// case ProgramVarying::Clip7: return static_cast(ProgramVaryingFragmentBits::Clip7); - void ParameterConfig::setComponentCount(uint32_t componentCount) { - config &= ~(0b1111u << 8u); - config |= (componentCount & 0b1111u) << 8u; + default: return 0; + } } - void ParameterConfig::setContainerIndex(uint32_t containerIndex) { - config &= ~(0b1111u << 12u); + uint16_t createParameterConfig(ParameterCategory category, ParameterType type, + uint32_t components, uint32_t containerIndex) { + uint16_t config = 0; config |= (containerIndex & 0b1111u) << 12u; - } - - ParameterCategory ParameterConfig::getCategory() { - return static_cast(config & 0b1111u); - } - - ParameterType 
ParameterConfig::getType() { - return static_cast((config & (0b1111u << 4u)) >> 4u); - } - - uint32_t ParameterConfig::getComponentCount() { - return (config & (0b1111u << 8u)) >> 8u; - } - - uint32_t ParameterConfig::getContainerIndex() { - return (config & (0b1111u << 12u)) >> 12u; + config |= (components & 0b1111u) << 8u; + config |= (static_cast(type) & 0b1111u) << 4u; + config |= static_cast(category) & 0b1111u; + return config; } } diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index 4e85733..37cf86e 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -45,9 +45,15 @@ class CompilerGXP : public Compiler { static gxp::ParameterSemantic translateDecorations(spv::BuiltIn builtIn); static gxp::ProgramVarying translateVarying(spv::BuiltIn builtIn); + static gxp::ProgramVarying allocateVarying( + std::vector &availableVaryings, + std::vector &availableTexCoords, + uint32_t components); + void createBlock(const SPIRBlock &block); void createFunction(const SPIRFunction &function); - void createShaderResources(); + void createVertexShaderResources(); + void createFragmentShaderResources(); void createTranslators(); // SPRIV Translation OPs diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index b76743f..f08f6ff 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -147,8 +147,6 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { spv::Id base = arguments.instruction[2]; spv::Id index = arguments.instruction[3]; // Multiple indices, apparently. - // This right now is only supported to accessing gl_PerVertex structs. 
- SPIRConstant constant = get(index); uint32_t value = constant.m.c[0].r[0].u32; diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index 0bee121..c398771 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -10,16 +10,15 @@ const std::vector allVaryings = { gxp::ProgramVarying::Color0, gxp::ProgramVarying::Color1, -#ifdef VERTEX_POSITION_OUTPUT_ONLY - gxp::ProgramVarying::Clip0, - gxp::ProgramVarying::Clip1, -#endif - gxp::ProgramVarying::Clip2, - gxp::ProgramVarying::Clip3, - gxp::ProgramVarying::Clip4, - gxp::ProgramVarying::Clip5, - gxp::ProgramVarying::Clip6, - gxp::ProgramVarying::Clip7, +// Afaik Clips don't work as a varying to fragment shader. +// gxp::ProgramVarying::Clip0, +// gxp::ProgramVarying::Clip1, +// gxp::ProgramVarying::Clip2, +// gxp::ProgramVarying::Clip3, +// gxp::ProgramVarying::Clip4, +// gxp::ProgramVarying::Clip5, +// gxp::ProgramVarying::Clip6, +// gxp::ProgramVarying::Clip7, gxp::ProgramVarying::Fog, }; @@ -99,6 +98,23 @@ gxp::ProgramVarying CompilerGXP::translateVarying(spv::BuiltIn builtIn) { } } +gxp::ProgramVarying CompilerGXP::allocateVarying( + std::vector &availableVaryings, + std::vector &availableTexCoords, + uint32_t components) { + gxp::ProgramVarying selected = gxp::ProgramVarying::None; + + if (components == 4 && !availableVaryings.empty()) { + selected = availableVaryings[0]; + availableVaryings.erase(availableVaryings.begin()); + } else if (!availableTexCoords.empty()) { + selected = availableTexCoords[0]; + availableTexCoords.erase(availableTexCoords.begin()); + } + + return selected; +} + void CompilerGXP::createBlock(const SPIRBlock &block) { gxp::Block *gxpBlock = builder.createPrimaryBlock(); @@ -123,7 +139,7 @@ void CompilerGXP::createFunction(const SPIRFunction &function) { } } -void CompilerGXP::createShaderResources() { +void CompilerGXP::createVertexShaderResources() { ShaderResources resources = get_shader_resources(); for (const auto &input : 
resources.stage_inputs) { @@ -150,37 +166,22 @@ void CompilerGXP::createShaderResources() { } std::vector varyings; - std::vector texCoords; + std::vector texCoords; std::vector availableVaryings = allVaryings; std::vector availableTexCoords = allTexCoords; - const auto &allocateVarying = [&availableVaryings, &availableTexCoords](uint32_t size) { - gxp::ProgramVarying selected = gxp::ProgramVarying::None; - - if (size == 4 && !availableVaryings.empty()) { - selected = availableVaryings[0]; - availableVaryings.erase(availableVaryings.begin()); - } else if (!availableTexCoords.empty()) { - selected = availableTexCoords[0]; - availableTexCoords.erase(availableTexCoords.begin()); - } - - return selected; - }; - for (const auto &output : resources.stage_outputs) { - const SPIRVariable &variable = get(output.id); const SPIRType &type = get_type(output.type_id); - gxp::ProgramVarying varying = allocateVarying(type.vecsize); + gxp::ProgramVarying varying = allocateVarying(availableVaryings, availableTexCoords, type.vecsize); if (varying == gxp::ProgramVarying::None) throw std::runtime_error("No available space for varying."); if (gxp::isTexCoordVarying(varying)) { - gxp::ProgramTexCoordInfo info = { }; + gxp::ProgramVectorInfo info = { }; info.varying = varying; - info.componentCount = type.vecsize; + info.components = type.vecsize; texCoords.push_back(info); } else varyings.push_back(varying); @@ -220,7 +221,52 @@ void CompilerGXP::createShaderResources() { } } - varyingReferences = builder.registerVaryings(varyings, texCoords); + varyingReferences = builder.registerVertexVaryings(varyings, texCoords); +} + +void CompilerGXP::createFragmentShaderResources() { + ShaderResources resources = get_shader_resources(); + + if (resources.stage_outputs.size() == 1) { + Resource resource = resources.stage_outputs[0]; + SPIRType type = get_type(resource.type_id); + + idRegisters[resource.id] = builder.createFragmentOutput(translateType(type.basetype), type.vecsize); + } else { + 
throw std::runtime_error("Only one output is allowed for a fragment shader."); + } + + for (const auto &uniform : resources.uniform_buffers) { + const SPIRType &type = get_type(uniform.type_id); + + if (type.member_types.size() != 1) + throw std::runtime_error("Uniform blocks are not supported."); + + gxp::Parameter parameter; + parameter.name = uniform.name; + parameter.category = gxp::ParameterCategory::Uniform; + parameter.type = translateType(get_type(type.member_types[0])); + idRegisters[uniform.id] = builder.registerParameter(parameter); + } + + std::vector varyings; + + std::vector availableVaryings = allVaryings; + std::vector availableTexCoords = allTexCoords; + + for (const auto &input : resources.stage_inputs) { + const SPIRType &type = get_type(input.type_id); + + gxp::ProgramVarying varying = allocateVarying(availableVaryings, availableTexCoords, type.vecsize); + if (varying == gxp::ProgramVarying::None) + throw std::runtime_error("No available space for varying."); + + varyings.push_back({ varying, type.vecsize }); + + idVaryings[input.id] = varying; + } + + varyingReferences = builder.registerFragmentVaryings(varyings); } std::vector CompilerGXP::compileData() { @@ -230,9 +276,11 @@ std::vector CompilerGXP::compileData() { switch (entryPoints[0].execution_model) { case spv::ExecutionModelVertex: builder.setType(gxp::ShaderType::Vertex); + createVertexShaderResources(); break; case spv::ExecutionModelFragment: builder.setType(gxp::ShaderType::Fragment); + createFragmentShaderResources(); break; default: throw std::runtime_error("Entry point must be of type vertex or fragment."); @@ -241,8 +289,6 @@ std::vector CompilerGXP::compileData() { SPIREntryPoint entryPoint = get_entry_point(entryPoints[0].name, entryPoints[0].execution_model); SPIRFunction entryFunction = get(entryPoint.self); - createShaderResources(); - // addFunction should recursively call the other functions. 
// try { createFunction(entryFunction); From d68aecc7f41ae24a42f5fb85492af42e4df025f8 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Thu, 12 Sep 2019 13:46:09 -0400 Subject: [PATCH 05/19] Struct support --- src/gxp/include/gxp/builder.h | 4 + src/gxp/src/builder.cpp | 45 ++++++++- .../include/translator/translator.h | 26 +++-- src/translator/src/codes.cpp | 96 +++++++++++++------ src/translator/src/translator.cpp | 68 +++++++++---- 5 files changed, 186 insertions(+), 53 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index 9d44710..02e433c 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -34,6 +34,10 @@ namespace gxp { usse::RegisterReference first, usse::RegisterReference second, usse::RegisterReference destination); + void createSub( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); }; class Parameter { diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 612dd2c..18aaf69 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -234,6 +234,44 @@ namespace gxp { )); } + void Block::createSub( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + // Oh boy... 
+ assert(second.bank == usse::RegisterBank::Internal); + + assert(false); + +// instructions.push_back(usse::makeVNMAD32( +// // pred +// // skipinv +// // src1_swiz_10_11 +// // syncstart +// // dest_bank_ext +// // src1_swiz_9 +// // src1_bank_ext +// // src2_bank_ext +// // src2_swiz +// // nosched +// // dest_mask +// // src1_mod +// // src2_mod +// // src1_swiz_7_8 +// // dest_bank_sel +// // src1_bank_sel +// // src2_bank_sel +// // dest_n +// // src1_swiz_0_6 +// // op2 +// // src1_n +// // src2_n +// )); + } + Block::Block(gxp::Builder &parent) : parent(parent) { } usse::RegisterBank Parameter::getBank() { @@ -398,9 +436,12 @@ namespace gxp { input.attribute_info |= getFragmentVaryingBits(varying.varying); // Id input.attribute_info |= 0x10A000u; // 0x20000000 = Half, 0x10000000 = Fixed, 0x10A000 = Float... input.attribute_info |= (reference.type.components - 1) << 22u; // Component Count + input.attribute_info |= 0xFu; // Not a Sampler! // Samplers are not yet supported. + fragmentInputs.push_back(input); + references[varying.varying] = reference; } @@ -464,8 +505,8 @@ namespace gxp { // Varyings if (getType() == ShaderType::Fragment && !fragmentInputs.empty()) { varyings.varyings_count = fragmentInputs.size(); - varyings.vertex_outputs1 = data.size() - - (sizeof(ProgramHeader) + sizeof(ProgramVaryings) - OFFSET_OF(varyings, vertex_outputs1)); + varyings.vertex_outputs1 = data.size() + - (sizeof(ProgramHeader) + OFFSET_OF(varyings, varyings_count) + sizeof(uint32_t)); data.insert(data.end(), reinterpret_cast(fragmentInputs.data()), reinterpret_cast(fragmentInputs.data()) diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index 37cf86e..d837298 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -19,17 +19,25 @@ class TranslatorCode { }; class TranslatorArguments { + TranslatorArguments( + gxp::Block &block, + const TranslatorCode &code, 
+ const uint32_t *instruction, + uint32_t wordCount); + friend class CompilerGXP; public: gxp::Block █ const TranslatorCode &code; const uint32_t *instruction; uint32_t wordCount; +}; - TranslatorArguments( - gxp::Block &block, - const TranslatorCode &code, - const uint32_t *instruction, - uint32_t wordCount); +class TranslatorReference { +public: + usse::RegisterReference reference; + std::vector subreferences; + + bool isStruct(); }; class CompilerGXP : public Compiler { @@ -37,7 +45,7 @@ class CompilerGXP : public Compiler { std::vector codes; std::unordered_map idVaryings; - std::unordered_map idRegisters; + std::unordered_map idRegisters; std::unordered_map varyingReferences; static usse::Type translateType(SPIRType::BaseType baseType); @@ -50,6 +58,10 @@ class CompilerGXP : public Compiler { std::vector &availableTexCoords, uint32_t components); + TranslatorReference createVariable(usse::RegisterBank bank, const SPIRType &type); + TranslatorReference createParameter(gxp::ParameterCategory category, const SPIRType &type, + const std::string &name); + void createBlock(const SPIRBlock &block); void createFunction(const SPIRFunction &function); void createVertexShaderResources(); @@ -67,6 +79,8 @@ class CompilerGXP : public Compiler { void opCompositeExtract(const TranslatorArguments &arguments); void opCompositeConstruct(const TranslatorArguments &arguments); void opAccessChain(const TranslatorArguments &arguments); + void opVectorShuffle(const TranslatorArguments &arguments); + void opFSub(const TranslatorArguments &arguments); public: std::vector compileData(); diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index f08f6ff..59e4d0e 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -28,19 +28,22 @@ void CompilerGXP::opLoad(const TranslatorArguments &arguments) { // This is a redirect, but it should really load into temp. 
// Maybe let the user chose if there want to assume redirect or copy until we can introduce analysis. - idRegisters[result] = getOrThrow(idRegisters, pointer); + if (idVaryings.find(pointer) != idVaryings.end()) + idRegisters[result] = { getOrThrow(varyingReferences, getOrThrow(idVaryings, pointer)) }; + else + idRegisters[result] = getOrThrow(idRegisters, pointer); } void CompilerGXP::opStore(const TranslatorArguments &arguments) { spv::Id destination = arguments.instruction[0]; spv::Id source = arguments.instruction[1]; - usse::RegisterReference sourceRegister = getOrThrow(idRegisters, source); + usse::RegisterReference sourceRegister = getOrThrow(idRegisters, source).reference; usse::RegisterReference destinationRegister; if (idVaryings.find(destination) != idVaryings.end()) destinationRegister = getOrThrow(varyingReferences, getOrThrow(idVaryings, destination)); else - destinationRegister = getOrThrow(idRegisters, destination); + destinationRegister = getOrThrow(idRegisters, destination).reference; arguments.block.createMov(sourceRegister, destinationRegister); } @@ -51,8 +54,8 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { spv::Id matrix = arguments.instruction[2]; spv::Id vector = arguments.instruction[3]; - usse::RegisterReference matrixRegister = getOrThrow(idRegisters, matrix); - usse::RegisterReference vectorRegister = getOrThrow(idRegisters, vector); + usse::RegisterReference matrixRegister = getOrThrow(idRegisters, matrix).reference; + usse::RegisterReference vectorRegister = getOrThrow(idRegisters, vector).reference; assert(matrixRegister.type.type == vectorRegister.type.type); assert(matrixRegister.type.arraySize == vectorRegister.type.components); @@ -70,7 +73,7 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { } arguments.block.parent.freeRegister(internal); - idRegisters[result] = temp; + idRegisters[result] = { temp }; } void CompilerGXP::opConvertUToF(const TranslatorArguments 
&arguments) { @@ -78,13 +81,13 @@ void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { spv::Id destination = arguments.instruction[1]; spv::Id source = arguments.instruction[2]; - usse::RegisterReference srcReg = getOrThrow(idRegisters, source); + usse::RegisterReference srcReg = getOrThrow(idRegisters, source).reference; usse::RegisterReference destReg = arguments.block.parent.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 4, 1 }); arguments.block.createPack(srcReg, destReg); - idRegisters[destination] = destReg; + idRegisters[destination] = { destReg }; } void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { @@ -97,7 +100,7 @@ void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { if (reg == idRegisters.end()) throw std::runtime_error(fmt::format("Source ID {} was not loaded with a register reference.", source)); - idRegisters[result] = reg->second.getComponents(index, 1); + idRegisters[result] = { reg->second.reference.getComponents(index, 1) }; } void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { @@ -117,7 +120,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { // This is very wrong. Rework this! 
auto reg = idRegisters.find(vecId); if (reg != idRegisters.end()) { - source = reg->second; + source = reg->second.reference; } else if (type.basetype == SPIRType::Float) { SPIRConstant spvConstant = get(vecId); @@ -138,7 +141,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { arguments.block.createMov(source, output.getComponents(a, 1)); } - idRegisters[result] = output; + idRegisters[result] = { output }; } void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { @@ -150,31 +153,66 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { SPIRConstant constant = get(index); uint32_t value = constant.m.c[0].r[0].u32; - SPIRVariable baseVariable = get(base); - SPIRType type = get_type(baseVariable.basetype); - - if (type.basetype == SPIRType::Struct) { - SPIRType memberType = get_type(type.member_types[value]); - spv::BuiltIn builtIn; + SPIRType type = get_type_from_variable(base); + spv::BuiltIn builtIn; - if (is_member_builtin(type, value, &builtIn)) { - idRegisters[result] = getOrThrow(varyingReferences, translateVarying(builtIn)); - } else { - if (value != 0) - throw std::runtime_error("Must link to a single element."); - idRegisters[result] = getOrThrow(idRegisters, base); - } + if (type.basetype == SPIRType::Struct && is_member_builtin(type, value, &builtIn)) { + idRegisters[result] = { getOrThrow(varyingReferences, translateVarying(builtIn)) }; } else { - if (type.columns > 1) { - idRegisters[result] = getOrThrow(idRegisters, base).getElement(value); - } else if (type.vecsize > 1) { - idRegisters[result] = getOrThrow(idRegisters, base).getComponents(value, 1); + TranslatorReference reference = getOrThrow(idRegisters, base); + + if (reference.isStruct()) { + idRegisters[result] = reference.subreferences[value]; + } else if (reference.reference.type.arraySize > 1) { + idRegisters[result] = { reference.reference.getElement(value) }; + } else if (reference.reference.type.components > 1) { + 
idRegisters[result] = { reference.reference.getComponents(value, 1) }; } else { throw std::runtime_error("Access Chain to a non composite type."); } } } + +void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[2]; + spv::Id secondId = arguments.instruction[3]; + + SPIRType type = get_type(typeId); + + usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; + usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + + usse::RegisterReference temp = arguments.block.parent.allocateRegister( + usse::RegisterBank::Temporary, translateType(type)); + + for (uint32_t a = 0; a < type.vecsize; a++) { + uint32_t index = arguments.instruction[4 + a]; + + usse::RegisterReference source; + + if (index < first.type.components) + source = first.getComponents(index, 1); + else + source = second.getComponents(index - first.type.components, 1); + + arguments.block.createMov(source, temp.getComponents(a, 1)); + } + + idRegisters[result] = { temp }; +} + +void CompilerGXP::opFSub(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[2]; + spv::Id secondId = arguments.instruction[3]; + + assert(false); +} + TranslatorArguments::TranslatorArguments( gxp::Block &block, const TranslatorCode &code, @@ -268,7 +306,7 @@ void CompilerGXP::createTranslators() { { static_cast(76), "OpUndefined", &CompilerGXP::undefined }, { spv::Op::OpVectorExtractDynamic, "OpVectorExtractDynamic", &CompilerGXP::unimplemented }, { spv::Op::OpVectorInsertDynamic, "OpVectorInsertDynamic", &CompilerGXP::unimplemented }, - { spv::Op::OpVectorShuffle, "OpVectorShuffle", &CompilerGXP::unimplemented }, + { spv::Op::OpVectorShuffle, "OpVectorShuffle", &CompilerGXP::opVectorShuffle }, { 
spv::Op::OpCompositeConstruct, "OpCompositeConstruct", &CompilerGXP::opCompositeConstruct }, { spv::Op::OpCompositeExtract, "OpCompositeExtract", &CompilerGXP::opCompositeExtract }, { spv::Op::OpCompositeInsert, "OpCompositeInsert", &CompilerGXP::unimplemented }, diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index c398771..a689e07 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -35,6 +35,8 @@ const std::vector allTexCoords = { gxp::ProgramVarying::TexCoord9, }; +bool TranslatorReference::isStruct() { return !subreferences.empty(); } + usse::Type CompilerGXP::translateType(SPIRType::BaseType baseType) { switch (baseType) { case SPIRType::BaseType::Boolean: @@ -115,6 +117,42 @@ gxp::ProgramVarying CompilerGXP::allocateVarying( return selected; } +TranslatorReference CompilerGXP::createVariable(usse::RegisterBank bank, const SPIRType &type) { + if (type.basetype == SPIRType::Struct) { + TranslatorReference reference; + + for (uint32_t a : type.member_types) { + reference.subreferences.push_back(createVariable(bank, get_type(a))); + } + + return reference; + } else { + return { builder.allocateRegister(bank, translateType(type)) }; + } +} + +TranslatorReference CompilerGXP::createParameter(gxp::ParameterCategory category, const SPIRType &type, + const std::string &name) { + if (type.basetype == SPIRType::Struct) { + TranslatorReference reference; + + for (size_t a = 0; a < type.member_types.size(); a++) { + reference.subreferences.push_back( + createParameter(category, get_type(type.member_types[a]), name + "." 
+ get_member_name(type.self, a))); + } + + return reference; + } else { + gxp::Parameter parameter; + + parameter.category = category; + parameter.type = translateType(type); + parameter.name = name; + + return { builder.registerParameter(parameter) }; + } +} + void CompilerGXP::createBlock(const SPIRBlock &block) { gxp::Block *gxpBlock = builder.createPrimaryBlock(); @@ -133,6 +171,12 @@ void CompilerGXP::createBlock(const SPIRBlock &block) { } void CompilerGXP::createFunction(const SPIRFunction &function) { + for (uint32_t local : function.local_variables) { + SPIRType type = get_type_from_variable(local); + + idRegisters[local] = createVariable(usse::RegisterBank::Temporary, type); + } + for (uint32_t blockId : function.blocks) { auto &block = get(blockId); createBlock(block); @@ -145,11 +189,7 @@ void CompilerGXP::createVertexShaderResources() { for (const auto &input : resources.stage_inputs) { const SPIRType &type = get_type(input.type_id); - gxp::Parameter parameter; - parameter.name = input.name; - parameter.category = gxp::ParameterCategory::Attribute; - parameter.type = translateType(type); - idRegisters[input.id] = builder.registerParameter(parameter); + idRegisters[input.id] = createParameter(gxp::ParameterCategory::Attribute, type, input.name); } for (const auto &uniform : resources.uniform_buffers) { @@ -158,11 +198,9 @@ void CompilerGXP::createVertexShaderResources() { if (type.member_types.size() != 1) throw std::runtime_error("Uniform blocks are not supported."); - gxp::Parameter parameter; - parameter.name = uniform.name; - parameter.category = gxp::ParameterCategory::Uniform; - parameter.type = translateType(get_type(type.member_types[0])); - idRegisters[uniform.id] = builder.registerParameter(parameter); + SPIRType subType = get_type(type.member_types[0]); + + idRegisters[uniform.id] = { { }, { createParameter(gxp::ParameterCategory::Uniform, subType, uniform.name) } }; } std::vector varyings; @@ -231,7 +269,7 @@ void 
CompilerGXP::createFragmentShaderResources() { Resource resource = resources.stage_outputs[0]; SPIRType type = get_type(resource.type_id); - idRegisters[resource.id] = builder.createFragmentOutput(translateType(type.basetype), type.vecsize); + idRegisters[resource.id] = { builder.createFragmentOutput(translateType(type.basetype), type.vecsize) }; } else { throw std::runtime_error("Only one output is allowed for a fragment shader."); } @@ -242,11 +280,9 @@ void CompilerGXP::createFragmentShaderResources() { if (type.member_types.size() != 1) throw std::runtime_error("Uniform blocks are not supported."); - gxp::Parameter parameter; - parameter.name = uniform.name; - parameter.category = gxp::ParameterCategory::Uniform; - parameter.type = translateType(get_type(type.member_types[0])); - idRegisters[uniform.id] = builder.registerParameter(parameter); + SPIRType subType = get_type(type.member_types[0]); + + idRegisters[uniform.id] = { { }, { createParameter(gxp::ParameterCategory::Uniform, subType, uniform.name) } }; } std::vector varyings; From 80421306bb3a0dbd819cf12127876ca78eda69fa Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Sat, 21 Sep 2019 13:53:03 -0400 Subject: [PATCH 06/19] GLSL extension instruction support --- src/gxp/include/gxp/builder.h | 15 ++ src/gxp/include/gxp/usse.h | 18 ++ src/gxp/src/builder.cpp | 228 +++++++++++++++--- src/gxp/src/usse.cpp | 8 +- .../include/translator/translator.h | 12 + src/translator/src/codes.cpp | 195 ++++++++++++--- src/translator/src/translator.cpp | 14 +- src/util/include/util/spirv.h | 1 + src/util/include/util/util.h | 2 +- 9 files changed, 419 insertions(+), 74 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index 02e433c..4fa2fcf 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -38,6 +38,21 @@ namespace gxp { usse::RegisterReference first, usse::RegisterReference second, usse::RegisterReference destination); + void 
createMul( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createReverseSquareRoot( + usse::RegisterReference source, + usse::RegisterReference destination); + void createMin( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createMax( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); }; class Parameter { diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 2587f53..fb00a96 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -50,6 +50,24 @@ namespace usse { Output8, }; + enum class InstructionVNMADOp : uint8_t { + Multiply, + Add, + Fractional, + DSX, + DSY, + Min, + Max, + Dot, + }; + + enum class InstructionVCOMPOp : uint8_t { + Reciprocal, + ReverseSquareRoot, + Logarithm, + Exponent, + }; + typedef std::array SwizzleVec3; typedef std::array SwizzleVec4; diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 18aaf69..5e365c0 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -184,7 +184,7 @@ namespace gxp { srcBankLayout.number, // src1_bank_sel srcBankLayout.number, // src2_bank_sel destBankLayout.getIndex(destination), // dest_n - static_cast(destination.swizzle[3]) & 0b11u, // comp_sel_3 + destination.swizzle.size() > 3 ? static_cast(destination.swizzle[3]) & 0b11u : 0, // comp_sel_3 false, // scale static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 @@ -226,7 +226,7 @@ namespace gxp { second.index, // gpi0_n destBankLayout.getIndex(destination), // dest_n second.getSwizzleIndex(), // gpi0_swiz - static_cast(first.swizzle[3]), // src1_swiz_w + first.swizzle.size() > 3 ? 
static_cast(first.swizzle[3]) : 0, // src1_swiz_w static_cast(first.swizzle[2]), // src1_swiz_z static_cast(first.swizzle[1]), // src1_swiz_y static_cast(first.swizzle[0]), // src1_swiz_x @@ -239,37 +239,205 @@ namespace gxp { usse::RegisterReference second, usse::RegisterReference destination) { usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - // Oh boy... - assert(second.bank == usse::RegisterBank::Internal); + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + // First/Second sources are flipped so negative effect can be applied to src1. -x + y = y - x + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + secondBankLayout.extension, // src1_bank_ext + firstBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b01, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + secondBankLayout.number, // src1_bank_sel + firstBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Add), // op2 + secondBankLayout.getIndex(second), // src1_n + firstBankLayout.getIndex(first) // src2_n + )); + } + + void Block::createMul( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + 
usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } - assert(false); - -// instructions.push_back(usse::makeVNMAD32( -// // pred -// // skipinv -// // src1_swiz_10_11 -// // syncstart -// // dest_bank_ext -// // src1_swiz_9 -// // src1_bank_ext -// // src2_bank_ext -// // src2_swiz -// // nosched -// // dest_mask -// // src1_mod -// // src2_mod -// // src1_swiz_7_8 -// // dest_bank_sel -// // src1_bank_sel -// // src2_bank_sel -// // dest_n -// // src1_swiz_0_6 -// // op2 -// // src1_n -// // src2_n -// )); + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Multiply), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); + } + + void Block::createReverseSquareRoot( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param 
typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + instructions.push_back(usse::makeVCOMP( + 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::ReverseSquareRoot), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp + destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + + void Block::createMin( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 
0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Min), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); + } + + void Block::createMax( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Max), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); } Block::Block(gxp::Builder &parent) : parent(parent) { } diff --git a/src/gxp/src/usse.cpp 
b/src/gxp/src/usse.cpp index aad3f51..448f81a 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -201,6 +201,7 @@ namespace usse { } BankLayout BankLayout::srcLayout(RegisterBank bank) { switch (bank) { + case RegisterBank::Internal: case RegisterBank::Temporary: return {bank, 0, 0 }; case RegisterBank::Primary: return { bank, 0, 2 }; case RegisterBank::Output: return { bank, 0, 1 }; @@ -268,8 +269,7 @@ namespace usse { } RegisterReference RegisterReference::getHalf(uint32_t half) { - assert(type.components % 2 == 0); - uint32_t width = type.components / 2; + uint32_t width = (type.components - 1) / 2 + 1; return getComponents(width * half, width); } @@ -368,7 +368,7 @@ namespace usse { case RegisterBank::Primary: return "Primary"; case RegisterBank::Output: return "Output"; case RegisterBank::Secondary: return "Secondary"; - case RegisterBank::Internal: return "Float Internal"; + case RegisterBank::Internal: return "Internal"; case RegisterBank::Special: return "Special"; case RegisterBank::Global: return "Global"; case RegisterBank::FloatConstant: return "Float Constant"; @@ -407,7 +407,7 @@ namespace usse { for (uint32_t a = 0; a < swizzleStandardSize; a++) { bool matches = true; - for (uint32_t b = 0; b < 4; b++) { + for (uint32_t b = 0; b < 3; b++) { if (elements[b] != SwizzleChannel::DontCare) { if (swizzleVector3[extended][a][b] != elements[b]) { matches = false; diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index d837298..33f63ae 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -44,6 +44,8 @@ class CompilerGXP : public Compiler { gxp::Builder builder; std::vector codes; + std::unordered_map> extensions; + std::unordered_map idVaryings; std::unordered_map idRegisters; std::unordered_map varyingReferences; @@ -66,7 +68,9 @@ class CompilerGXP : public Compiler { void createFunction(const SPIRFunction &function); void 
createVertexShaderResources(); void createFragmentShaderResources(); + void createTranslators(); + void createExtensions(); // SPRIV Translation OPs void unimplemented(const TranslatorArguments &arguments); @@ -81,6 +85,14 @@ class CompilerGXP : public Compiler { void opAccessChain(const TranslatorArguments &arguments); void opVectorShuffle(const TranslatorArguments &arguments); void opFSub(const TranslatorArguments &arguments); + void opDot(const TranslatorArguments &arguments); + void opFunctionCall(const TranslatorArguments &arguments); + void opExtInst(const TranslatorArguments &arguments); + + // SPIRV Extension OPs + void extGLSLNormalize(const TranslatorArguments &arguments); + void extGLSLFMin(const TranslatorArguments &arguments); + void extGLSLFMax(const TranslatorArguments &arguments); public: std::vector compileData(); diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 59e4d0e..b6fbb05 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -4,15 +4,6 @@ #include -static std::string getString(const uint32_t *program, size_t &length) { - auto *cString = reinterpret_cast(program); - size_t size = strlen(cString) + 1; - size_t remainder = size % sizeof(uint32_t); - - length = size / sizeof(uint32_t) + (remainder == 0 ? 
0 : 1); - return std::string(cString); -} - void CompilerGXP::unimplemented(const TranslatorArguments &arguments) { throw std::runtime_error(fmt::format("{} is not implemented.", arguments.code.name)); } @@ -60,10 +51,10 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { assert(matrixRegister.type.type == vectorRegister.type.type); assert(matrixRegister.type.arraySize == vectorRegister.type.components); - usse::RegisterReference internal = arguments.block.parent.allocateRegister( + usse::RegisterReference internal = builder.allocateRegister( usse::RegisterBank::Internal, vectorRegister.type); - usse::RegisterReference temp = arguments.block.parent.allocateRegister( + usse::RegisterReference temp = builder.allocateRegister( usse::RegisterBank::Temporary, vectorRegister.type); arguments.block.createPack(vectorRegister, internal); @@ -72,7 +63,7 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { arguments.block.createDot(matrixRegister.getElement(a), internal, temp.getComponents(a, 1)); } - arguments.block.parent.freeRegister(internal); + builder.freeRegister(internal); idRegisters[result] = { temp }; } @@ -82,7 +73,7 @@ void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { spv::Id source = arguments.instruction[2]; usse::RegisterReference srcReg = getOrThrow(idRegisters, source).reference; - usse::RegisterReference destReg = arguments.block.parent.allocateRegister( + usse::RegisterReference destReg = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 4, 1 }); arguments.block.createPack(srcReg, destReg); @@ -109,7 +100,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { SPIRType type = get_type(typeId); - usse::RegisterReference output = arguments.block.parent.allocateRegister(usse::RegisterBank::Temporary, + usse::RegisterReference output = builder.allocateRegister(usse::RegisterBank::Temporary, { translateType(type.basetype), 
type.vecsize, 1 }); for (size_t a = 0; a < type.vecsize; a++) { @@ -148,29 +139,39 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id base = arguments.instruction[2]; - spv::Id index = arguments.instruction[3]; // Multiple indices, apparently. - SPIRConstant constant = get(index); - uint32_t value = constant.m.c[0].r[0].u32; + TranslatorReference ref; + + auto builtIn = static_cast(0); + uint32_t builtInValue = get(arguments.instruction[3]).m.c[0].r[0].u32; SPIRType type = get_type_from_variable(base); - spv::BuiltIn builtIn; - if (type.basetype == SPIRType::Struct && is_member_builtin(type, value, &builtIn)) { + if (type.basetype == SPIRType::Struct && is_member_builtin(type, builtInValue, &builtIn)) { idRegisters[result] = { getOrThrow(varyingReferences, translateVarying(builtIn)) }; + return; } else { - TranslatorReference reference = getOrThrow(idRegisters, base); - - if (reference.isStruct()) { - idRegisters[result] = reference.subreferences[value]; - } else if (reference.reference.type.arraySize > 1) { - idRegisters[result] = { reference.reference.getElement(value) }; - } else if (reference.reference.type.components > 1) { - idRegisters[result] = { reference.reference.getComponents(value, 1) }; + ref = getOrThrow(idRegisters, base); + } + + for (uint32_t a = 3; a < arguments.wordCount - 1; a++) { + spv::Id index = arguments.instruction[a]; + + uint32_t value = get(index).m.c[0].r[0].u32; + + if (ref.isStruct()) { + TranslatorReference temp = ref.subreferences[value]; + ref = temp; + } else if (ref.reference.type.arraySize > 1) { + ref = { ref.reference.getElement(value) }; + } else if (ref.reference.type.components > 1) { + ref = { ref.reference.getComponents(value, 1) }; } else { throw std::runtime_error("Access Chain to a non composite type."); } } + + idRegisters[result] = ref; } @@ -185,7 +186,7 @@ void CompilerGXP::opVectorShuffle(const 
TranslatorArguments &arguments) { usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; - usse::RegisterReference temp = arguments.block.parent.allocateRegister( + usse::RegisterReference temp = builder.allocateRegister( usse::RegisterBank::Temporary, translateType(type)); for (uint32_t a = 0; a < type.vecsize; a++) { @@ -210,7 +211,122 @@ void CompilerGXP::opFSub(const TranslatorArguments &arguments) { spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; - assert(false); + usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; + usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, first.type); + + arguments.block.createSub(first, second, destination); + + idRegisters[result] = { destination }; +} + +void CompilerGXP::opDot(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[2]; + spv::Id secondId = arguments.instruction[3]; + + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); + + usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; + usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + + usse::RegisterReference internal = builder.allocateRegister( + usse::RegisterBank::Internal, second.type); + + arguments.block.createPack(second, internal); + arguments.block.createDot(first, internal, destination); + + builder.freeRegister(internal); + + idRegisters[result] = { destination }; +} + +void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = 
arguments.instruction[1]; + spv::Id functionId = arguments.instruction[2]; + + SPIRFunction function = get(functionId); + + for (size_t a = 0; a < function.arguments.size(); a++) { + idRegisters[function.arguments[a].id] = getOrThrow(idRegisters, arguments.instruction[3 + a]); + } + + createFunction(function); +} + +void CompilerGXP::opExtInst(const TranslatorArguments &arguments) { + auto set = static_cast(arguments.instruction[2]); + auto literal = static_cast(arguments.instruction[3]); + + TranslatorImplementation implementation = getOrThrow(getOrThrow(extensions, set), literal); + + (this->*implementation)(arguments); +} + +void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id sourceId = arguments.instruction[4]; + + usse::RegisterReference source = getOrThrow(idRegisters, sourceId).reference; + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, source.type); + + usse::RegisterReference temporary = builder.allocateRegister( + usse::RegisterBank::Internal, source.type); + usse::RegisterReference magnitude = builder.allocateRegister( + usse::RegisterBank::Internal, { source.type.type, 1, 1 }); + + arguments.block.createMov(source, temporary); + arguments.block.createDot(temporary, temporary, magnitude); + arguments.block.createReverseSquareRoot(magnitude, magnitude); + + magnitude.swizzle = std::vector(source.type.components, usse::SwizzleChannel::X); + magnitude.lockSwizzle = true; + magnitude.type.components = 4; + arguments.block.createMul(temporary, magnitude, destination); + + builder.freeRegister(magnitude); + builder.freeRegister(temporary); + + idRegisters[result] = { destination }; +} + +void CompilerGXP::extGLSLFMin(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[4]; 
+ spv::Id secondId = arguments.instruction[5]; + + usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; + usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, first.type); + + arguments.block.createMin(first, second, destination); + + idRegisters[result] = { destination }; +} + +void CompilerGXP::extGLSLFMax(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[4]; + spv::Id secondId = arguments.instruction[5]; + + usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; + usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, first.type); + + arguments.block.createMax(first, second, destination); + + idRegisters[result] = { destination }; } TranslatorArguments::TranslatorArguments( @@ -239,7 +355,7 @@ void CompilerGXP::createTranslators() { { static_cast(9), "OpUndefined", &CompilerGXP::undefined }, { spv::Op::OpExtension, "OpExtension", &CompilerGXP::unimplemented }, { spv::Op::OpExtInstImport, "OpExtInstImport", &CompilerGXP::unimplemented }, - { spv::Op::OpExtInst, "OpExtInst", &CompilerGXP::unimplemented }, + { spv::Op::OpExtInst, "OpExtInst", &CompilerGXP::opExtInst }, { static_cast(13), "OpUndefined", &CompilerGXP::undefined }, { spv::Op::OpMemoryModel, "OpMemoryModel", &CompilerGXP::unimplemented }, { spv::Op::OpEntryPoint, "OpEntryPoint", &CompilerGXP::unimplemented }, @@ -284,7 +400,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpFunction, "OpFunction", &CompilerGXP::unimplemented }, { spv::Op::OpFunctionParameter, "OpFunctionParameter", &CompilerGXP::unimplemented }, { spv::Op::OpFunctionEnd, "OpFunctionEnd", &CompilerGXP::unimplemented }, - { 
spv::Op::OpFunctionCall, "OpFunctionCall", &CompilerGXP::unimplemented }, + { spv::Op::OpFunctionCall, "OpFunctionCall", &CompilerGXP::opFunctionCall }, { static_cast(58), "OpUndefined", &CompilerGXP::undefined }, { spv::Op::OpVariable, "OpVariable", &CompilerGXP::unimplemented }, { spv::Op::OpImageTexelPointer, "OpImageTexelPointer", &CompilerGXP::unimplemented }, @@ -358,7 +474,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpIAdd, "OpIAdd", &CompilerGXP::unimplemented }, { spv::Op::OpFAdd, "OpFAdd", &CompilerGXP::unimplemented }, { spv::Op::OpISub, "OpISub", &CompilerGXP::unimplemented }, - { spv::Op::OpFSub, "OpFSub", &CompilerGXP::unimplemented }, + { spv::Op::OpFSub, "OpFSub", &CompilerGXP::opFSub }, { spv::Op::OpIMul, "OpIMul", &CompilerGXP::unimplemented }, { spv::Op::OpFMul, "OpFMul", &CompilerGXP::unimplemented }, { spv::Op::OpUDiv, "OpUDiv", &CompilerGXP::unimplemented }, @@ -375,7 +491,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpMatrixTimesVector, "OpMatrixTimesVector", &CompilerGXP::opMatrixTimesVector }, { spv::Op::OpMatrixTimesMatrix, "OpMatrixTimesMatrix", &CompilerGXP::unimplemented }, { spv::Op::OpOuterProduct, "OpOuterProduct", &CompilerGXP::unimplemented }, - { spv::Op::OpDot, "OpDot", &CompilerGXP::unimplemented }, + { spv::Op::OpDot, "OpDot", &CompilerGXP::opDot }, { spv::Op::OpIAddCarry, "OpIAddCarry", &CompilerGXP::unimplemented }, { spv::Op::OpISubBorrow, "OpISubBorrow", &CompilerGXP::unimplemented }, { spv::Op::OpUMulExtended, "OpUMulExtended", &CompilerGXP::unimplemented }, @@ -633,3 +749,16 @@ void CompilerGXP::createTranslators() { { spv::Op::OpPtrDiff, "OpPtrDiff", &CompilerGXP::unimplemented }, }; } + +void CompilerGXP::createExtensions() { + extensions = { + { + SPIRExtension::GLSL, + { + { GLSLstd450Normalize, &CompilerGXP::extGLSLNormalize }, + { GLSLstd450FMin, &CompilerGXP::extGLSLFMin }, + { GLSLstd450FMax, &CompilerGXP::extGLSLFMax }, + } + } + }; +} diff --git a/src/translator/src/translator.cpp 
b/src/translator/src/translator.cpp index a689e07..dea4e8f 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -282,7 +282,9 @@ void CompilerGXP::createFragmentShaderResources() { SPIRType subType = get_type(type.member_types[0]); - idRegisters[uniform.id] = { { }, { createParameter(gxp::ParameterCategory::Uniform, subType, uniform.name) } }; + TranslatorReference uniformReference = createParameter(gxp::ParameterCategory::Uniform, subType, uniform.name); + + idRegisters[uniform.id] = { { }, { uniformReference } }; } std::vector varyings; @@ -325,16 +327,16 @@ std::vector CompilerGXP::compileData() { SPIREntryPoint entryPoint = get_entry_point(entryPoints[0].name, entryPoints[0].execution_model); SPIRFunction entryFunction = get(entryPoint.self); - // addFunction should recursively call the other functions. -// try { + try { createFunction(entryFunction); -// } catch (std::runtime_error &e) { -// fmt::print("{}\n", e.what()); -// } + } catch (std::runtime_error &e) { + fmt::print("{}\n", e.what()); + } return builder.build(); } CompilerGXP::CompilerGXP(const std::vector &data) : Compiler(data) { createTranslators(); + createExtensions(); } diff --git a/src/util/include/util/spirv.h b/src/util/include/util/spirv.h index dabd3d7..73aacdc 100644 --- a/src/util/include/util/spirv.h +++ b/src/util/include/util/spirv.h @@ -1,5 +1,6 @@ #pragma once #include +#include using namespace spirv_cross; diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h index 5f22ae4..6608452 100644 --- a/src/util/include/util/util.h +++ b/src/util/include/util/util.h @@ -16,7 +16,7 @@ std::vector loadFileData(const std::string &path) { } template -typename T::mapped_type &getOrThrow(T map, typename T::key_type key) { +typename T::mapped_type &getOrThrow(T &map, typename T::key_type key) { auto reference = map.find(key); if (reference == map.end()) throw std::runtime_error("Missing key in map."); From 
b191cf9b1d729f37af45d84165c51d088cdfa823 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Sat, 21 Sep 2019 14:50:31 -0400 Subject: [PATCH 07/19] SA container support --- src/gxp/include/gxp/gxp.h | 21 +++++++++++++++------ src/gxp/src/builder.cpp | 32 +++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h index b590cc8..c85c4b9 100644 --- a/src/gxp/include/gxp/gxp.h +++ b/src/gxp/include/gxp/gxp.h @@ -8,7 +8,7 @@ namespace gxp { // GXP\0 constexpr uint32_t gxpMagic = 0x00505847; - enum class ParameterCategory : uint8_t{ + enum class ParameterCategory : uint8_t { Attribute, Uniform, Sampler, @@ -134,11 +134,20 @@ namespace gxp { uint32_t components; }; - struct ProgramFragmentInputInfo { - std::uint32_t attribute_info = 0; - std::uint32_t resource_index = 0; - std::uint32_t size = 0; - std::uint32_t component_info = 0; + class ProgramFragmentInputInfo { + public: + uint32_t attributeInfo = 0; + uint32_t resourceIndex = 0; + uint32_t size = 0; + uint32_t componentInfo = 0; + }; + + class ProgramContainerInfo { + public: + uint16_t containerIndex; + uint16_t unk02; + uint16_t secondaryOffset; + uint16_t maxResourceIndex; }; class ProgramParameterInfo { diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 5e365c0..d141bae 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -8,6 +8,8 @@ #define OFFSET_OF(parent, child) (reinterpret_cast(&parent.child) - reinterpret_cast(&parent)) namespace gxp { + constexpr uint16_t containerIndexSA = 14; + class MovRegisterData { public: uint8_t destMask = 0; @@ -598,13 +600,13 @@ namespace gxp { // What the heck is going on with fragment inputs!?!? ProgramFragmentInputInfo input; input.size = (reference.size - 1) << 4u; - input.component_info = 0b11u << 4u; // 0b11 = Float, 0b10 = Half? 
- input.resource_index = reference.index; + input.componentInfo = 0b11u << 4u; // 0b11 = Float, 0b10 = Half? + input.resourceIndex = reference.index; - input.attribute_info |= getFragmentVaryingBits(varying.varying); // Id - input.attribute_info |= 0x10A000u; // 0x20000000 = Half, 0x10000000 = Fixed, 0x10A000 = Float... - input.attribute_info |= (reference.type.components - 1) << 22u; // Component Count - input.attribute_info |= 0xFu; // Not a Sampler! + input.attributeInfo |= getFragmentVaryingBits(varying.varying); // Id + input.attributeInfo |= 0x10A000u; // 0x20000000 = Half, 0x10000000 = Fixed, 0x10A000 = Float... + input.attributeInfo |= (reference.type.components - 1) << 22u; // Component Count + input.attributeInfo |= 0xFu; // Not a Sampler! // Samplers are not yet supported. @@ -646,6 +648,22 @@ namespace gxp { stringDB.push_back(entry); } + // Containers + header.containerCount = 1; + header.containerOffset = data.size() - OFFSET_OF(header, containerOffset); + { + ProgramContainerInfo info = { + containerIndexSA, // Container Index + 0, // ?? 
+ 0, // Register Index + static_cast(saRegPointer) // Register Count + }; + data.insert(data.end(), + reinterpret_cast(&info), + reinterpret_cast(&info) + + sizeof(ProgramContainerInfo)); + } + // Parameters header.parameterCount = parameters.size(); header.parametersOffset = data.size() - OFFSET_OF(header, parametersOffset); @@ -655,7 +673,7 @@ namespace gxp { parameter.arraySize = param.type.arraySize; parameter.semantic = static_cast(param.semantic); parameter.config = createParameterConfig(param.category, getParameterTypeFromUSSEType(param.type.type), - param.type.components, param.containerIndex); + param.type.components, containerIndexSA); auto stringEntry = std::find_if(stringDB.begin(), stringDB.end(), [param](const StringEntry &entry) { return entry.text == param.name; From ee96f96f6d685d2a7fa572da11d396f25284ffee Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 23 Sep 2019 09:26:02 -0400 Subject: [PATCH 08/19] Implement FNegate, VectorTimesScalar, GLSLReflect --- src/gxp/include/gxp/usse.h | 2 +- src/gxp/src/builder.cpp | 5 +- src/gxp/src/usse.cpp | 4 +- .../include/translator/translator.h | 5 + src/translator/src/codes.cpp | 121 ++++++++++++++---- src/translator/src/translator.cpp | 8 +- 6 files changed, 107 insertions(+), 38 deletions(-) diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index fb00a96..8cb3ce2 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -95,7 +95,7 @@ namespace usse { RegisterReference getElement(uint32_t element); RegisterReference() = default; - RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex, uint32_t size); + RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex); }; class BankLayout { diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index d141bae..ed9d221 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -501,7 +501,7 @@ namespace gxp { fmt::print("Allocating {} registers of type {} 
(vec{}[{}]), size {} at index {}.\n", usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); - return usse::RegisterReference(type, bank, index, size); + return usse::RegisterReference(type, bank, index); } void Builder::freeRegister(usse::RegisterReference reg) { @@ -622,8 +622,7 @@ namespace gxp { varyings.output_comp_count = components; varyings.output_param_type = static_cast(getParameterTypeFromUSSEType(type)); - return usse::RegisterReference({ type, components, 1 }, - usse::RegisterBank::Primary, 0, usse::getTypeSize(type) * components / 4); + return usse::RegisterReference({ type, components, 1 }, usse::RegisterBank::Primary, 0); } std::vector Builder::build() { diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index 448f81a..924c953 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -316,8 +316,8 @@ namespace usse { return reg; } - RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex, uint32_t size) - : type(type), bank(bank), size(size) { + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex) + : type(type), bank(bank), size(getTypeSize(type.type) * type.components * type.arraySize / 4) { bool swizzleUp = false; if (regIndex % 2 == 1) { regIndex--; diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index 33f63ae..e3ea315 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -64,6 +64,8 @@ class CompilerGXP : public Compiler { TranslatorReference createParameter(gxp::ParameterCategory category, const SPIRType &type, const std::string &name); + usse::RegisterReference getRegister(spv::Id id); + void createBlock(const SPIRBlock &block); void createFunction(const SPIRFunction &function); void createVertexShaderResources(); @@ -79,11 +81,13 @@ class CompilerGXP : public Compiler { void opLoad(const 
TranslatorArguments &arguments); void opStore(const TranslatorArguments &arguments); void opMatrixTimesVector(const TranslatorArguments &arguments); + void opVectorTimesScalar(const TranslatorArguments &arguments); void opConvertUToF(const TranslatorArguments &arguments); void opCompositeExtract(const TranslatorArguments &arguments); void opCompositeConstruct(const TranslatorArguments &arguments); void opAccessChain(const TranslatorArguments &arguments); void opVectorShuffle(const TranslatorArguments &arguments); + void opFNegate(const TranslatorArguments &arguments); void opFSub(const TranslatorArguments &arguments); void opDot(const TranslatorArguments &arguments); void opFunctionCall(const TranslatorArguments &arguments); @@ -93,6 +97,7 @@ class CompilerGXP : public Compiler { void extGLSLNormalize(const TranslatorArguments &arguments); void extGLSLFMin(const TranslatorArguments &arguments); void extGLSLFMax(const TranslatorArguments &arguments); + void extGLSLReflect(const TranslatorArguments &arguments); public: std::vector compileData(); diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index b6fbb05..1804d68 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -4,6 +4,31 @@ #include +usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { + auto varying = idVaryings.find(id); + if (varying != idVaryings.end()) + return getOrThrow(varyingReferences, getOrThrow(idVaryings, id)); + + auto reg = idRegisters.find(id); + if (reg != idRegisters.end()) + return getOrThrow(idRegisters, id).reference; + + auto *constant = maybe_get(id); + if (constant) { + // FP Constant only + auto type = get_type(constant->constant_type); + assert(type.vecsize <= 1 + && type.columns <= 1 + && type.basetype == SPIRType::Float); + int32_t regIndex = usse::getFPConstantIndex(constant->m.c[0].r[0].f32); + if (regIndex != -1) { + return usse::RegisterReference({ usse::Type::Float32, 1, 1 }, usse::RegisterBank::FloatConstant, 
regIndex); + } + } + + throw std::runtime_error(fmt::format("Cannot find register, varying or constant with id {}.", id)); +} + void CompilerGXP::unimplemented(const TranslatorArguments &arguments) { throw std::runtime_error(fmt::format("{} is not implemented.", arguments.code.name)); } @@ -19,22 +44,15 @@ void CompilerGXP::opLoad(const TranslatorArguments &arguments) { // This is a redirect, but it should really load into temp. // Maybe let the user chose if there want to assume redirect or copy until we can introduce analysis. - if (idVaryings.find(pointer) != idVaryings.end()) - idRegisters[result] = { getOrThrow(varyingReferences, getOrThrow(idVaryings, pointer)) }; - else - idRegisters[result] = getOrThrow(idRegisters, pointer); + idRegisters[result] = { getRegister(pointer) }; } void CompilerGXP::opStore(const TranslatorArguments &arguments) { spv::Id destination = arguments.instruction[0]; spv::Id source = arguments.instruction[1]; - usse::RegisterReference sourceRegister = getOrThrow(idRegisters, source).reference; - usse::RegisterReference destinationRegister; - if (idVaryings.find(destination) != idVaryings.end()) - destinationRegister = getOrThrow(varyingReferences, getOrThrow(idVaryings, destination)); - else - destinationRegister = getOrThrow(idRegisters, destination).reference; + usse::RegisterReference sourceRegister = getRegister(source); + usse::RegisterReference destinationRegister = getRegister(destination); arguments.block.createMov(sourceRegister, destinationRegister); } @@ -45,8 +63,8 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { spv::Id matrix = arguments.instruction[2]; spv::Id vector = arguments.instruction[3]; - usse::RegisterReference matrixRegister = getOrThrow(idRegisters, matrix).reference; - usse::RegisterReference vectorRegister = getOrThrow(idRegisters, vector).reference; + usse::RegisterReference matrixRegister = getRegister(matrix); + usse::RegisterReference vectorRegister = getRegister(vector); 
assert(matrixRegister.type.type == vectorRegister.type.type); assert(matrixRegister.type.arraySize == vectorRegister.type.components); @@ -67,12 +85,31 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { idRegisters[result] = { temp }; } +void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id vectorId = arguments.instruction[2]; + spv::Id scalarId = arguments.instruction[3]; + + usse::RegisterReference vector = getRegister(vectorId); + usse::RegisterReference scalar = getRegister(scalarId); + usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, vector.type); + + scalar.swizzle = std::vector(scalar.type.components, usse::SwizzleChannel::X); + scalar.lockSwizzle = true; + scalar.type.components = vector.type.components; + + arguments.block.createMul(vector, scalar, destination); + + idRegisters[result] = { destination }; +} + void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { spv::Id type = arguments.instruction[0]; spv::Id destination = arguments.instruction[1]; spv::Id source = arguments.instruction[2]; - usse::RegisterReference srcReg = getOrThrow(idRegisters, source).reference; + usse::RegisterReference srcReg = getRegister(source); usse::RegisterReference destReg = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 4, 1 }); @@ -122,7 +159,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { int32_t constantIndex = usse::getFPConstantIndex(packConstant); if (constantIndex != -1) { source = usse::RegisterReference({ usse::Type::Float32, 1, 1 }, - usse::RegisterBank::FloatConstant, constantIndex, 1); + usse::RegisterBank::FloatConstant, constantIndex); source.lockSwizzle = true; usse::SwizzleVec4 swizzleTemp = usse::getSwizzleVec4All(usse::SwizzleChannel::X); source.swizzle = 
std::vector(swizzleTemp.begin(), swizzleTemp.end()); @@ -183,8 +220,8 @@ void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { SPIRType type = get_type(typeId); - usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; - usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); usse::RegisterReference temp = builder.allocateRegister( usse::RegisterBank::Temporary, translateType(type)); @@ -205,14 +242,30 @@ void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { idRegisters[result] = { temp }; } +void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id sourceId = arguments.instruction[2]; + + usse::RegisterReference source = getRegister(sourceId); + usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, source.type); + + int32_t zeroFP = usse::getFPConstantIndex(0); + usse::RegisterReference zero(source.type, usse::RegisterBank::FloatConstant, zeroFP); + + arguments.block.createSub(zero, source, destination); + + idRegisters[result] = { destination }; +} + void CompilerGXP::opFSub(const TranslatorArguments &arguments) { spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; - usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; - usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); @@ -231,8 +284,8 @@ void CompilerGXP::opDot(const 
TranslatorArguments &arguments) { usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); - usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; - usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); usse::RegisterReference internal = builder.allocateRegister( usse::RegisterBank::Internal, second.type); @@ -253,7 +306,7 @@ void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { SPIRFunction function = get(functionId); for (size_t a = 0; a < function.arguments.size(); a++) { - idRegisters[function.arguments[a].id] = getOrThrow(idRegisters, arguments.instruction[3 + a]); + idRegisters[function.arguments[a].id] = { getRegister(arguments.instruction[3 + a]) }; } createFunction(function); @@ -273,7 +326,7 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { spv::Id result = arguments.instruction[1]; spv::Id sourceId = arguments.instruction[4]; - usse::RegisterReference source = getOrThrow(idRegisters, sourceId).reference; + usse::RegisterReference source = getRegister(sourceId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, source.type); @@ -303,8 +356,8 @@ void CompilerGXP::extGLSLFMin(const TranslatorArguments &arguments) { spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; - usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); @@ -319,8 +372,8 @@ void CompilerGXP::extGLSLFMax(const 
TranslatorArguments &arguments) { spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getOrThrow(idRegisters, firstId).reference; - usse::RegisterReference second = getOrThrow(idRegisters, secondId).reference; + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); @@ -329,6 +382,17 @@ void CompilerGXP::extGLSLFMax(const TranslatorArguments &arguments) { idRegisters[result] = { destination }; } +void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[4]; + spv::Id secondId = arguments.instruction[5]; + + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); + + assert(false); +} + TranslatorArguments::TranslatorArguments( gxp::Block &block, const TranslatorCode &code, @@ -470,7 +534,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpBitcast, "OpBitcast", &CompilerGXP::unimplemented }, { static_cast(125), "OpUndefined", &CompilerGXP::undefined }, { spv::Op::OpSNegate, "OpSNegate", &CompilerGXP::unimplemented }, - { spv::Op::OpFNegate, "OpFNegate", &CompilerGXP::unimplemented }, + { spv::Op::OpFNegate, "OpFNegate", &CompilerGXP::opFNegate }, { spv::Op::OpIAdd, "OpIAdd", &CompilerGXP::unimplemented }, { spv::Op::OpFAdd, "OpFAdd", &CompilerGXP::unimplemented }, { spv::Op::OpISub, "OpISub", &CompilerGXP::unimplemented }, @@ -485,7 +549,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpSMod, "OpSMod", &CompilerGXP::unimplemented }, { spv::Op::OpFRem, "OpFRem", &CompilerGXP::unimplemented }, { spv::Op::OpFMod, "OpFMod", &CompilerGXP::unimplemented }, - { spv::Op::OpVectorTimesScalar, "OpVectorTimesScalar", &CompilerGXP::unimplemented }, + { 
spv::Op::OpVectorTimesScalar, "OpVectorTimesScalar", &CompilerGXP::opVectorTimesScalar }, { spv::Op::OpMatrixTimesScalar, "OpMatrixTimesScalar", &CompilerGXP::unimplemented }, { spv::Op::OpVectorTimesMatrix, "OpVectorTimesMatrix", &CompilerGXP::unimplemented }, { spv::Op::OpMatrixTimesVector, "OpMatrixTimesVector", &CompilerGXP::opMatrixTimesVector }, @@ -758,6 +822,7 @@ void CompilerGXP::createExtensions() { { GLSLstd450Normalize, &CompilerGXP::extGLSLNormalize }, { GLSLstd450FMin, &CompilerGXP::extGLSLFMin }, { GLSLstd450FMax, &CompilerGXP::extGLSLFMax }, + { GLSLstd450Reflect, &CompilerGXP::extGLSLReflect } } } }; diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index dea4e8f..1def09f 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -327,11 +327,11 @@ std::vector CompilerGXP::compileData() { SPIREntryPoint entryPoint = get_entry_point(entryPoints[0].name, entryPoints[0].execution_model); SPIRFunction entryFunction = get(entryPoint.self); - try { +// try { createFunction(entryFunction); - } catch (std::runtime_error &e) { - fmt::print("{}\n", e.what()); - } +// } catch (std::runtime_error &e) { +// fmt::print("{}\n", e.what()); +// } return builder.build(); } From c2a7fda6f445f87cb9b97ed4d72f119687c00fc6 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 30 Sep 2019 09:14:08 -0400 Subject: [PATCH 09/19] Implement Mul/Add/Pow --- src/gxp/include/gxp/builder.h | 20 +- src/gxp/src/builder.cpp | 188 +++++++++++++++--- .../include/translator/translator.h | 19 +- src/translator/src/codes.cpp | 100 +++++++++- src/translator/src/translator.cpp | 17 +- 5 files changed, 289 insertions(+), 55 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index 4fa2fcf..5bd509f 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -2,8 +2,8 @@ #include +#include #include -#include namespace gxp { typedef uint64_t 
Instruction; @@ -34,6 +34,10 @@ namespace gxp { usse::RegisterReference first, usse::RegisterReference second, usse::RegisterReference destination); + void createAdd( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); void createSub( usse::RegisterReference first, usse::RegisterReference second, @@ -42,6 +46,12 @@ namespace gxp { usse::RegisterReference first, usse::RegisterReference second, usse::RegisterReference destination); + void createExp( + usse::RegisterReference source, + usse::RegisterReference destination); + void createLog( + usse::RegisterReference source, + usse::RegisterReference destination); void createReverseSquareRoot( usse::RegisterReference source, usse::RegisterReference destination); @@ -80,8 +90,8 @@ namespace gxp { uint32_t tRegPointer = 0; uint32_t iRegPointer = 0; - std::vector primaryBlocks; - std::vector secondaryBlocks; + std::vector> primaryBlocks; + std::vector> secondaryBlocks; std::vector parameters; std::vector fragmentInputs; public: @@ -96,9 +106,9 @@ namespace gxp { usse::RegisterReference registerParameter(const Parameter ¶meter); - std::unordered_map registerVertexVaryings( + std::map registerVertexVaryings( const std::vector &outputs, const std::vector &texCoords); - std::unordered_map registerFragmentVaryings( + std::map registerFragmentVaryings( const std::vector &inputs /*, samplers...*/); usse::RegisterReference createFragmentOutput(usse::Type type, uint32_t components); diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index ed9d221..f3c24d4 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -236,6 +236,48 @@ namespace gxp { )); } + void Block::createAdd( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + 
usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + // First/Second sources are flipped so negative effect can be applied to src1. -x + y = y - x + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + secondBankLayout.extension, // src1_bank_ext + firstBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + secondBankLayout.number, // src1_bank_sel + firstBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Add), // op2 + secondBankLayout.getIndex(second), // src1_n + firstBankLayout.getIndex(first) // src2_n + )); + } + void Block::createSub( usse::RegisterReference first, usse::RegisterReference second, @@ -319,6 +361,88 @@ namespace gxp { )); } + void Block::createExp( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + instructions.push_back(usse::makeVCOMP( 
+ 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::Exponent), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp + destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + + void Block::createLog( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + instructions.push_back(usse::makeVCOMP( + 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::Logarithm), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp + destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + void Block::createReverseSquareRoot( usse::RegisterReference source, 
usse::RegisterReference destination) { @@ -457,16 +581,22 @@ namespace gxp { Block *Builder::createPrimaryBlock() { size_t index = primaryBlocks.size(); - primaryBlocks.push_back(Block(*this)); + primaryBlocks.push_back(std::unique_ptr(new Block(*this))); - return &primaryBlocks[index]; + return primaryBlocks[index].get(); } Block *Builder::createSecondaryBlock() { size_t index = secondaryBlocks.size(); - secondaryBlocks.push_back(Block(*this)); + secondaryBlocks.push_back(std::unique_ptr(new Block(*this))); + + return secondaryBlocks[index].get(); + } - return &secondaryBlocks[index]; + // Odd index can only reference vec3 (.yzw?) + // For vec4, index must be even. + static bool needsAllocOffset(uint32_t index, uint32_t size) { + return size == 4 && index % 2 == 1; } usse::RegisterReference Builder::allocateRegister(usse::RegisterBank bank, usse::DataType type) { @@ -475,24 +605,24 @@ namespace gxp { switch (bank) { case usse::RegisterBank::Primary: - index = paRegPointer; - paRegPointer += size; + index = paRegPointer + needsAllocOffset(paRegPointer, size); + paRegPointer += size + needsAllocOffset(paRegPointer, size); break; case usse::RegisterBank::Secondary: - index = saRegPointer; - saRegPointer += size; + index = saRegPointer + needsAllocOffset(saRegPointer, size); + saRegPointer += size + needsAllocOffset(saRegPointer, size); break; case usse::RegisterBank::Output: - index = oRegPointer; - oRegPointer += size; + index = oRegPointer + needsAllocOffset(oRegPointer, size); + oRegPointer += size + needsAllocOffset(oRegPointer, size); break; case usse::RegisterBank::Temporary: - index = tRegPointer; - tRegPointer += size; + index = tRegPointer + needsAllocOffset(tRegPointer, size); + tRegPointer += size + needsAllocOffset(tRegPointer, size); break; case usse::RegisterBank::Internal: - index = iRegPointer; - iRegPointer += size; + index = iRegPointer + needsAllocOffset(iRegPointer, size); + iRegPointer += size + needsAllocOffset(iRegPointer, size); break; 
default: throw std::runtime_error("Missing allocation method for bank."); @@ -521,7 +651,7 @@ namespace gxp { return reg; } - std::unordered_map Builder::registerVertexVaryings( + std::map Builder::registerVertexVaryings( const std::vector &outputs, const std::vector &texCoords) { varyings.varyings_count = outputs.size() + texCoords.size(); @@ -549,7 +679,7 @@ namespace gxp { varyings.vertex_outputs2 |= texCoordBits << (texCoordIndex * 3u); } - std::unordered_map references; + std::map references; for (auto a = static_cast(ProgramVarying::Position); a < static_cast(ProgramVarying::TexCoord0); a++) { @@ -589,9 +719,9 @@ namespace gxp { return references; } - std::unordered_map Builder::registerFragmentVaryings( + std::map Builder::registerFragmentVaryings( const std::vector &inputs) { - std::unordered_map references; + std::map references; for (ProgramVectorInfo varying : inputs) { usse::DataType type = {usse::Type::Float32, varying.components, 1 }; @@ -705,11 +835,11 @@ namespace gxp { header.tempRegCount2 = tRegPointer; // Difference between both reg counts? 
{ header.secondaryProgramOffset = data.size() - OFFSET_OF(header, secondaryProgramOffset); - for (const Block &block : secondaryBlocks) { + for (const std::unique_ptr &block : secondaryBlocks) { data.insert(data.end(), - reinterpret_cast(block.instructions.data()), - reinterpret_cast(block.instructions.data()) - + block.instructions.size() * sizeof(usse::Instruction)); + reinterpret_cast(block->instructions.data()), + reinterpret_cast(block->instructions.data()) + + block->instructions.size() * sizeof(usse::Instruction)); } header.secondaryProgramOffsetEnd = data.size() - OFFSET_OF(header, secondaryProgramOffsetEnd); @@ -730,18 +860,18 @@ namespace gxp { 0, // exe_addr_high 0, // src1_n_or_exe_addr_mid 0 // src2_n_or_exe_addr_low - ); + ); data.insert(data.end(), reinterpret_cast(&phase), reinterpret_cast(&phase) + sizeof(phase)); header.primaryProgramInstructionCount++; - for (const Block &block : primaryBlocks) { - header.primaryProgramInstructionCount += block.instructions.size(); + for (const std::unique_ptr &block : primaryBlocks) { + header.primaryProgramInstructionCount += block->instructions.size(); data.insert(data.end(), - reinterpret_cast(block.instructions.data()), - reinterpret_cast(block.instructions.data()) - + block.instructions.size() * sizeof(usse::Instruction)); + reinterpret_cast(block->instructions.data()), + reinterpret_cast(block->instructions.data()) + + block->instructions.size() * sizeof(usse::Instruction)); } usse::BankLayout emitLayout = usse::BankLayout::srcLayout(usse::RegisterBank::Immediate); @@ -766,7 +896,7 @@ namespace gxp { 0, // src0_n 0, // src1_n 0 // src2_n - ); + ); data.insert(data.end(), reinterpret_cast(&emit), reinterpret_cast(&emit) + sizeof(emit)); diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index e3ea315..6774cff 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -3,7 +3,7 @@ #include 
#include -#include +#include namespace gxp { class Block; } class CompilerGXP; @@ -19,7 +19,7 @@ class TranslatorCode { }; class TranslatorArguments { - TranslatorArguments( + explicit TranslatorArguments( gxp::Block &block, const TranslatorCode &code, const uint32_t *instruction, @@ -44,11 +44,11 @@ class CompilerGXP : public Compiler { gxp::Builder builder; std::vector codes; - std::unordered_map> extensions; + std::map> extensions; - std::unordered_map idVaryings; - std::unordered_map idRegisters; - std::unordered_map varyingReferences; + std::map idVaryings; + std::map idRegisters; + std::map varyingReferences; static usse::Type translateType(SPIRType::BaseType baseType); static usse::DataType translateType(const SPIRType &type); @@ -66,8 +66,8 @@ class CompilerGXP : public Compiler { usse::RegisterReference getRegister(spv::Id id); - void createBlock(const SPIRBlock &block); - void createFunction(const SPIRFunction &function); + spv::Id createBlock(const SPIRBlock &block); + spv::Id createFunction(const SPIRFunction &function); void createVertexShaderResources(); void createFragmentShaderResources(); @@ -88,7 +88,9 @@ class CompilerGXP : public Compiler { void opAccessChain(const TranslatorArguments &arguments); void opVectorShuffle(const TranslatorArguments &arguments); void opFNegate(const TranslatorArguments &arguments); + void opFAdd(const TranslatorArguments &arguments); void opFSub(const TranslatorArguments &arguments); + void opFMul(const TranslatorArguments &arguments); void opDot(const TranslatorArguments &arguments); void opFunctionCall(const TranslatorArguments &arguments); void opExtInst(const TranslatorArguments &arguments); @@ -98,6 +100,7 @@ class CompilerGXP : public Compiler { void extGLSLFMin(const TranslatorArguments &arguments); void extGLSLFMax(const TranslatorArguments &arguments); void extGLSLReflect(const TranslatorArguments &arguments); + void extGLSLPow(const TranslatorArguments &arguments); public: std::vector compileData(); diff 
--git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 1804d68..7688159 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -121,14 +121,12 @@ void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; - spv::Id source = arguments.instruction[2]; + spv::Id sourceId = arguments.instruction[2]; uint32_t index = arguments.instruction[3]; - auto reg = idRegisters.find(source); - if (reg == idRegisters.end()) - throw std::runtime_error(fmt::format("Source ID {} was not loaded with a register reference.", source)); + usse::RegisterReference source = getRegister(sourceId); - idRegisters[result] = { reg->second.reference.getComponents(index, 1) }; + idRegisters[result] = { source.getComponents(index, 1) }; } void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { @@ -258,6 +256,23 @@ void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { idRegisters[result] = { destination }; } +void CompilerGXP::opFAdd(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[2]; + spv::Id secondId = arguments.instruction[3]; + + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); + + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, first.type); + + arguments.block.createAdd(first, second, destination); + + idRegisters[result] = { destination }; +} + void CompilerGXP::opFSub(const TranslatorArguments &arguments) { spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; @@ -275,6 +290,23 @@ void CompilerGXP::opFSub(const TranslatorArguments &arguments) { idRegisters[result] = { 
destination }; } +void CompilerGXP::opFMul(const TranslatorArguments &arguments) { + spv::Id typeId = arguments.instruction[0]; + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[2]; + spv::Id secondId = arguments.instruction[3]; + + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); + + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, first.type); + + arguments.block.createMul(first, second, destination); + + idRegisters[result] = { destination }; +} + void CompilerGXP::opDot(const TranslatorArguments &arguments) { spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; @@ -309,7 +341,9 @@ void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { idRegisters[function.arguments[a].id] = { getRegister(arguments.instruction[3 + a]) }; } - createFunction(function); + spv::Id returnValue = createFunction(function); + if (returnValue != 0) + idRegisters[result] = { getRegister(returnValue) }; } void CompilerGXP::opExtInst(const TranslatorArguments &arguments) { @@ -390,7 +424,52 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { usse::RegisterReference first = getRegister(firstId); usse::RegisterReference second = getRegister(secondId); - assert(false); + usse::RegisterReference internal = builder.allocateRegister( + usse::RegisterBank::Internal, second.type); + usse::RegisterReference magnitude = builder.allocateRegister( + usse::RegisterBank::Internal, { usse::Type::Float32, 1, 1 }); + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, second.type); + + magnitude.swizzle = std::vector(second.type.components, usse::SwizzleChannel::X); + magnitude.lockSwizzle = true; + + usse::RegisterReference two({ usse::Type::Float32, 1, 1 }, + usse::RegisterBank::FloatConstant, usse::getFPConstantIndex(2)); + two.swizzle = 
std::vector(second.type.components, usse::SwizzleChannel::X); + two.lockSwizzle = true; + + arguments.block.createPack(second, internal); + arguments.block.createDot(first, internal, magnitude); + arguments.block.createMul(two, magnitude, magnitude); + arguments.block.createMul(magnitude, first, destination); + arguments.block.createSub(internal, destination, destination); + + builder.freeRegister(magnitude); + builder.freeRegister(internal); + + idRegisters[result] = { destination }; +} + +void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { + spv::Id result = arguments.instruction[1]; + spv::Id firstId = arguments.instruction[4]; + spv::Id secondId = arguments.instruction[5]; + + usse::RegisterReference first = getRegister(firstId); + usse::RegisterReference second = getRegister(secondId); + + usse::RegisterReference destination = builder.allocateRegister( + usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); + + // Thank you xyz for doing my math homework. + // e^(b*log(a)) + + arguments.block.createLog(first, destination); + arguments.block.createMul(destination, second, destination); + arguments.block.createExp(destination, destination); + + idRegisters[result] = { destination }; } TranslatorArguments::TranslatorArguments( @@ -536,11 +615,11 @@ void CompilerGXP::createTranslators() { { spv::Op::OpSNegate, "OpSNegate", &CompilerGXP::unimplemented }, { spv::Op::OpFNegate, "OpFNegate", &CompilerGXP::opFNegate }, { spv::Op::OpIAdd, "OpIAdd", &CompilerGXP::unimplemented }, - { spv::Op::OpFAdd, "OpFAdd", &CompilerGXP::unimplemented }, + { spv::Op::OpFAdd, "OpFAdd", &CompilerGXP::opFAdd }, { spv::Op::OpISub, "OpISub", &CompilerGXP::unimplemented }, { spv::Op::OpFSub, "OpFSub", &CompilerGXP::opFSub }, { spv::Op::OpIMul, "OpIMul", &CompilerGXP::unimplemented }, - { spv::Op::OpFMul, "OpFMul", &CompilerGXP::unimplemented }, + { spv::Op::OpFMul, "OpFMul", &CompilerGXP::opFMul }, { spv::Op::OpUDiv, "OpUDiv", &CompilerGXP::unimplemented }, { 
spv::Op::OpSDiv, "OpSDiv", &CompilerGXP::unimplemented }, { spv::Op::OpFDiv, "OpFDiv", &CompilerGXP::unimplemented }, @@ -822,7 +901,8 @@ void CompilerGXP::createExtensions() { { GLSLstd450Normalize, &CompilerGXP::extGLSLNormalize }, { GLSLstd450FMin, &CompilerGXP::extGLSLFMin }, { GLSLstd450FMax, &CompilerGXP::extGLSLFMax }, - { GLSLstd450Reflect, &CompilerGXP::extGLSLReflect } + { GLSLstd450Reflect, &CompilerGXP::extGLSLReflect }, + { GLSLstd450Pow, &CompilerGXP::extGLSLPow } } } }; diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index 1def09f..9b2f347 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -153,7 +153,7 @@ TranslatorReference CompilerGXP::createParameter(gxp::ParameterCategory category } } -void CompilerGXP::createBlock(const SPIRBlock &block) { +spv::Id CompilerGXP::createBlock(const SPIRBlock &block) { gxp::Block *gxpBlock = builder.createPrimaryBlock(); for (Instruction instruction : block.ops) { @@ -168,19 +168,30 @@ void CompilerGXP::createBlock(const SPIRBlock &block) { (this->*code.implementation)(arguments); } + + if (block.terminator == SPIRBlock::Return) + return block.return_value; + + return 0; } -void CompilerGXP::createFunction(const SPIRFunction &function) { +spv::Id CompilerGXP::createFunction(const SPIRFunction &function) { for (uint32_t local : function.local_variables) { SPIRType type = get_type_from_variable(local); idRegisters[local] = createVariable(usse::RegisterBank::Temporary, type); } + spv::Id out = 0; + for (uint32_t blockId : function.blocks) { auto &block = get(blockId); - createBlock(block); + spv::Id temp = createBlock(block); + if (temp != 0) + out = temp; } + + return out; } void CompilerGXP::createVertexShaderResources() { From d29efe5d48e036001693cb134962a6abf1632202 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 30 Sep 2019 14:10:53 -0400 Subject: [PATCH 10/19] Disassembly support --- src/gxp/CMakeLists.txt | 
2 + src/gxp/include/gxp/disasm.h | 12 ++ src/gxp/include/gxp/usse.h | 46 +++---- src/gxp/src/builder.cpp | 260 ++++++++++++++++++----------------- src/gxp/src/disasm.cpp | 74 ++++++++++ src/gxp/src/usse.cpp | 19 +-- src/translator/src/codes.cpp | 63 ++++----- 7 files changed, 291 insertions(+), 185 deletions(-) create mode 100644 src/gxp/include/gxp/disasm.h create mode 100644 src/gxp/src/disasm.cpp diff --git a/src/gxp/CMakeLists.txt b/src/gxp/CMakeLists.txt index ee2a46f..508fce9 100644 --- a/src/gxp/CMakeLists.txt +++ b/src/gxp/CMakeLists.txt @@ -1,11 +1,13 @@ add_library(gxp include/gxp/usse.h include/gxp/instructions.h + include/gxp/disasm.h include/gxp/gxp.h include/gxp/builder.h src/usse.cpp src/instructions.cpp + src/disasm.cpp src/gxp.cpp src/builder.cpp) target_include_directories(gxp PUBLIC include) diff --git a/src/gxp/include/gxp/disasm.h b/src/gxp/include/gxp/disasm.h new file mode 100644 index 0000000..dc27cac --- /dev/null +++ b/src/gxp/include/gxp/disasm.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include + +namespace usse::disasm { + std::string disassembleReference(usse::RegisterReference reference); + std::string disassemble(const std::string &name, + const std::vector &sources, + const usse::RegisterReference *destination = nullptr); +} diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 8cb3ce2..eff1abb 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -71,6 +71,28 @@ namespace usse { typedef std::array SwizzleVec3; typedef std::array SwizzleVec4; + bool areSwizzlesInMatchingHalf(SwizzleChannel x, SwizzleChannel y); + int32_t getFPConstantIndex(float constant); + int32_t getSwizzleScalarIndex(SwizzleChannel element); + int32_t getSwizzleVec3Index(SwizzleVec3 elements, bool extended = false); + int32_t getSwizzleVec4Index(SwizzleVec4 elements, bool extended = false); + + inline SwizzleVec3 getSwizzleVec3All(SwizzleChannel channel) { + return { channel, channel, channel }; + } + + inline 
SwizzleVec4 getSwizzleVec4All(SwizzleChannel channel) { + return { channel, channel, channel, channel }; + } + + inline SwizzleVec3 getSwizzleVec3Default() { + return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z }; + } + + inline SwizzleVec4 getwizzleVec4Default() { + return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }; + } + class DataType { public: Type type = usse::Type::Float32; @@ -85,7 +107,7 @@ namespace usse { uint32_t index = 0; uint32_t size = 1; bool lockSwizzle = false; - std::vector swizzle; + usse::SwizzleVec4 swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::DontCare); RegisterReference operator+(uint32_t value); uint32_t getSwizzleMask(); @@ -116,26 +138,4 @@ namespace usse { std::string getTypeName(Type type); uint32_t getTypeSize(Type type); std::string getBankName(RegisterBank bank); - - bool areSwizzlesInMatchingHalf(SwizzleChannel x, SwizzleChannel y); - int32_t getFPConstantIndex(float constant); - int32_t getSwizzleScalarIndex(SwizzleChannel element); - int32_t getSwizzleVec3Index(SwizzleVec3 elements, bool extended = false); - int32_t getSwizzleVec4Index(SwizzleVec4 elements, bool extended = false); - - inline SwizzleVec3 getSwizzleVec3All(SwizzleChannel channel) { - return { channel, channel, channel }; - } - - inline SwizzleVec4 getSwizzleVec4All(SwizzleChannel channel) { - return { channel, channel, channel, channel }; - } - - inline SwizzleVec3 getSwizzleVec3Default() { - return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z }; - } - - inline SwizzleVec4 getwizzleVec4Default() { - return { SwizzleChannel::X, SwizzleChannel::Y, SwizzleChannel::Z, SwizzleChannel::W }; - } } diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index f3c24d4..9d2ab54 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -10,94 +11,94 @@ namespace gxp { constexpr uint16_t containerIndexSA = 14; - class MovRegisterData 
{ - public: - uint8_t destMask = 0; - int32_t swizzleIndex = 0; - usse::RegisterReference source; - usse::RegisterReference destination; - }; - - std::vector splitRegisterF32(usse::RegisterReference source, usse::RegisterReference destination) { - std::vector data; - - uint32_t destMask = destination.getSwizzleMask(); - - for (uint32_t a = 0; a < 2; a++) { - uint8_t mask = (destMask & (0b11u << (a * 2))) >> (a * 2); - - usse::RegisterReference sourceHalf = source.getComponents(a * 2, 2); - usse::RegisterReference destinationHalf = destination.getComponents(a * 2, 2); - - if (mask & 0b01u) { - if (mask & 0b10u) { - if (usse::areSwizzlesInMatchingHalf(sourceHalf.swizzle[0], sourceHalf.swizzle[1])) { - data.push_back({ - mask, - usse::getSwizzleVec4Index({ - sourceHalf.swizzle[0], - sourceHalf.swizzle[1], - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - }), - source.getComponents(a * 2, 2), // is this source index right? - destination.getComponents(a * 2, 2) - }); - } else { - // X and Y swizzle seperately - data.push_back({ - 0b01, - usse::getSwizzleVec4Index({ - sourceHalf.swizzle[0], - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - }), - source.getComponents(a * 2, 2), // copy both components anyway - destination.getComponents(a * 2, 2) - }); - data.push_back({ - 0b10, - usse::getSwizzleVec4Index({ - usse::SwizzleChannel::DontCare, - sourceHalf.swizzle[1], - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - }), - source.getComponents(a * 2, 2), // copy both components anyway - destination.getComponents(a * 2, 2) - }); - } - } else { - data.push_back({ - 0b01, - usse::getSwizzleVec4Index({ - sourceHalf.swizzle[0], - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - }), - source.getComponents(a * 2, 2), // copy both components anyway - destination.getComponents(a * 2, 2) - }); - } - } else if (mask & 0b10u) { - 
data.push_back({ - 0b10, - usse::getSwizzleVec4Index({ - usse::SwizzleChannel::DontCare, - sourceHalf.swizzle[1], - usse::SwizzleChannel::DontCare, - usse::SwizzleChannel::DontCare, - }), - source.getComponents(a * 2, 2), // copy both components anyway - destination.getComponents(a * 2, 2) - }); - } - } - - return data; - } +// class MovRegisterData { +// public: +// uint8_t destMask = 0; +// int32_t swizzleIndex = 0; +// usse::RegisterReference source; +// usse::RegisterReference destination; +// }; +// +// std::vector splitRegisterF32(usse::RegisterReference source, usse::RegisterReference destination) { +// std::vector data; +// +// uint32_t destMask = destination.getSwizzleMask(); +// +// for (uint32_t a = 0; a < 2; a++) { +// uint8_t mask = (destMask & (0b11u << (a * 2))) >> (a * 2); +// +// usse::RegisterReference sourceHalf = source.getComponents(a * 2, 2); +// usse::RegisterReference destinationHalf = destination.getComponents(a * 2, 2); +// +// if (mask & 0b01u) { +// if (mask & 0b10u) { +// if (usse::areSwizzlesInMatchingHalf(sourceHalf.swizzle[0], sourceHalf.swizzle[1])) { +// data.push_back({ +// mask, +// usse::getSwizzleVec4Index({ +// sourceHalf.swizzle[0], +// sourceHalf.swizzle[1], +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// }), +// source.getComponents(a * 2, 2), // is this source index right? 
+// destination.getComponents(a * 2, 2) +// }); +// } else { +// // X and Y swizzle seperately +// data.push_back({ +// 0b01, +// usse::getSwizzleVec4Index({ +// sourceHalf.swizzle[0], +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// }), +// source.getComponents(a * 2, 2), // copy both components anyway +// destination.getComponents(a * 2, 2) +// }); +// data.push_back({ +// 0b10, +// usse::getSwizzleVec4Index({ +// usse::SwizzleChannel::DontCare, +// sourceHalf.swizzle[1], +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// }), +// source.getComponents(a * 2, 2), // copy both components anyway +// destination.getComponents(a * 2, 2) +// }); +// } +// } else { +// data.push_back({ +// 0b01, +// usse::getSwizzleVec4Index({ +// sourceHalf.swizzle[0], +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// }), +// source.getComponents(a * 2, 2), // copy both components anyway +// destination.getComponents(a * 2, 2) +// }); +// } +// } else if (mask & 0b10u) { +// data.push_back({ +// 0b10, +// usse::getSwizzleVec4Index({ +// usse::SwizzleChannel::DontCare, +// sourceHalf.swizzle[1], +// usse::SwizzleChannel::DontCare, +// usse::SwizzleChannel::DontCare, +// }), +// source.getComponents(a * 2, 2), // copy both components anyway +// destination.getComponents(a * 2, 2) +// }); +// } +// } +// +// return data; +// } void Builder::setType(gxp::ShaderType type) { header.type = static_cast(type); @@ -108,46 +109,50 @@ namespace gxp { } void Block::createNop() { + usse::disasm::disassemble("nop", { }); instructions.push_back(usse::makeNOP()); } - void Block::createMov(usse::RegisterReference source, usse::RegisterReference destination) { + void Block::createMov( + usse::RegisterReference source, + usse::RegisterReference destination) { usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); usse::BankLayout 
destBankLayout = usse::BankLayout::destLayout(destination.bank); - const auto movs = splitRegisterF32(source, destination); - - for (const auto &mov : movs) { - instructions.push_back(usse::makeVMOV( - 0, // pred - 0, // skipinv - 0, // test_bit_2 - 0, // src0_comp_sel - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end_or_src0_bank_ext - srcBankLayout.extension, // src1_bank_ext - 0, // src2_bank_ext - 0, // move_type - 0, // repeat_count - 0, // nosched - static_cast(destination.type.type) & 0b111u, // move_data_type - 0, // test_bit_1 - mov.swizzleIndex, // src0_swiz - 0, // src0_bank_sel - destBankLayout.number, // dest_bank_sel - srcBankLayout.number, // src1_bank_sel - 0, // src2_bank_sel - mov.destMask, // dest_mask - destBankLayout.getIndex(mov.destination), // dest_n - 0, // src0_n - srcBankLayout.getIndex(mov.source), // src1_n - 0 // src2_n - )); - } + assert(source.type.components <= 2 && destination.type.components <= 2); + + fmt::print("{}\n", usse::disasm::disassemble("mov", { source }, &destination)); + instructions.push_back(usse::makeVMOV( + 0, // pred + 0, // skipinv + 0, // test_bit_2 + 0, // src0_comp_sel + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end_or_src0_bank_ext + srcBankLayout.extension, // src1_bank_ext + 0, // src2_bank_ext + 0, // move_type + 0, // repeat_count + 0, // nosched + static_cast(destination.type.type) & 0b111u, // move_data_type + 0, // test_bit_1 + source.getSwizzleIndex(), // src0_swiz + 0, // src0_bank_sel + destBankLayout.number, // dest_bank_sel + srcBankLayout.number, // src1_bank_sel + 0, // src2_bank_sel + destination.getSwizzleMask(), // dest_mask + destBankLayout.getIndex(destination), // dest_n + 0, // src0_n + srcBankLayout.getIndex(source), // src1_n + 0 // src2_n + )); } - void Block::createPack(usse::RegisterReference source, usse::RegisterReference destination) { + void Block::createPack( + usse::RegisterReference source, + usse::RegisterReference 
destination) { usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); @@ -168,6 +173,7 @@ namespace gxp { 2, // Output8 }; + fmt::print("{}\n", usse::disasm::disassemble("pck", { source }, &destination)); instructions.push_back(usse::makeVPCK( 0, // pred 0, // skipinv @@ -207,6 +213,7 @@ namespace gxp { assert(second.bank == usse::RegisterBank::Internal); + fmt::print("{}\n", usse::disasm::disassemble("dot", { first, second }, &destination)); instructions.push_back(usse::makeVDP( 0, // pred 0, // skipinv @@ -251,7 +258,7 @@ namespace gxp { shift += 3; } - // First/Second sources are flipped so negative effect can be applied to src1. -x + y = y - x + fmt::print("{}\n", usse::disasm::disassemble("add", { first, second }, &destination)); instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv @@ -294,6 +301,7 @@ namespace gxp { } // First/Second sources are flipped so negative effect can be applied to src1. 
-x + y = y - x + fmt::print("{}\n", usse::disasm::disassemble("sub", { first, second }, &destination)); instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv @@ -335,6 +343,7 @@ namespace gxp { shift += 3; } + fmt::print("{}\n", usse::disasm::disassemble("mul", { first, second }, &destination)); instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv @@ -380,6 +389,7 @@ namespace gxp { 0, // Output8 - Unsupported }; + fmt::print("{}\n", usse::disasm::disassemble("exp", { source }, &destination)); instructions.push_back(usse::makeVCOMP( 0, // pred 0, // skipinv @@ -421,6 +431,7 @@ namespace gxp { 0, // Output8 - Unsupported }; + fmt::print("{}\n", usse::disasm::disassemble("log", { source }, &destination)); instructions.push_back(usse::makeVCOMP( 0, // pred 0, // skipinv @@ -462,6 +473,7 @@ namespace gxp { 0, // Output8 - Unsupported }; + fmt::print("{}\n", usse::disasm::disassemble("rsq", { source }, &destination)); instructions.push_back(usse::makeVCOMP( 0, // pred 0, // skipinv @@ -499,6 +511,7 @@ namespace gxp { shift += 3; } + fmt::print("{}\n", usse::disasm::disassemble("min", { first, second }, &destination)); instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv @@ -540,6 +553,7 @@ namespace gxp { shift += 3; } + fmt::print("{}\n", usse::disasm::disassemble("max", { first, second }, &destination)); instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv @@ -628,8 +642,8 @@ namespace gxp { throw std::runtime_error("Missing allocation method for bank."); } - fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", - usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); +// fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", +// usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); return usse::RegisterReference(type, bank, index); } diff --git 
a/src/gxp/src/disasm.cpp b/src/gxp/src/disasm.cpp new file mode 100644 index 0000000..66a2f9d --- /dev/null +++ b/src/gxp/src/disasm.cpp @@ -0,0 +1,74 @@ +#include + +#include + +#include + +namespace usse::disasm { + static std::string getBankRegisterName(usse::RegisterBank bank) { + switch (bank) { + case RegisterBank::Temporary: return "r"; + case RegisterBank::Primary: return "pa"; + case RegisterBank::Output: return "o"; + case RegisterBank::Secondary: return "sa"; + case RegisterBank::Internal: return "i"; + case RegisterBank::Special: return "s"; + case RegisterBank::Global: return "g"; + case RegisterBank::FloatConstant: return "fp"; + case RegisterBank::Immediate: return "#"; + case RegisterBank::Index: return "index"; + case RegisterBank::Indexed1: return "indexed[1]"; + case RegisterBank::Indexed2: return "indexed[2]"; + case RegisterBank::Predicate: return "p"; + case RegisterBank::Invalid: return "invalid"; + default: return "unknown"; + } + } + + static std::string getSwizzleName(usse::SwizzleChannel channel) { + switch (channel) { + case SwizzleChannel::X: return "x"; + case SwizzleChannel::Y: return "y"; + case SwizzleChannel::Z: return "z"; + case SwizzleChannel::W: return "w"; + case SwizzleChannel::Zero: return "0"; + case SwizzleChannel::One: return "1"; + case SwizzleChannel::Two: return "2"; + case SwizzleChannel::Half: return "h"; + case SwizzleChannel::DontCare: return "*"; + } + } + + static std::string getFullSwizzleName(const SwizzleVec4 &swizzle, uint32_t count) { + std::stringstream stream; + + for (uint32_t a = 0; a < count; a++) { + stream << getSwizzleName(swizzle[a]); + } + + return stream.str(); + } + + std::string disassembleReference(usse::RegisterReference reference) { + return fmt::format("{}{}.{}", getBankRegisterName(reference.bank), reference.index, + getFullSwizzleName(reference.swizzle, reference.type.components)); + } + + std::string disassemble(const std::string &name, + const std::vector &sources, + const 
usse::RegisterReference *destination) { + std::stringstream stream; + + stream << name; + + if (destination) { + stream << " " << disassembleReference(*destination); + } + + for (const usse::RegisterReference &source : sources) { + stream << " " << disassembleReference(source); + } + + return stream.str(); + } +} diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index 924c953..f8b1ced 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -227,7 +227,8 @@ namespace usse { uint32_t RegisterReference::getSwizzleMask() { uint32_t mask = 0; - for (SwizzleChannel channel : swizzle) { + for (uint32_t a = 0; a < type.components; a++) { + SwizzleChannel channel = swizzle[a]; switch (channel) { case SwizzleChannel::X: mask |= 0b0001u; @@ -241,6 +242,8 @@ namespace usse { case SwizzleChannel::W: mask |= 0b1000u; break; + case SwizzleChannel::DontCare: + break; default: throw std::runtime_error("Unimplemented swizzle."); } @@ -293,12 +296,12 @@ namespace usse { ref.type.components = count; ref.type.arraySize = 1; - ref.swizzle.clear(); + ref.swizzle = usse::getSwizzleVec4All(SwizzleChannel::DontCare); for (uint32_t a = 0; a < count; a++) { if (lockSwizzle) - ref.swizzle.push_back(swizzle[component + a]); + ref.swizzle[a] = swizzle[component + a]; else - ref.swizzle.push_back(static_cast(component + a + swizzleOffset)); + ref.swizzle[a] = static_cast(component + a + swizzleOffset); } return ref; @@ -323,8 +326,9 @@ namespace usse { regIndex--; swizzleUp = true; } + for (uint32_t a = 0; a < type.components; a++) { - swizzle.push_back(static_cast(a + swizzleUp)); + swizzle[a] = static_cast(a + swizzleUp); } index = regIndex; } @@ -420,8 +424,7 @@ namespace usse { return a; } - - return -1; + throw std::runtime_error("Missing swizzle index for vec3."); } int32_t getSwizzleVec4Index(std::array elements, bool extended) { for (uint32_t a = 0; a < swizzleStandardSize; a++) { @@ -440,6 +443,6 @@ namespace usse { return a; } - return -1; + throw 
std::runtime_error("Missing swizzle index for vec4."); } } diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 7688159..9e4bc18 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -54,7 +54,10 @@ void CompilerGXP::opStore(const TranslatorArguments &arguments) { usse::RegisterReference sourceRegister = getRegister(source); usse::RegisterReference destinationRegister = getRegister(destination); - arguments.block.createMov(sourceRegister, destinationRegister); + if (sourceRegister.type.components == 1) + arguments.block.createMov(sourceRegister, destinationRegister); + else + arguments.block.createPack(sourceRegister, destinationRegister); } void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { @@ -95,7 +98,7 @@ void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { usse::RegisterReference scalar = getRegister(scalarId); usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, vector.type); - scalar.swizzle = std::vector(scalar.type.components, usse::SwizzleChannel::X); + scalar.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); scalar.lockSwizzle = true; scalar.type.components = vector.type.components; @@ -138,33 +141,31 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { usse::RegisterReference output = builder.allocateRegister(usse::RegisterBank::Temporary, { translateType(type.basetype), type.vecsize, 1 }); - for (size_t a = 0; a < type.vecsize; a++) { - usse::RegisterReference source; - - spv::Id vecId = arguments.instruction[2 + a]; - - // This is very wrong. Rework this! 
- auto reg = idRegisters.find(vecId); - if (reg != idRegisters.end()) { - source = reg->second.reference; - } else if (type.basetype == SPIRType::Float) { - SPIRConstant spvConstant = get(vecId); - - float packConstant = spvConstant.m.c[0].r[0].f32; - - // Especially here, when it returns it should try the next option or add it to literals. - // Continue the if case here. - int32_t constantIndex = usse::getFPConstantIndex(packConstant); - if (constantIndex != -1) { - source = usse::RegisterReference({ usse::Type::Float32, 1, 1 }, - usse::RegisterBank::FloatConstant, constantIndex); - source.lockSwizzle = true; - usse::SwizzleVec4 swizzleTemp = usse::getSwizzleVec4All(usse::SwizzleChannel::X); - source.swizzle = std::vector(swizzleTemp.begin(), swizzleTemp.end()); + for (size_t a = 0; a < type.vecsize;) { + spv::Id sourceId = arguments.instruction[2 + a]; + usse::RegisterReference source = getRegister(sourceId); + + uint32_t size = 1; + while (a + size < type.vecsize) { + usse::RegisterReference next = getRegister(arguments.instruction[2 + a + size]); + bool matchingBanks = source.bank == next.bank; + bool matchingIndices = (source.index + static_cast(source.swizzle[0]) + size) == + (next.index + static_cast(next.swizzle[0])); + if (matchingBanks && matchingIndices) { + size++; + } else { + break; } } - arguments.block.createMov(source, output.getComponents(a, 1)); + if (size == 1) + arguments.block.createMov(usse::RegisterReference( + { source.type.type, size, 1 }, source.bank, source.index), output.getComponents(a, size)); + else + arguments.block.createPack(usse::RegisterReference( + { source.type.type, size, 1 }, source.bank, source.index), output.getComponents(a, size)); + + a += size; } idRegisters[result] = { output }; @@ -369,13 +370,13 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { usse::RegisterReference magnitude = builder.allocateRegister( usse::RegisterBank::Internal, { source.type.type, 1, 1 }); - 
arguments.block.createMov(source, temporary); + arguments.block.createPack(source, temporary); arguments.block.createDot(temporary, temporary, magnitude); arguments.block.createReverseSquareRoot(magnitude, magnitude); - magnitude.swizzle = std::vector(source.type.components, usse::SwizzleChannel::X); + magnitude.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); magnitude.lockSwizzle = true; - magnitude.type.components = 4; + magnitude.type.components = source.type.components; arguments.block.createMul(temporary, magnitude, destination); builder.freeRegister(magnitude); @@ -431,12 +432,12 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, second.type); - magnitude.swizzle = std::vector(second.type.components, usse::SwizzleChannel::X); + magnitude.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); magnitude.lockSwizzle = true; usse::RegisterReference two({ usse::Type::Float32, 1, 1 }, usse::RegisterBank::FloatConstant, usse::getFPConstantIndex(2)); - two.swizzle = std::vector(second.type.components, usse::SwizzleChannel::X); + two.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); two.lockSwizzle = true; arguments.block.createPack(second, internal); From bbf2f3aac8cdeb2d30bb70a5e950571b6db58a98 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 30 Sep 2019 22:08:02 -0400 Subject: [PATCH 11/19] Safe/Debug instruction builder support --- generate-usse.py | 34 +- src/gxp/include/gxp/instructions.h | 834 ++++++------- src/gxp/include/gxp/usse.h | 3 +- src/gxp/src/builder.cpp | 11 +- src/gxp/src/instructions.cpp | 1812 ++++++++++++++-------------- src/gxp/src/usse.cpp | 48 +- src/translator/src/codes.cpp | 19 +- 7 files changed, 1393 insertions(+), 1368 deletions(-) diff --git a/generate-usse.py b/generate-usse.py index c86913f..401b86c 100644 --- a/generate-usse.py +++ b/generate-usse.py @@ -1,15 
+1,27 @@ from yaml import load, Loader +from enum import Enum + + +class Protection(Enum): + NONE = 0 + SAFE = 1 + DEBUG = 2 + + +protection = Protection.SAFE bit_types = """ typedef uint64_t Instruction; typedef uint64_t Param; """ +debug = '#include \n\n' if protection == Protection.DEBUG else '' + with open('external/usse-decoder-gen/grammar.yaml', 'r') as stream: instructions = load(stream, Loader=Loader) header = '#pragma once\n\n#include \n\nnamespace usse {' + bit_types + '\n' - source = '#include \n\nnamespace usse {\n' + source = '#include \n\n' + debug + 'namespace usse {\n' for instruction_name, instruction in instructions.items(): members = instruction['members'] @@ -30,7 +42,15 @@ parameters += ',\n\t\t\t' parameters += 'Param/*' + str(member_info) + '*/ ' + member_name first = False - function += '\t\tinst |= ' + member_name + ' << ' + str(index) + 'u;\n' + if protection == Protection.DEBUG: + function += '\t\tif ((' + member_name + ' & ~0b' + ('1' * member_info) + 'ull) != 0)\n'\ + + '\t\t\tthrow std::runtime_error("Instruction field ' + member_name\ + + ' for ' + instruction_name + ' out of bounds.' + '");\n' + if protection == Protection.NONE: + function += '\t\tinst |= ' + member_name + ' << ' + str(index) + 'u;\n' + else: + function += '\t\tinst |= (' + member_name + ' & 0b'\ + + ('1' * member_info) + 'ull) << ' + str(index) + 'u;\n' else: if 'offset' in member_info: index = member_info['offset'] @@ -43,7 +63,15 @@ parameters += ',\n\t\t\t' parameters += 'Param/*' + str(member_info['size']) + '*/ ' + member_name first = False - function += '\t\tinst |= ' + member_name + ' << ' + str(index) + 'u;\n' + if protection == Protection.DEBUG: + function += '\t\tif ((' + member_name + ' & ~0b' + ('1' * member_info['size']) + 'ull) != 0)\n'\ + + '\t\t\tthrow std::runtime_error("Instruction field ' + member_name \ + + ' for ' + instruction_name + ' out of bounds.' 
+ '");\n' + if protection == Protection.NONE: + function += '\t\tinst |= ' + member_name + ' << ' + str(index) + 'u;\n' + else: + function += '\t\tinst |= (' + member_name + ' & 0b'\ + + ('1' * member_info['size']) + 'ull) << ' + str(index) + 'u;\n' if parameters: declaration += '\n\t\t\t' + parameters diff --git a/src/gxp/include/gxp/instructions.h b/src/gxp/include/gxp/instructions.h index c4d0e51..e4f5836 100644 --- a/src/gxp/include/gxp/instructions.h +++ b/src/gxp/include/gxp/instructions.h @@ -6,440 +6,440 @@ namespace usse { typedef uint64_t Instruction; typedef uint64_t Param; - Instruction makeVMOV( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ test_bit_2, - Param/*1*/ src0_comp_sel, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end_or_src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ move_type, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*3*/ move_data_type, - Param/*1*/ test_bit_1, - Param/*4*/ src0_swiz, - Param/*1*/ src0_bank_sel, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*4*/ dest_mask, - Param/*6*/ dest_n, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVMOV( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVMAD( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ gpi1_swiz_ext, - Param/*1*/ opcode2, - Param/*1*/ 
dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*1*/ gpi1_neg, - Param/*1*/ gpi1_abs, - Param/*1*/ gpi0_swiz_ext, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*4*/ gpi1_swiz, - Param/*2*/ gpi1_n, - Param/*1*/ gpi0_neg, - Param/*1*/ src1_swiz_ext, - Param/*4*/ src1_swiz, - Param/*6*/ src1_n); + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + Param/*4*/ src1_swiz, + Param/*6*/ src1_n); - Instruction makeVMAD2( - Param/*1*/ dat_fmt, - Param/*2*/ pred, - Param/*1*/ skipinv, - Param/*1*/ src0_swiz_bits2, - Param/*1*/ syncstart, - Param/*1*/ src0_abs, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ src2_swiz, - Param/*1*/ src1_swiz_bit2, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*2*/ src2_mod, - Param/*1*/ src0_bank, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ dest_n, - Param/*2*/ src1_swiz_bits01, - Param/*2*/ src0_swiz_bits01, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + 
Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVDP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ clip_plane_enable, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*3*/ clip_plane_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*3*/ src1_swiz_w, - Param/*3*/ src1_swiz_z, - Param/*3*/ src1_swiz_y, - Param/*3*/ src1_swiz_x, - Param/*6*/ src1_n); + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n); - Instruction makeVNMAD32( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - 
Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVNMAD16( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ 
op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVLDST( - Param/*2*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ moe_expand, - Param/*1*/ sync_start, - Param/*1*/ cache_ext, - Param/*1*/ src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ mask_count, - Param/*2*/ addr_mode, - Param/*2*/ mode, - Param/*1*/ dest_bank_primattr, - Param/*1*/ range_enable, - Param/*2*/ data_type, - Param/*1*/ increment_or_decrement, - Param/*1*/ src0_bank, - Param/*1*/ cache_by_pass12, - Param/*1*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVTST( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ src1_neg, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*3*/ chan_cc, - Param/*2*/ pdst_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVTST( + 
Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVTSTMSK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ test_flag_2, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*2*/ tst_mask_type, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVPCK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ unknown, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ 
repeat_count, - Param/*3*/ src_fmt, - Param/*3*/ dest_fmt, - Param/*4*/ dest_mask, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*7*/ dest_n, - Param/*2*/ comp_sel_3, - Param/*1*/ scale, - Param/*2*/ comp_sel_1, - Param/*2*/ comp_sel_2, - Param/*6*/ src1_n, - Param/*1*/ comp0_sel_bit1, - Param/*6*/ src2_n, - Param/*1*/ comp_sel_0_bit0); + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0); - Instruction makeVBW( - Param/*3*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ repeat_count, - Param/*1*/ sync_start, - Param/*1*/ dest_ext, - Param/*1*/ end, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*4*/ mask_count, - Param/*1*/ src2_invert, - Param/*5*/ src2_rot, - Param/*2*/ src2_exth, - Param/*1*/ op2, - Param/*1*/ bitwise_partial, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src2_sel, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ sync_start, + Param/*1*/ dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ 
src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeSMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ syncstart, - Param/*1*/ minpack, - Param/*1*/ src0_ext, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*2*/ fconv_type, - Param/*2*/ mask_count, - Param/*2*/ dim, - Param/*2*/ lod_mode, - Param/*1*/ dest_use_pa, - Param/*2*/ sb_mode, - Param/*2*/ src0_type, - Param/*1*/ src0_bank, - Param/*2*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVCOMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ dest_type, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*4*/ repeat_count, - Param/*1*/ nosched, - Param/*2*/ op2, - Param/*2*/ src_type, - Param/*2*/ src1_mod, - Param/*2*/ src_comp, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*7*/ dest_n, - Param/*7*/ src1_n, - Param/*4*/ write_mask); + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + 
Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask); - Instruction makeSOP2( - Param/*2*/ pred, - Param/*1*/ cmod1, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*2*/ asel1, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ cmod2, - Param/*3*/ count, - Param/*1*/ amod1, - Param/*2*/ asel2, - Param/*3*/ csel1, - Param/*3*/ csel2, - Param/*1*/ amod2, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ src1_mod, - Param/*2*/ cop, - Param/*2*/ aop, - Param/*1*/ asrc1_mod, - Param/*1*/ dest_mod, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeBR( - Param/*3*/ pred, - Param/*1*/ syncend, - Param/*1*/ exception, - Param/*1*/ pwait, - Param/*1*/ sync_ext, - Param/*1*/ nosched, - Param/*1*/ br_monitor, - Param/*1*/ save_link, - Param/*1*/ br_type, - Param/*1*/ any_inst, - Param/*1*/ all_inst, - Param/*20*/ br_off); + Instruction makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ br_monitor, + Param/*1*/ save_link, + Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off); - Instruction makePHAS( - Param/*1*/ sprvv, - Param/*1*/ end, - Param/*1*/ imm, - Param/*1*/ 
src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ mode, - Param/*1*/ rate_hi, - Param/*1*/ rate_lo_or_nosched, - Param/*3*/ wait_cond, - Param/*8*/ temp_count, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ exe_addr_high, - Param/*7*/ src1_n_or_exe_addr_mid, - Param/*7*/ src2_n_or_exe_addr_low); + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low); - Instruction makeNOP(); + Instruction makeNOP(); - Instruction makeSMLSI( - Param/*1*/ nosched, - Param/*4*/ temp_limit, - Param/*4*/ pa_limit, - Param/*4*/ sa_limit, - Param/*1*/ dest_inc_mode, - Param/*1*/ src0_inc_mode, - Param/*1*/ src1_inc_mode, - Param/*1*/ src2_inc_mode, - Param/*8*/ dest_inc, - Param/*8*/ src0_inc, - Param/*8*/ src1_inc, - Param/*8*/ src2_inc); + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc); - Instruction makeEMIT( - Param/*2*/ sideband_high, - Param/*1*/ src0_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ target, - Param/*1*/ task_start_or_mte_hi, - Param/*1*/ task_end_or_mte_lo, - Param/*1*/ nosched, - Param/*6*/ sideband_mid, - Param/*1*/ src0_bank, - Param/*2*/ incp, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ sideband_low, - Param/*1*/ freep, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + 
Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeSPEC( - Param/*1*/ special, - Param/*2*/ category); + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category); -} \ No newline at end of file +} diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index eff1abb..3c93089 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -109,12 +109,13 @@ namespace usse { bool lockSwizzle = false; usse::SwizzleVec4 swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::DontCare); - RegisterReference operator+(uint32_t value); uint32_t getSwizzleMask(); int32_t getSwizzleIndex(bool extended = false); + uint32_t getEffectiveIndex(); RegisterReference getHalf(uint32_t half); RegisterReference getComponents(uint32_t component, uint32_t count); RegisterReference getElement(uint32_t element); + RegisterReference getExpanded(uint32_t count); RegisterReference() = default; RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex); diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 9d2ab54..8be3916 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -235,7 +235,7 @@ namespace gxp { second.index, // gpi0_n destBankLayout.getIndex(destination), // dest_n second.getSwizzleIndex(), // gpi0_swiz - first.swizzle.size() > 3 ? static_cast(first.swizzle[3]) : 0, // src1_swiz_w + first.type.components > 3 ? 
static_cast(first.swizzle[3]) : 0, // src1_swiz_w static_cast(first.swizzle[2]), // src1_swiz_z static_cast(first.swizzle[1]), // src1_swiz_y static_cast(first.swizzle[0]), // src1_swiz_x @@ -637,20 +637,23 @@ namespace gxp { case usse::RegisterBank::Internal: index = iRegPointer + needsAllocOffset(iRegPointer, size); iRegPointer += size + needsAllocOffset(iRegPointer, size); + assert(iRegPointer <= 8); break; default: throw std::runtime_error("Missing allocation method for bank."); } -// fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", -// usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); + fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", + usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); return usse::RegisterReference(type, bank, index); } void Builder::freeRegister(usse::RegisterReference reg) { - if (reg.bank == usse::RegisterBank::Internal && reg.index + reg.size == iRegPointer) { + if (reg.bank == usse::RegisterBank::Internal && reg.getEffectiveIndex() + reg.size == iRegPointer) { iRegPointer -= reg.size; + } else { + assert(false); } } diff --git a/src/gxp/src/instructions.cpp b/src/gxp/src/instructions.cpp index e18555d..e9563fc 100644 --- a/src/gxp/src/instructions.cpp +++ b/src/gxp/src/instructions.cpp @@ -1,930 +1,930 @@ #include namespace usse { - Instruction makeVMOV( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ test_bit_2, - Param/*1*/ src0_comp_sel, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end_or_src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ move_type, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*3*/ move_data_type, - Param/*1*/ test_bit_1, - Param/*4*/ src0_swiz, - Param/*1*/ src0_bank_sel, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*4*/ 
dest_mask, - Param/*6*/ dest_n, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00111ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= test_bit_2 << 54u; - inst |= src0_comp_sel << 53u; - inst |= syncstart << 52u; - inst |= dest_bank_ext << 51u; - inst |= end_or_src0_bank_ext << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= move_type << 46u; - inst |= repeat_count << 44u; - inst |= nosched << 43u; - inst |= move_data_type << 40u; - inst |= test_bit_1 << 39u; - inst |= src0_swiz << 35u; - inst |= src0_bank_sel << 34u; - inst |= dest_bank_sel << 32u; - inst |= src1_bank_sel << 30u; - inst |= src2_bank_sel << 28u; - inst |= dest_mask << 24u; - inst |= dest_n << 18u; - inst |= src0_n << 12u; - inst |= src1_n << 6u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVMOV( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00111ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (test_bit_2 & 0b1ull) << 54u; + inst |= (src0_comp_sel & 0b1ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (end_or_src0_bank_ext & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (move_type & 0b11ull) << 46u; + inst |= (repeat_count & 
0b11ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (move_data_type & 0b111ull) << 40u; + inst |= (test_bit_1 & 0b1ull) << 39u; + inst |= (src0_swiz & 0b1111ull) << 35u; + inst |= (src0_bank_sel & 0b1ull) << 34u; + inst |= (dest_bank_sel & 0b11ull) << 32u; + inst |= (src1_bank_sel & 0b11ull) << 30u; + inst |= (src2_bank_sel & 0b11ull) << 28u; + inst |= (dest_mask & 0b1111ull) << 24u; + inst |= (dest_n & 0b111111ull) << 18u; + inst |= (src0_n & 0b111111ull) << 12u; + inst |= (src1_n & 0b111111ull) << 6u; + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVMAD( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ gpi1_swiz_ext, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*1*/ gpi1_neg, - Param/*1*/ gpi1_abs, - Param/*1*/ gpi0_swiz_ext, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*4*/ gpi1_swiz, - Param/*2*/ gpi1_n, - Param/*1*/ gpi0_neg, - Param/*1*/ src1_swiz_ext, - Param/*4*/ src1_swiz, - Param/*6*/ src1_n) { - Instruction inst = 0; - inst |= 0b00011ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= gpi1_swiz_ext << 54u; - inst |= 0b1ull << 53u; - inst |= opcode2 << 52u; - inst |= dest_use_bank_ext << 51u; - inst |= end << 50u; - inst |= src1_bank_ext << 49u; - inst |= increment_mode << 47u; - inst |= gpi0_abs << 46u; - inst |= repeat_count << 44u; - inst |= nosched << 43u; - inst |= write_mask << 39u; - inst |= src1_neg << 38u; - inst |= src1_abs << 37u; - inst |= gpi1_neg << 36u; - inst |= gpi1_abs << 35u; - inst |= gpi0_swiz_ext << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= gpi0_n << 28u; - inst |= dest_n << 22u; - inst |= gpi0_swiz << 18u; - inst |= gpi1_swiz << 14u; 
- inst |= gpi1_n << 12u; - inst |= gpi0_neg << 11u; - inst |= src1_swiz_ext << 10u; - inst |= src1_swiz << 6u; - inst |= src1_n << 0u; - return inst; - } + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + Param/*4*/ src1_swiz, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (gpi1_swiz_ext & 0b1ull) << 54u; + inst |= 0b1ull << 53u; + inst |= (opcode2 & 0b1ull) << 52u; + inst |= (dest_use_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (increment_mode & 0b11ull) << 47u; + inst |= (gpi0_abs & 0b1ull) << 46u; + inst |= (repeat_count & 0b11ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (write_mask & 0b1111ull) << 39u; + inst |= (src1_neg & 0b1ull) << 38u; + inst |= (src1_abs & 0b1ull) << 37u; + inst |= (gpi1_neg & 0b1ull) << 36u; + inst |= (gpi1_abs & 0b1ull) << 35u; + inst |= (gpi0_swiz_ext & 0b1ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (gpi0_n & 0b11ull) << 28u; + inst |= (dest_n & 0b111111ull) << 22u; + inst |= (gpi0_swiz & 0b1111ull) << 18u; + inst |= (gpi1_swiz & 0b1111ull) << 14u; + inst |= (gpi1_n & 0b11ull) << 12u; + inst |= (gpi0_neg & 0b1ull) << 11u; + inst |= (src1_swiz_ext & 0b1ull) << 10u; + inst |= (src1_swiz & 0b1111ull) 
<< 6u; + inst |= (src1_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVMAD2( - Param/*1*/ dat_fmt, - Param/*2*/ pred, - Param/*1*/ skipinv, - Param/*1*/ src0_swiz_bits2, - Param/*1*/ syncstart, - Param/*1*/ src0_abs, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ src2_swiz, - Param/*1*/ src1_swiz_bit2, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*2*/ src2_mod, - Param/*1*/ src0_bank, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ dest_n, - Param/*2*/ src1_swiz_bits01, - Param/*2*/ src0_swiz_bits01, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00000ull << 59u; - inst |= dat_fmt << 58u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= src0_swiz_bits2 << 53u; - inst |= syncstart << 52u; - inst |= src0_abs << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= src2_swiz << 45u; - inst |= src1_swiz_bit2 << 44u; - inst |= nosched << 43u; - inst |= dest_mask << 39u; - inst |= src1_mod << 37u; - inst |= src2_mod << 35u; - inst |= src0_bank << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 22u; - inst |= src1_swiz_bits01 << 20u; - inst |= src0_swiz_bits01 << 18u; - inst |= src0_n << 12u; - inst |= src1_n << 6u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + 
Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00000ull << 59u; + inst |= (dat_fmt & 0b1ull) << 58u; + inst |= (pred & 0b11ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (src0_swiz_bits2 & 0b1ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (src0_abs & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (src2_swiz & 0b111ull) << 45u; + inst |= (src1_swiz_bit2 & 0b1ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (dest_mask & 0b1111ull) << 39u; + inst |= (src1_mod & 0b11ull) << 37u; + inst |= (src2_mod & 0b11ull) << 35u; + inst |= (src0_bank & 0b1ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b111111ull) << 22u; + inst |= (src1_swiz_bits01 & 0b11ull) << 20u; + inst |= (src0_swiz_bits01 & 0b11ull) << 18u; + inst |= (src0_n & 0b111111ull) << 12u; + inst |= (src1_n & 0b111111ull) << 6u; + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVDP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ clip_plane_enable, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*3*/ clip_plane_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*3*/ src1_swiz_w, - Param/*3*/ src1_swiz_z, - Param/*3*/ src1_swiz_y, - Param/*3*/ src1_swiz_x, - Param/*6*/ src1_n) { - Instruction inst = 0; - inst |= 0b00011ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= clip_plane_enable << 54u; - inst |= 0b0ull << 53u; - inst |= opcode2 << 52u; - inst |= dest_use_bank_ext << 51u; - inst |= end << 50u; - inst |= src1_bank_ext << 
49u; - inst |= increment_mode << 47u; - inst |= gpi0_abs << 46u; - inst |= repeat_count << 44u; - inst |= nosched << 43u; - inst |= write_mask << 39u; - inst |= src1_neg << 38u; - inst |= src1_abs << 37u; - inst |= clip_plane_n << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= gpi0_n << 28u; - inst |= dest_n << 22u; - inst |= gpi0_swiz << 18u; - inst |= src1_swiz_w << 15u; - inst |= src1_swiz_z << 12u; - inst |= src1_swiz_y << 9u; - inst |= src1_swiz_x << 6u; - inst |= src1_n << 0u; - return inst; - } + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (clip_plane_enable & 0b1ull) << 54u; + inst |= 0b0ull << 53u; + inst |= (opcode2 & 0b1ull) << 52u; + inst |= (dest_use_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (increment_mode & 0b11ull) << 47u; + inst |= (gpi0_abs & 0b1ull) << 46u; + inst |= (repeat_count & 0b11ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (write_mask & 0b1111ull) << 39u; + inst |= (src1_neg & 0b1ull) << 38u; + inst |= (src1_abs & 0b1ull) << 37u; + inst |= (clip_plane_n & 0b111ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (gpi0_n & 0b11ull) << 28u; + inst |= (dest_n & 
0b111111ull) << 22u; + inst |= (gpi0_swiz & 0b1111ull) << 18u; + inst |= (src1_swiz_w & 0b111ull) << 15u; + inst |= (src1_swiz_z & 0b111ull) << 12u; + inst |= (src1_swiz_y & 0b111ull) << 9u; + inst |= (src1_swiz_x & 0b111ull) << 6u; + inst |= (src1_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVNMAD32( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00001ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= src1_swiz_10_11 << 53u; - inst |= syncstart << 52u; - inst |= dest_bank_ext << 51u; - inst |= src1_swiz_9 << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= src2_swiz << 44u; - inst |= nosched << 43u; - inst |= dest_mask << 39u; - inst |= src1_mod << 37u; - inst |= src2_mod << 36u; - inst |= src1_swiz_7_8 << 34u; - inst |= dest_bank_sel << 32u; - inst |= src1_bank_sel << 30u; - inst |= src2_bank_sel << 28u; - inst |= dest_n << 22u; - inst |= src1_swiz_0_6 << 15u; - inst |= op2 << 12u; - inst |= src1_n << 6u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + 
Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00001ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (src1_swiz_10_11 & 0b11ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (src1_swiz_9 & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (src2_swiz & 0b1111ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (dest_mask & 0b1111ull) << 39u; + inst |= (src1_mod & 0b11ull) << 37u; + inst |= (src2_mod & 0b1ull) << 36u; + inst |= (src1_swiz_7_8 & 0b11ull) << 34u; + inst |= (dest_bank_sel & 0b11ull) << 32u; + inst |= (src1_bank_sel & 0b11ull) << 30u; + inst |= (src2_bank_sel & 0b11ull) << 28u; + inst |= (dest_n & 0b111111ull) << 22u; + inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; + inst |= (op2 & 0b111ull) << 12u; + inst |= (src1_n & 0b111111ull) << 6u; + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVNMAD16( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00010ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= src1_swiz_10_11 << 53u; - inst |= syncstart << 52u; - inst |= dest_bank_ext << 51u; - inst |= src1_swiz_9 << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= src2_swiz << 
44u; - inst |= nosched << 43u; - inst |= dest_mask << 39u; - inst |= src1_mod << 37u; - inst |= src2_mod << 36u; - inst |= src1_swiz_7_8 << 34u; - inst |= dest_bank_sel << 32u; - inst |= src1_bank_sel << 30u; - inst |= src2_bank_sel << 28u; - inst |= dest_n << 22u; - inst |= src1_swiz_0_6 << 15u; - inst |= op2 << 12u; - inst |= src1_n << 6u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00010ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (src1_swiz_10_11 & 0b11ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (src1_swiz_9 & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (src2_swiz & 0b1111ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (dest_mask & 0b1111ull) << 39u; + inst |= (src1_mod & 0b11ull) << 37u; + inst |= (src2_mod & 0b1ull) << 36u; + inst |= (src1_swiz_7_8 & 0b11ull) << 34u; + inst |= (dest_bank_sel & 0b11ull) << 32u; + inst |= (src1_bank_sel & 0b11ull) << 30u; + inst |= (src2_bank_sel & 0b11ull) << 28u; + inst |= (dest_n & 0b111111ull) << 22u; + inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; + inst |= (op2 & 0b111ull) << 12u; + inst |= (src1_n & 0b111111ull) << 6u; + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVLDST( - Param/*2*/ op1, - Param/*3*/ pred, 
- Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ moe_expand, - Param/*1*/ sync_start, - Param/*1*/ cache_ext, - Param/*1*/ src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ mask_count, - Param/*2*/ addr_mode, - Param/*2*/ mode, - Param/*1*/ dest_bank_primattr, - Param/*1*/ range_enable, - Param/*2*/ data_type, - Param/*1*/ increment_or_decrement, - Param/*1*/ src0_bank, - Param/*1*/ cache_by_pass12, - Param/*1*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b111ull << 61u; - inst |= op1 << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= nosched << 54u; - inst |= moe_expand << 53u; - inst |= sync_start << 52u; - inst |= cache_ext << 51u; - inst |= src0_bank_ext << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= mask_count << 44u; - inst |= addr_mode << 42u; - inst |= mode << 40u; - inst |= dest_bank_primattr << 39u; - inst |= range_enable << 38u; - inst |= data_type << 36u; - inst |= increment_or_decrement << 35u; - inst |= src0_bank << 34u; - inst |= cache_by_pass12 << 33u; - inst |= drc_sel << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= src0_n << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ 
dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b111ull << 61u; + inst |= (op1 & 0b11ull) << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (nosched & 0b1ull) << 54u; + inst |= (moe_expand & 0b1ull) << 53u; + inst |= (sync_start & 0b1ull) << 52u; + inst |= (cache_ext & 0b1ull) << 51u; + inst |= (src0_bank_ext & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (mask_count & 0b1111ull) << 44u; + inst |= (addr_mode & 0b11ull) << 42u; + inst |= (mode & 0b11ull) << 40u; + inst |= (dest_bank_primattr & 0b1ull) << 39u; + inst |= (range_enable & 0b1ull) << 38u; + inst |= (data_type & 0b11ull) << 36u; + inst |= (increment_or_decrement & 0b1ull) << 35u; + inst |= (src0_bank & 0b1ull) << 34u; + inst |= (cache_by_pass12 & 0b1ull) << 33u; + inst |= (drc_sel & 0b1ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (src0_n & 0b1111111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVTST( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ src1_neg, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*3*/ chan_cc, - Param/*2*/ pdst_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01001ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= onceonly << 53u; - inst |= syncstart << 52u; - inst |= dest_ext << 51u; - inst |= 
src1_neg << 50u; - inst |= src1_ext << 49u; - inst |= src2_ext << 48u; - inst |= prec << 47u; - inst |= src2_vscomp << 46u; - inst |= rpt_count << 44u; - inst |= sign_test << 42u; - inst |= zero_test << 40u; - inst |= test_crcomb_and << 39u; - inst |= chan_cc << 36u; - inst |= pdst_n << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= test_wben << 20u; - inst |= alu_sel << 18u; - inst |= alu_op << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVTST( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01001ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (onceonly & 0b1ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_ext & 0b1ull) << 51u; + inst |= (src1_neg & 0b1ull) << 50u; + inst |= (src1_ext & 0b1ull) << 49u; + inst |= (src2_ext & 0b1ull) << 48u; + inst |= (prec & 0b1ull) << 47u; + inst |= (src2_vscomp & 0b1ull) << 46u; + inst |= (rpt_count & 0b11ull) << 44u; + inst |= (sign_test & 0b11ull) << 42u; + inst |= (zero_test & 0b11ull) << 40u; + inst |= (test_crcomb_and & 0b1ull) << 39u; + inst |= (chan_cc & 0b111ull) << 36u; + inst |= (pdst_n & 0b11ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (test_wben 
& 0b1ull) << 20u; + inst |= (alu_sel & 0b11ull) << 18u; + inst |= (alu_op & 0b1111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVTSTMSK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ test_flag_2, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*2*/ tst_mask_type, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01111ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= onceonly << 53u; - inst |= syncstart << 52u; - inst |= dest_ext << 51u; - inst |= test_flag_2 << 50u; - inst |= src1_ext << 49u; - inst |= src2_ext << 48u; - inst |= prec << 47u; - inst |= src2_vscomp << 46u; - inst |= rpt_count << 44u; - inst |= sign_test << 42u; - inst |= zero_test << 40u; - inst |= test_crcomb_and << 39u; - inst |= tst_mask_type << 36u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= test_wben << 20u; - inst |= alu_sel << 18u; - inst |= alu_op << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + 
Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01111ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (onceonly & 0b1ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_ext & 0b1ull) << 51u; + inst |= (test_flag_2 & 0b1ull) << 50u; + inst |= (src1_ext & 0b1ull) << 49u; + inst |= (src2_ext & 0b1ull) << 48u; + inst |= (prec & 0b1ull) << 47u; + inst |= (src2_vscomp & 0b1ull) << 46u; + inst |= (rpt_count & 0b11ull) << 44u; + inst |= (sign_test & 0b11ull) << 42u; + inst |= (zero_test & 0b11ull) << 40u; + inst |= (test_crcomb_and & 0b1ull) << 39u; + inst |= (tst_mask_type & 0b11ull) << 36u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (test_wben & 0b1ull) << 20u; + inst |= (alu_sel & 0b11ull) << 18u; + inst |= (alu_op & 0b1111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVPCK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ unknown, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ repeat_count, - Param/*3*/ src_fmt, - Param/*3*/ dest_fmt, - Param/*4*/ dest_mask, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*7*/ dest_n, - Param/*2*/ comp_sel_3, - Param/*1*/ scale, - Param/*2*/ comp_sel_1, - Param/*2*/ comp_sel_2, - Param/*6*/ src1_n, - Param/*1*/ comp0_sel_bit1, - Param/*6*/ src2_n, - Param/*1*/ comp_sel_0_bit0) { - Instruction inst = 0; - inst |= 0b01000ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= nosched << 54u; - inst |= unknown << 53u; - inst |= syncstart << 52u; - inst |= dest_bank_ext << 51u; - inst |= end << 50u; 
- inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= repeat_count << 44u; - inst |= src_fmt << 41u; - inst |= dest_fmt << 38u; - inst |= dest_mask << 34u; - inst |= dest_bank_sel << 32u; - inst |= src1_bank_sel << 30u; - inst |= src2_bank_sel << 28u; - inst |= dest_n << 21u; - inst |= comp_sel_3 << 19u; - inst |= scale << 18u; - inst |= comp_sel_1 << 16u; - inst |= comp_sel_2 << 14u; - inst |= src1_n << 8u; - inst |= comp0_sel_bit1 << 7u; - inst |= src2_n << 1u; - inst |= comp_sel_0_bit0 << 0u; - return inst; - } + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0) { + Instruction inst = 0; + inst |= 0b01000ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (nosched & 0b1ull) << 54u; + inst |= (unknown & 0b1ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (repeat_count & 0b111ull) << 44u; + inst |= (src_fmt & 0b111ull) << 41u; + inst |= (dest_fmt & 0b111ull) << 38u; + inst |= (dest_mask & 0b1111ull) << 34u; + inst |= (dest_bank_sel & 0b11ull) << 32u; + inst |= (src1_bank_sel & 0b11ull) << 30u; + inst |= (src2_bank_sel & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (comp_sel_3 & 0b11ull) << 19u; + inst |= (scale & 0b1ull) << 18u; + inst |= 
(comp_sel_1 & 0b11ull) << 16u; + inst |= (comp_sel_2 & 0b11ull) << 14u; + inst |= (src1_n & 0b111111ull) << 8u; + inst |= (comp0_sel_bit1 & 0b1ull) << 7u; + inst |= (src2_n & 0b111111ull) << 1u; + inst |= (comp_sel_0_bit0 & 0b1ull) << 0u; + return inst; + } - Instruction makeVBW( - Param/*3*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ repeat_count, - Param/*1*/ sync_start, - Param/*1*/ dest_ext, - Param/*1*/ end, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*4*/ mask_count, - Param/*1*/ src2_invert, - Param/*5*/ src2_rot, - Param/*2*/ src2_exth, - Param/*1*/ op2, - Param/*1*/ bitwise_partial, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src2_sel, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01ull << 62u; - inst |= op1 << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= nosched << 54u; - inst |= repeat_count << 53u; - inst |= sync_start << 52u; - inst |= dest_ext << 51u; - inst |= end << 50u; - inst |= src1_ext << 49u; - inst |= src2_ext << 48u; - inst |= mask_count << 44u; - inst |= src2_invert << 43u; - inst |= src2_rot << 38u; - inst |= src2_exth << 36u; - inst |= op2 << 35u; - inst |= bitwise_partial << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= src2_sel << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ sync_start, + Param/*1*/ dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + 
Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01ull << 62u; + inst |= (op1 & 0b111ull) << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (nosched & 0b1ull) << 54u; + inst |= (repeat_count & 0b1ull) << 53u; + inst |= (sync_start & 0b1ull) << 52u; + inst |= (dest_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_ext & 0b1ull) << 49u; + inst |= (src2_ext & 0b1ull) << 48u; + inst |= (mask_count & 0b1111ull) << 44u; + inst |= (src2_invert & 0b1ull) << 43u; + inst |= (src2_rot & 0b11111ull) << 38u; + inst |= (src2_exth & 0b11ull) << 36u; + inst |= (op2 & 0b1ull) << 35u; + inst |= (bitwise_partial & 0b1ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (src2_sel & 0b1111111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeSMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ syncstart, - Param/*1*/ minpack, - Param/*1*/ src0_ext, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*2*/ fconv_type, - Param/*2*/ mask_count, - Param/*2*/ dim, - Param/*2*/ lod_mode, - Param/*1*/ dest_use_pa, - Param/*2*/ sb_mode, - Param/*2*/ src0_type, - Param/*1*/ src0_bank, - Param/*2*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b11100ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= nosched << 54u; - inst |= syncstart << 52u; - inst |= minpack << 51u; - inst |= src0_ext << 50u; - inst |= src1_ext << 49u; - inst |= src2_ext << 48u; - inst |= fconv_type << 46u; - inst |= mask_count << 44u; - inst |= dim << 42u; - inst |= lod_mode << 40u; - inst |= dest_use_pa << 39u; - inst |= sb_mode << 37u; - inst |= 
src0_type << 35u; - inst |= src0_bank << 34u; - inst |= drc_sel << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= src0_n << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11100ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (nosched & 0b1ull) << 54u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (minpack & 0b1ull) << 51u; + inst |= (src0_ext & 0b1ull) << 50u; + inst |= (src1_ext & 0b1ull) << 49u; + inst |= (src2_ext & 0b1ull) << 48u; + inst |= (fconv_type & 0b11ull) << 46u; + inst |= (mask_count & 0b11ull) << 44u; + inst |= (dim & 0b11ull) << 42u; + inst |= (lod_mode & 0b11ull) << 40u; + inst |= (dest_use_pa & 0b1ull) << 39u; + inst |= (sb_mode & 0b11ull) << 37u; + inst |= (src0_type & 0b11ull) << 35u; + inst |= (src0_bank & 0b1ull) << 34u; + inst |= (drc_sel & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (src0_n & 0b1111111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVCOMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ dest_type, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*4*/ repeat_count, - Param/*1*/ nosched, - 
Param/*2*/ op2, - Param/*2*/ src_type, - Param/*2*/ src1_mod, - Param/*2*/ src_comp, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*7*/ dest_n, - Param/*7*/ src1_n, - Param/*4*/ write_mask) { - Instruction inst = 0; - inst |= 0b00110ull << 59u; - inst |= pred << 56u; - inst |= skipinv << 55u; - inst |= dest_type << 53u; - inst |= syncstart << 52u; - inst |= dest_bank_ext << 51u; - inst |= end << 50u; - inst |= src1_bank_ext << 49u; - inst |= repeat_count << 44u; - inst |= nosched << 43u; - inst |= op2 << 41u; - inst |= src_type << 39u; - inst |= src1_mod << 37u; - inst |= src_comp << 35u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= dest_n << 21u; - inst |= src1_n << 7u; - inst |= write_mask << 0u; - return inst; - } + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask) { + Instruction inst = 0; + inst |= 0b00110ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (dest_type & 0b11ull) << 53u; + inst |= (syncstart & 0b1ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (repeat_count & 0b1111ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (op2 & 0b11ull) << 41u; + inst |= (src_type & 0b11ull) << 39u; + inst |= (src1_mod & 0b11ull) << 37u; + inst |= (src_comp & 0b11ull) << 35u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (write_mask & 0b1111ull) << 0u; + return inst; + } - Instruction 
makeSOP2( - Param/*2*/ pred, - Param/*1*/ cmod1, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*2*/ asel1, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ cmod2, - Param/*3*/ count, - Param/*1*/ amod1, - Param/*2*/ asel2, - Param/*3*/ csel1, - Param/*3*/ csel2, - Param/*1*/ amod2, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ src1_mod, - Param/*2*/ cop, - Param/*2*/ aop, - Param/*1*/ asrc1_mod, - Param/*1*/ dest_mod, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b10000ull << 59u; - inst |= pred << 57u; - inst |= cmod1 << 56u; - inst |= skipinv << 55u; - inst |= nosched << 54u; - inst |= asel1 << 52u; - inst |= dest_bank_ext << 51u; - inst |= end << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= cmod2 << 47u; - inst |= count << 44u; - inst |= amod1 << 43u; - inst |= asel2 << 41u; - inst |= csel1 << 38u; - inst |= csel2 << 35u; - inst |= amod2 << 34u; - inst |= dest_bank << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= dest_n << 21u; - inst |= src1_mod << 20u; - inst |= cop << 18u; - inst |= aop << 16u; - inst |= asrc1_mod << 15u; - inst |= dest_mod << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + 
inst |= 0b10000ull << 59u; + inst |= (pred & 0b11ull) << 57u; + inst |= (cmod1 & 0b1ull) << 56u; + inst |= (skipinv & 0b1ull) << 55u; + inst |= (nosched & 0b1ull) << 54u; + inst |= (asel1 & 0b11ull) << 52u; + inst |= (dest_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (cmod2 & 0b1ull) << 47u; + inst |= (count & 0b111ull) << 44u; + inst |= (amod1 & 0b1ull) << 43u; + inst |= (asel2 & 0b11ull) << 41u; + inst |= (csel1 & 0b111ull) << 38u; + inst |= (csel2 & 0b111ull) << 35u; + inst |= (amod2 & 0b1ull) << 34u; + inst |= (dest_bank & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (dest_n & 0b1111111ull) << 21u; + inst |= (src1_mod & 0b1ull) << 20u; + inst |= (cop & 0b11ull) << 18u; + inst |= (aop & 0b11ull) << 16u; + inst |= (asrc1_mod & 0b1ull) << 15u; + inst |= (dest_mod & 0b1ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeBR( - Param/*3*/ pred, - Param/*1*/ syncend, - Param/*1*/ exception, - Param/*1*/ pwait, - Param/*1*/ sync_ext, - Param/*1*/ nosched, - Param/*1*/ br_monitor, - Param/*1*/ save_link, - Param/*1*/ br_type, - Param/*1*/ any_inst, - Param/*1*/ all_inst, - Param/*20*/ br_off) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= pred << 56u; - inst |= syncend << 55u; - inst |= 0b0ull << 54u; - inst |= 0b00ull << 52u; - inst |= exception << 51u; - inst |= pwait << 45u; - inst |= sync_ext << 44u; - inst |= nosched << 43u; - inst |= br_monitor << 42u; - inst |= save_link << 41u; - inst |= 0b00ull << 39u; - inst |= br_type << 38u; - inst |= any_inst << 21u; - inst |= all_inst << 20u; - inst |= br_off << 0u; - return inst; - } + Instruction makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ 
br_monitor, + Param/*1*/ save_link, + Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= (pred & 0b111ull) << 56u; + inst |= (syncend & 0b1ull) << 55u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + inst |= (exception & 0b1ull) << 51u; + inst |= (pwait & 0b1ull) << 45u; + inst |= (sync_ext & 0b1ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (br_monitor & 0b1ull) << 42u; + inst |= (save_link & 0b1ull) << 41u; + inst |= 0b00ull << 39u; + inst |= (br_type & 0b1ull) << 38u; + inst |= (any_inst & 0b1ull) << 21u; + inst |= (all_inst & 0b1ull) << 20u; + inst |= (br_off & 0b11111111111111111111ull) << 0u; + return inst; + } - Instruction makePHAS( - Param/*1*/ sprvv, - Param/*1*/ end, - Param/*1*/ imm, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ mode, - Param/*1*/ rate_hi, - Param/*1*/ rate_lo_or_nosched, - Param/*3*/ wait_cond, - Param/*8*/ temp_count, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ exe_addr_high, - Param/*7*/ src1_n_or_exe_addr_mid, - Param/*7*/ src2_n_or_exe_addr_low) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b010ull << 56u; - inst |= sprvv << 55u; - inst |= 0b100ull << 52u; - inst |= end << 51u; - inst |= imm << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= mode << 45u; - inst |= rate_hi << 44u; - inst |= rate_lo_or_nosched << 43u; - inst |= wait_cond << 40u; - inst |= temp_count << 32u; - inst |= src1_bank << 30u; - inst |= src2_bank << 28u; - inst |= exe_addr_high << 14u; - inst |= src1_n_or_exe_addr_mid << 7u; - inst |= src2_n_or_exe_addr_low << 0u; - return inst; - } + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ 
src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + inst |= (sprvv & 0b1ull) << 55u; + inst |= 0b100ull << 52u; + inst |= (end & 0b1ull) << 51u; + inst |= (imm & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (mode & 0b1ull) << 45u; + inst |= (rate_hi & 0b1ull) << 44u; + inst |= (rate_lo_or_nosched & 0b1ull) << 43u; + inst |= (wait_cond & 0b111ull) << 40u; + inst |= (temp_count & 0b11111111ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (exe_addr_high & 0b111111ull) << 14u; + inst |= (src1_n_or_exe_addr_mid & 0b1111111ull) << 7u; + inst |= (src2_n_or_exe_addr_low & 0b1111111ull) << 0u; + return inst; + } - Instruction makeNOP() { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b0ull << 54u; - inst |= 0b00ull << 52u; - inst |= 0b101ull << 38u; - return inst; - } + Instruction makeNOP() { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + inst |= 0b101ull << 38u; + return inst; + } - Instruction makeSMLSI( - Param/*1*/ nosched, - Param/*4*/ temp_limit, - Param/*4*/ pa_limit, - Param/*4*/ sa_limit, - Param/*1*/ dest_inc_mode, - Param/*1*/ src0_inc_mode, - Param/*1*/ src1_inc_mode, - Param/*1*/ src2_inc_mode, - Param/*8*/ dest_inc, - Param/*8*/ src0_inc, - Param/*8*/ src1_inc, - Param/*8*/ src2_inc) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b010ull << 56u; - inst |= 0b01ull << 52u; - inst |= nosched << 50u; - inst |= temp_limit << 44u; - inst |= pa_limit << 40u; - inst |= sa_limit << 36u; - inst |= dest_inc_mode << 35u; - inst |= src0_inc_mode << 34u; - inst |= src1_inc_mode << 33u; - inst |= src2_inc_mode << 32u; - inst |= dest_inc << 24u; - inst |= src0_inc << 16u; - inst |= src1_inc << 8u; 
- inst |= src2_inc << 0u; - return inst; - } + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + inst |= 0b01ull << 52u; + inst |= (nosched & 0b1ull) << 50u; + inst |= (temp_limit & 0b1111ull) << 44u; + inst |= (pa_limit & 0b1111ull) << 40u; + inst |= (sa_limit & 0b1111ull) << 36u; + inst |= (dest_inc_mode & 0b1ull) << 35u; + inst |= (src0_inc_mode & 0b1ull) << 34u; + inst |= (src1_inc_mode & 0b1ull) << 33u; + inst |= (src2_inc_mode & 0b1ull) << 32u; + inst |= (dest_inc & 0b11111111ull) << 24u; + inst |= (src0_inc & 0b11111111ull) << 16u; + inst |= (src1_inc & 0b11111111ull) << 8u; + inst |= (src2_inc & 0b11111111ull) << 0u; + return inst; + } - Instruction makeEMIT( - Param/*2*/ sideband_high, - Param/*1*/ src0_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ target, - Param/*1*/ task_start_or_mte_hi, - Param/*1*/ task_end_or_mte_lo, - Param/*1*/ nosched, - Param/*6*/ sideband_mid, - Param/*1*/ src0_bank, - Param/*2*/ incp, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ sideband_low, - Param/*1*/ freep, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b011ull << 56u; - inst |= sideband_high << 54u; - inst |= 0b10ull << 52u; - inst |= src0_bank_ext << 51u; - inst |= end << 50u; - inst |= src1_bank_ext << 49u; - inst |= src2_bank_ext << 48u; - inst |= target << 46u; - inst |= task_start_or_mte_hi << 45u; - inst |= task_end_or_mte_lo << 44u; - inst |= nosched << 43u; - inst |= sideband_mid << 35u; - inst |= src0_bank << 34u; - inst |= incp << 32u; - inst |= src1_bank << 30u; - 
inst |= src2_bank << 28u; - inst |= sideband_low << 22u; - inst |= freep << 21u; - inst |= src0_n << 14u; - inst |= src1_n << 7u; - inst |= src2_n << 0u; - return inst; - } + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b011ull << 56u; + inst |= (sideband_high & 0b11ull) << 54u; + inst |= 0b10ull << 52u; + inst |= (src0_bank_ext & 0b1ull) << 51u; + inst |= (end & 0b1ull) << 50u; + inst |= (src1_bank_ext & 0b1ull) << 49u; + inst |= (src2_bank_ext & 0b1ull) << 48u; + inst |= (target & 0b11ull) << 46u; + inst |= (task_start_or_mte_hi & 0b1ull) << 45u; + inst |= (task_end_or_mte_lo & 0b1ull) << 44u; + inst |= (nosched & 0b1ull) << 43u; + inst |= (sideband_mid & 0b111111ull) << 35u; + inst |= (src0_bank & 0b1ull) << 34u; + inst |= (incp & 0b11ull) << 32u; + inst |= (src1_bank & 0b11ull) << 30u; + inst |= (src2_bank & 0b11ull) << 28u; + inst |= (sideband_low & 0b111111ull) << 22u; + inst |= (freep & 0b1ull) << 21u; + inst |= (src0_n & 0b1111111ull) << 14u; + inst |= (src1_n & 0b1111111ull) << 7u; + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeSPEC( - Param/*1*/ special, - Param/*2*/ category) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= special << 54u; - inst |= category << 52u; - return inst; - } + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= (special & 0b1ull) << 54u; + inst |= (category & 0b11ull) << 52u; + return inst; + } } diff 
--git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index f8b1ced..9a11d16 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -216,14 +216,6 @@ namespace usse { } } - usse::RegisterReference RegisterReference::operator+(uint32_t value) { - usse::RegisterReference ref = *this; - - ref.index += value; - - return ref; - } - uint32_t RegisterReference::getSwizzleMask() { uint32_t mask = 0; @@ -271,46 +263,44 @@ namespace usse { } } + uint32_t RegisterReference::getEffectiveIndex() { + return index + static_cast(swizzle[0]); + } + RegisterReference RegisterReference::getHalf(uint32_t half) { uint32_t width = (type.components - 1) / 2 + 1; return getComponents(width * half, width); } - usse::RegisterReference RegisterReference::getComponents(uint32_t component, uint32_t count) { -// if (component + count > type.components) -// throw std::runtime_error(fmt::format( -// "Tried to get component {} (size: {}) on a register with only {} components.", -// component, count, type.components)); - // Other restrictions, can't do size > 2, can't do .yz swizzle... 
- - usse::RegisterReference ref = *this; + RegisterReference RegisterReference::getComponents(uint32_t component, uint32_t count) { + RegisterReference reg = *this; int32_t swizzleOffset = 0; if (component >= 2) { - ref.index += 2; + reg.index += 2; swizzleOffset = -2; } - ref.type.components = count; - ref.type.arraySize = 1; + reg.type.components = count; + reg.type.arraySize = 1; - ref.swizzle = usse::getSwizzleVec4All(SwizzleChannel::DontCare); + reg.swizzle = usse::getSwizzleVec4All(SwizzleChannel::DontCare); for (uint32_t a = 0; a < count; a++) { if (lockSwizzle) - ref.swizzle[a] = swizzle[component + a]; + reg.swizzle[a] = swizzle[component + a]; else - ref.swizzle[a] = static_cast(component + a + swizzleOffset); + reg.swizzle[a] = static_cast(component + a + swizzleOffset); } - return ref; + return reg; } RegisterReference RegisterReference::getElement(uint32_t element) { if (element >= type.arraySize) throw std::runtime_error("Register reference array out of bounds."); - usse::RegisterReference reg = *this; + RegisterReference reg = *this; reg.type.arraySize = 1; reg.size = size / type.arraySize; @@ -319,6 +309,16 @@ namespace usse { return reg; } + RegisterReference RegisterReference::getExpanded(uint32_t count) { + RegisterReference reg = *this; + + reg.lockSwizzle = true; + reg.swizzle = usse::getSwizzleVec4All(swizzle[0]); + reg.type.components = count; + + return reg; + } + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex) : type(type), bank(bank), size(getTypeSize(type.type) * type.components * type.arraySize / 4) { bool swizzleUp = false; diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 9e4bc18..6ea8c4a 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -374,10 +374,7 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { arguments.block.createDot(temporary, temporary, magnitude); 
arguments.block.createReverseSquareRoot(magnitude, magnitude); - magnitude.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); - magnitude.lockSwizzle = true; - magnitude.type.components = source.type.components; - arguments.block.createMul(temporary, magnitude, destination); + arguments.block.createMul(temporary, magnitude.getExpanded(source.type.components), destination); builder.freeRegister(magnitude); builder.freeRegister(temporary); @@ -428,17 +425,13 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { usse::RegisterReference internal = builder.allocateRegister( usse::RegisterBank::Internal, second.type); usse::RegisterReference magnitude = builder.allocateRegister( - usse::RegisterBank::Internal, { usse::Type::Float32, 1, 1 }); + usse::RegisterBank::Internal, { usse::Type::Float32, 1, 1 }) + .getExpanded(second.type.components); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, second.type); - magnitude.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); - magnitude.lockSwizzle = true; - - usse::RegisterReference two({ usse::Type::Float32, 1, 1 }, - usse::RegisterBank::FloatConstant, usse::getFPConstantIndex(2)); - two.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); - two.lockSwizzle = true; + usse::RegisterReference two = usse::RegisterReference({ usse::Type::Float32, 1, 1 }, + usse::RegisterBank::FloatConstant, usse::getFPConstantIndex(2)).getExpanded(1); arguments.block.createPack(second, internal); arguments.block.createDot(first, internal, magnitude); @@ -463,7 +456,7 @@ void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); - // Thank you xyz for doing my math homework. + // Thank you xyz for fixing my Vita related problem. 
// e^(b*log(a)) arguments.block.createLog(first, destination); From 5c82cbfba8ed4e36e44f65dfb9d541b9ce05fd28 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Fri, 18 Oct 2019 15:24:53 -0400 Subject: [PATCH 12/19] Add config and project cleanup --- src/gxp/CMakeLists.txt | 4 +- src/gxp/include/gxp/block.h | 64 ++ src/gxp/include/gxp/builder.h | 59 +- src/gxp/include/gxp/usse.h | 2 + src/gxp/src/block.cpp | 490 +++++++++++++++ src/gxp/src/builder.cpp | 580 +----------------- src/interface/include/interface/interface.h | 4 + src/interface/src/interface.cpp | 9 +- src/translator/CMakeLists.txt | 1 + src/translator/include/translator/config.h | 7 + .../include/translator/translator.h | 9 +- src/translator/src/codes.cpp | 2 + src/translator/src/translator.cpp | 3 +- src/util/include/util/util.h | 5 + 14 files changed, 613 insertions(+), 626 deletions(-) create mode 100644 src/gxp/include/gxp/block.h create mode 100644 src/gxp/src/block.cpp create mode 100644 src/translator/include/translator/config.h diff --git a/src/gxp/CMakeLists.txt b/src/gxp/CMakeLists.txt index 508fce9..9c1bd0c 100644 --- a/src/gxp/CMakeLists.txt +++ b/src/gxp/CMakeLists.txt @@ -3,12 +3,14 @@ add_library(gxp include/gxp/instructions.h include/gxp/disasm.h include/gxp/gxp.h + include/gxp/block.h include/gxp/builder.h src/usse.cpp src/instructions.cpp src/disasm.cpp src/gxp.cpp - src/builder.cpp) + src/builder.cpp + src/block.cpp) target_include_directories(gxp PUBLIC include) target_link_libraries(gxp PUBLIC util) diff --git a/src/gxp/include/gxp/block.h b/src/gxp/include/gxp/block.h new file mode 100644 index 0000000..7c108bc --- /dev/null +++ b/src/gxp/include/gxp/block.h @@ -0,0 +1,64 @@ +#pragma once + +#include + +#include + +namespace gxp { + class Builder; + + class Block { + std::vector instructions; + + void printDisassembly(const std::string &name, + const std::vector &sources, + const usse::RegisterReference *destination = nullptr); + + explicit Block(Builder 
&parent); + + friend class gxp::Builder; + public: + Builder &parent; + + void createNop(); + void createMov( + usse::RegisterReference source, + usse::RegisterReference destination); + void createPack( + usse::RegisterReference source, + usse::RegisterReference destination); + void createDot( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createAdd( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createSub( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createMul( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createExp( + usse::RegisterReference source, + usse::RegisterReference destination); + void createLog( + usse::RegisterReference source, + usse::RegisterReference destination); + void createReverseSquareRoot( + usse::RegisterReference source, + usse::RegisterReference destination); + void createMin( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + void createMax( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination); + }; +} diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index 5bd509f..ec0d1d0 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -8,63 +9,11 @@ namespace gxp { typedef uint64_t Instruction; - class Builder; - enum class ShaderType : uint8_t { Vertex = 0, Fragment = 1, }; - class Block { - std::vector instructions; - - explicit Block(Builder &parent); - friend class gxp::Builder; - public: - Builder &parent; - - void createNop(); - void createMov( - usse::RegisterReference source, - usse::RegisterReference destination); - void createPack( - 
usse::RegisterReference source, - usse::RegisterReference destination); - void createDot( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - void createAdd( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - void createSub( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - void createMul( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - void createExp( - usse::RegisterReference source, - usse::RegisterReference destination); - void createLog( - usse::RegisterReference source, - usse::RegisterReference destination); - void createReverseSquareRoot( - usse::RegisterReference source, - usse::RegisterReference destination); - void createMin( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - void createMax( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination); - }; - class Parameter { public: std::string name; @@ -84,6 +33,9 @@ namespace gxp { ProgramHeader header; ProgramVaryings varyings; + bool printDisassembly = false; + bool printAllocations = false; + uint32_t paRegPointer = 0; uint32_t saRegPointer = 0; uint32_t oRegPointer = 0; @@ -94,6 +46,8 @@ namespace gxp { std::vector> secondaryBlocks; std::vector parameters; std::vector fragmentInputs; + + friend class Block; public: void setType(ShaderType type); ShaderType getType(); @@ -116,5 +70,6 @@ namespace gxp { std::vector build(); Builder(); + Builder(bool printDisassembly, bool printAllocations); }; } diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 3c93089..e2e565e 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -6,6 +6,8 @@ #include namespace usse { + typedef uint64_t Instruction; + enum class RegisterBank { 
Temporary, Primary, diff --git a/src/gxp/src/block.cpp b/src/gxp/src/block.cpp new file mode 100644 index 0000000..58105e8 --- /dev/null +++ b/src/gxp/src/block.cpp @@ -0,0 +1,490 @@ +#include + +#include +#include +#include + +#include + +namespace gxp { + void Block::printDisassembly(const std::string &name, + const std::vector &sources, + const usse::RegisterReference *destination) { + if (parent.printDisassembly) + fmt::print("[disasm] {}\n", usse::disasm::disassemble(name, sources, destination)); + } + + void Block::createNop() { + printDisassembly("nop", { }); // gated by parent.printDisassembly, like every other create* + instructions.push_back(usse::makeNOP()); + } + + void Block::createMov( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + assert(source.type.components <= 2 && destination.type.components <= 2); + + printDisassembly("mov", { source }, &destination); + instructions.push_back(usse::makeVMOV( + 0, // pred + 0, // skipinv + 0, // test_bit_2 + 0, // src0_comp_sel + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end_or_src0_bank_ext + srcBankLayout.extension, // src1_bank_ext + 0, // src2_bank_ext + 0, // move_type + 0, // repeat_count + 0, // nosched + static_cast(destination.type.type) & 0b111u, // move_data_type + 0, // test_bit_1 + source.getSwizzleIndex(), // src0_swiz + 0, // src0_bank_sel + destBankLayout.number, // dest_bank_sel + srcBankLayout.number, // src1_bank_sel + 0, // src2_bank_sel + destination.getSwizzleMask(), // dest_mask + destBankLayout.getIndex(destination), // dest_n + 0, // src0_n + srcBankLayout.getIndex(source), // src1_n + 0 // src2_n + )); + } + + void Block::createPack( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = 
usse::BankLayout::destLayout(destination.bank); + + if (destination.type.type == usse::Type::Signed32 || destination.type.type == usse::Type::Unsigned32 + || source.type.type == usse::Type::Signed32 || source.type.type == usse::Type::Unsigned32) + throw std::runtime_error("Cannot pack S32/U32 type."); + + usse::Param typeTable[] = { + 1, // Signed8 + 4, // Signed16 + 0, // Signed32 - Unsupported + 7, // Fixed10 + 5, // Float16 + 6, // Float32 + 0, // Unsigned8 + 3, // Unsigned16 + 0, // Unsigned32 - Unsupported + 2, // Output8 + }; + + printDisassembly("pck", { source }, &destination); + instructions.push_back(usse::makeVPCK( + 0, // pred + 0, // skipinv + 0, // nosched + 0, // unknown + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + srcBankLayout.extension, // src2_bank_ext + 0, // repeat_count + typeTable[static_cast(source.type.type)], // src_fmt + typeTable[static_cast(destination.type.type)], // dest_fmt + destination.getSwizzleMask(), // dest_mask + destBankLayout.number, // dest_bank_sel + srcBankLayout.number, // src1_bank_sel + srcBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + destination.type.components > 3 ? 
static_cast(destination.swizzle[3]) & 0b11u : 0, // comp_sel_3 + false, // scale + static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 + static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 + srcBankLayout.getIndex(source.getHalf(0)), // src1_n + (static_cast(destination.swizzle[0]) & 0b10u) >> 1u, // comp0_sel_bit1 (mask first: >> binds tighter than &) + srcBankLayout.getIndex(source.getHalf(1)), // src2_n + static_cast(destination.swizzle[0]) & 0b01u // comp_sel_0_bit0 + )); + } + + + void Block::createDot( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + assert(second.bank == usse::RegisterBank::Internal); + + printDisassembly("dot", { first, second }, &destination); + instructions.push_back(usse::makeVDP( + 0, // pred + 0, // skipinv + 0, // clip_plane_enable + first.type.components == 4, // opcode2 + destBankLayout.extension, // dest_use_bank_ext + 0, // end + firstBankLayout.extension, // src1_bank_ext + 3, /* Seems to be the normal value... */ // increment_mode + 0, // gpi0_abs + 0, // repeat_count + 0, // nosched + destination.getSwizzleMask(), // write_mask + 0, // src1_neg + 0, // src1_abs + 0, // clip_plane_n + destBankLayout.number, // dest_bank + firstBankLayout.number, // src1_bank + second.index, // gpi0_n + destBankLayout.getIndex(destination), // dest_n + second.getSwizzleIndex(), // gpi0_swiz + first.type.components > 3 ? 
static_cast(first.swizzle[3]) : 0, // src1_swiz_w + static_cast(first.swizzle[2]), // src1_swiz_z + static_cast(first.swizzle[1]), // src1_swiz_y + static_cast(first.swizzle[0]), // src1_swiz_x + firstBankLayout.getIndex(first) // src1_n + )); + } + + void Block::createAdd( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + printDisassembly("add", { first, second }, &destination); + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + secondBankLayout.extension, // src1_bank_ext + firstBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + secondBankLayout.number, // src1_bank_sel + firstBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Add), // op2 + secondBankLayout.getIndex(second), // src1_n + firstBankLayout.getIndex(first) // src2_n + )); + } + + void Block::createSub( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout 
secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + // First/Second sources are flipped so negative effect can be applied to src1. -x + y = y - x + printDisassembly("sub", { first, second }, &destination); + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + secondBankLayout.extension, // src1_bank_ext + firstBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b01, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + secondBankLayout.number, // src1_bank_sel + firstBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Add), // op2 + secondBankLayout.getIndex(second), // src1_n + firstBankLayout.getIndex(first) // src2_n + )); + } + + void Block::createMul( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + printDisassembly("mul", { first, second }, &destination); + 
instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Multiply), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); + } + + void Block::createExp( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + printDisassembly("exp", { source }, &destination); + instructions.push_back(usse::makeVCOMP( + 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::Exponent), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp 
+ destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + + void Block::createLog( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + printDisassembly("log", { source }, &destination); + instructions.push_back(usse::makeVCOMP( + 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::Logarithm), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp + destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + + void Block::createReverseSquareRoot( + usse::RegisterReference source, + usse::RegisterReference destination) { + usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + usse::Param typeTable[] = { + 0, // Signed8 - Unsupported + 0, // Signed16 - Unsupported + 0, // Signed32 - Unsupported + 2, // Fixed10 + 1, // Float16 + 0, // Float32 + 0, // 
Unsigned8 - Unsupported + 0, // Unsigned16 - Unsupported + 0, // Unsigned32 - Unsupported + 0, // Output8 - Unsupported + }; + + printDisassembly("rsq", { source }, &destination); + instructions.push_back(usse::makeVCOMP( + 0, // pred + 0, // skipinv + typeTable[static_cast(destination.type.type)], // dest_type + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + 0, // end + srcBankLayout.extension, // src1_bank_ext + 0, // repeat_count + 0, // nosched + static_cast(usse::InstructionVCOMPOp::ReverseSquareRoot), // op2 + typeTable[static_cast(source.type.type)], // src_type + 0b00, // src1_mod + static_cast(source.swizzle[0]), // src_comp + destBankLayout.number, // dest_bank + srcBankLayout.number, // src1_bank + destBankLayout.getIndex(destination), // dest_n + srcBankLayout.getIndex(source), // src1_n + destination.getSwizzleMask() // write_mask + )); + } + + void Block::createMin( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + printDisassembly("min", { first, second }, &destination); + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // 
src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Min), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); + } + + void Block::createMax( + usse::RegisterReference first, + usse::RegisterReference second, + usse::RegisterReference destination) { + usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); + usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); + usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); + + uint32_t shift = 0; + uint32_t firstSwizzle = 0; + for (usse::SwizzleChannel channel : first.swizzle) { + firstSwizzle |= static_cast(channel) << shift; + shift += 3; + } + + printDisassembly("max", { first, second }, &destination); + instructions.push_back(usse::makeVNMAD32( + 0, // pred + 0, // skipinv + (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + 0, // syncstart + destBankLayout.extension, // dest_bank_ext + (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext + second.getSwizzleIndex(), // src2_swiz + 0, // nosched + destination.getSwizzleMask(), // dest_mask + 0b00, // src1_mod + 0b0, // src2_mod + (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + destBankLayout.number, // dest_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel + destBankLayout.getIndex(destination), // dest_n + (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + static_cast(usse::InstructionVNMADOp::Max), // op2 + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n + )); + } + + Block::Block(gxp::Builder &parent) : parent(parent) { } +} diff --git 
a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index 8be3916..eac8d98 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -1,8 +1,9 @@ #include #include -#include +#include #include +#include #include @@ -11,95 +12,6 @@ namespace gxp { constexpr uint16_t containerIndexSA = 14; -// class MovRegisterData { -// public: -// uint8_t destMask = 0; -// int32_t swizzleIndex = 0; -// usse::RegisterReference source; -// usse::RegisterReference destination; -// }; -// -// std::vector splitRegisterF32(usse::RegisterReference source, usse::RegisterReference destination) { -// std::vector data; -// -// uint32_t destMask = destination.getSwizzleMask(); -// -// for (uint32_t a = 0; a < 2; a++) { -// uint8_t mask = (destMask & (0b11u << (a * 2))) >> (a * 2); -// -// usse::RegisterReference sourceHalf = source.getComponents(a * 2, 2); -// usse::RegisterReference destinationHalf = destination.getComponents(a * 2, 2); -// -// if (mask & 0b01u) { -// if (mask & 0b10u) { -// if (usse::areSwizzlesInMatchingHalf(sourceHalf.swizzle[0], sourceHalf.swizzle[1])) { -// data.push_back({ -// mask, -// usse::getSwizzleVec4Index({ -// sourceHalf.swizzle[0], -// sourceHalf.swizzle[1], -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// }), -// source.getComponents(a * 2, 2), // is this source index right? 
-// destination.getComponents(a * 2, 2) -// }); -// } else { -// // X and Y swizzle seperately -// data.push_back({ -// 0b01, -// usse::getSwizzleVec4Index({ -// sourceHalf.swizzle[0], -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// }), -// source.getComponents(a * 2, 2), // copy both components anyway -// destination.getComponents(a * 2, 2) -// }); -// data.push_back({ -// 0b10, -// usse::getSwizzleVec4Index({ -// usse::SwizzleChannel::DontCare, -// sourceHalf.swizzle[1], -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// }), -// source.getComponents(a * 2, 2), // copy both components anyway -// destination.getComponents(a * 2, 2) -// }); -// } -// } else { -// data.push_back({ -// 0b01, -// usse::getSwizzleVec4Index({ -// sourceHalf.swizzle[0], -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// }), -// source.getComponents(a * 2, 2), // copy both components anyway -// destination.getComponents(a * 2, 2) -// }); -// } -// } else if (mask & 0b10u) { -// data.push_back({ -// 0b10, -// usse::getSwizzleVec4Index({ -// usse::SwizzleChannel::DontCare, -// sourceHalf.swizzle[1], -// usse::SwizzleChannel::DontCare, -// usse::SwizzleChannel::DontCare, -// }), -// source.getComponents(a * 2, 2), // copy both components anyway -// destination.getComponents(a * 2, 2) -// }); -// } -// } -// -// return data; -// } - void Builder::setType(gxp::ShaderType type) { header.type = static_cast(type); } @@ -108,480 +20,6 @@ namespace gxp { return static_cast(header.type); } - void Block::createNop() { - usse::disasm::disassemble("nop", { }); - instructions.push_back(usse::makeNOP()); - } - - void Block::createMov( - usse::RegisterReference source, - usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); - usse::BankLayout destBankLayout = 
usse::BankLayout::destLayout(destination.bank); - - assert(source.type.components <= 2 && destination.type.components <= 2); - - fmt::print("{}\n", usse::disasm::disassemble("mov", { source }, &destination)); - instructions.push_back(usse::makeVMOV( - 0, // pred - 0, // skipinv - 0, // test_bit_2 - 0, // src0_comp_sel - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end_or_src0_bank_ext - srcBankLayout.extension, // src1_bank_ext - 0, // src2_bank_ext - 0, // move_type - 0, // repeat_count - 0, // nosched - static_cast(destination.type.type) & 0b111u, // move_data_type - 0, // test_bit_1 - source.getSwizzleIndex(), // src0_swiz - 0, // src0_bank_sel - destBankLayout.number, // dest_bank_sel - srcBankLayout.number, // src1_bank_sel - 0, // src2_bank_sel - destination.getSwizzleMask(), // dest_mask - destBankLayout.getIndex(destination), // dest_n - 0, // src0_n - srcBankLayout.getIndex(source), // src1_n - 0 // src2_n - )); - } - - void Block::createPack( - usse::RegisterReference source, - usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - if (destination.type.type == usse::Type::Signed32 || destination.type.type == usse::Type::Unsigned32 - || source.type.type == usse::Type::Signed32 || source.type.type == usse::Type::Unsigned32) - throw std::runtime_error("Cannot pack S32/U32 type."); - - usse::Param typeTable[] = { - 1, // Signed8 - 4, // Signed16 - 0, // Signed32 - Unsupported - 7, // Fixed10 - 5, // Float16 - 6, // Float32 - 0, // Unsigned8 - 3, // Unsigned16 - 0, // Unsigned32 - Unsupported - 2, // Output8 - }; - - fmt::print("{}\n", usse::disasm::disassemble("pck", { source }, &destination)); - instructions.push_back(usse::makeVPCK( - 0, // pred - 0, // skipinv - 0, // nosched - 0, // unknown - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end - 
srcBankLayout.extension, // src1_bank_ext - srcBankLayout.extension, // src2_bank_ext - 0, // repeat_count - typeTable[static_cast(source.type.type)], // src_fmt - typeTable[static_cast(destination.type.type)], // dest_fmt - destination.getSwizzleMask(), // dest_mask - destBankLayout.number, // dest_bank_sel - srcBankLayout.number, // src1_bank_sel - srcBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - destination.swizzle.size() > 3 ? static_cast(destination.swizzle[3]) & 0b11u : 0, // comp_sel_3 - false, // scale - static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 - static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 - srcBankLayout.getIndex(source.getHalf(0)), // src1_n - static_cast(destination.swizzle[0]) & 0b10u >> 1u, // comp0_sel_bit1 - srcBankLayout.getIndex(source.getHalf(1)), // src2_n - static_cast(destination.swizzle[0]) & 0b01u // comp_sel_0_bit0 - )); - } - - - void Block::createDot( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - assert(second.bank == usse::RegisterBank::Internal); - - fmt::print("{}\n", usse::disasm::disassemble("dot", { first, second }, &destination)); - instructions.push_back(usse::makeVDP( - 0, // pred - 0, // skipinv - 0, // clip_plane_enable - first.type.components == 4, // opcode2 - destBankLayout.extension, // dest_use_bank_ext - 0, // end - firstBankLayout.extension, // src1_bank_ext - 3, /* Seems to be the normal value... 
*/ // increment_mode - 0, // gpi0_abs - 0, // repeat_count - 0, // nosched - destination.getSwizzleMask(), // write_mask - 0, // src1_neg - 0, // src1_abs - 0, // clip_plane_n - destBankLayout.number, // dest_bank - firstBankLayout.number, // src1_bank - second.index, // gpi0_n - destBankLayout.getIndex(destination), // dest_n - second.getSwizzleIndex(), // gpi0_swiz - first.type.components > 3 ? static_cast(first.swizzle[3]) : 0, // src1_swiz_w - static_cast(first.swizzle[2]), // src1_swiz_z - static_cast(first.swizzle[1]), // src1_swiz_y - static_cast(first.swizzle[0]), // src1_swiz_x - firstBankLayout.getIndex(first) // src1_n - )); - } - - void Block::createAdd( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - uint32_t shift = 0; - uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; - } - - fmt::print("{}\n", usse::disasm::disassemble("add", { first, second }, &destination)); - instructions.push_back(usse::makeVNMAD32( - 0, // pred - 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - secondBankLayout.extension, // src1_bank_ext - firstBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz - 0, // nosched - destination.getSwizzleMask(), // dest_mask - 0b00, // src1_mod - 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 - destBankLayout.number, // dest_bank_sel - secondBankLayout.number, // src1_bank_sel - firstBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - 
(firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 - static_cast(usse::InstructionVNMADOp::Add), // op2 - secondBankLayout.getIndex(second), // src1_n - firstBankLayout.getIndex(first) // src2_n - )); - } - - void Block::createSub( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - uint32_t shift = 0; - uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; - } - - // First/Second sources are flipped so negative effect can be applied to src1. -x + y = y - x - fmt::print("{}\n", usse::disasm::disassemble("sub", { first, second }, &destination)); - instructions.push_back(usse::makeVNMAD32( - 0, // pred - 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - secondBankLayout.extension, // src1_bank_ext - firstBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz - 0, // nosched - destination.getSwizzleMask(), // dest_mask - 0b01, // src1_mod - 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 - destBankLayout.number, // dest_bank_sel - secondBankLayout.number, // src1_bank_sel - firstBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 - static_cast(usse::InstructionVNMADOp::Add), // op2 - secondBankLayout.getIndex(second), // src1_n - firstBankLayout.getIndex(first) // src2_n - )); - } - - void Block::createMul( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - 
usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - uint32_t shift = 0; - uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; - } - - fmt::print("{}\n", usse::disasm::disassemble("mul", { first, second }, &destination)); - instructions.push_back(usse::makeVNMAD32( - 0, // pred - 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - firstBankLayout.extension, // src1_bank_ext - secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz - 0, // nosched - destination.getSwizzleMask(), // dest_mask - 0b00, // src1_mod - 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 - destBankLayout.number, // dest_bank_sel - firstBankLayout.number, // src1_bank_sel - secondBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 - static_cast(usse::InstructionVNMADOp::Multiply), // op2 - firstBankLayout.getIndex(first), // src1_n - secondBankLayout.getIndex(second) // src2_n - )); - } - - void Block::createExp( - usse::RegisterReference source, - usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - usse::Param typeTable[] = { - 0, // Signed8 - Unsupported - 0, // Signed16 - Unsupported - 0, // Signed32 - Unsupported - 2, // Fixed10 - 1, // Float16 - 0, // Float32 - 0, // Unsigned8 - Unsupported - 0, // Unsigned16 - Unsupported - 0, // Unsigned32 - Unsupported - 0, // Output8 - Unsupported - }; - - 
fmt::print("{}\n", usse::disasm::disassemble("exp", { source }, &destination)); - instructions.push_back(usse::makeVCOMP( - 0, // pred - 0, // skipinv - typeTable[static_cast(destination.type.type)], // dest_type - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end - srcBankLayout.extension, // src1_bank_ext - 0, // repeat_count - 0, // nosched - static_cast(usse::InstructionVCOMPOp::Exponent), // op2 - typeTable[static_cast(source.type.type)], // src_type - 0b00, // src1_mod - static_cast(source.swizzle[0]), // src_comp - destBankLayout.number, // dest_bank - srcBankLayout.number, // src1_bank - destBankLayout.getIndex(destination), // dest_n - srcBankLayout.getIndex(source), // src1_n - destination.getSwizzleMask() // write_mask - )); - } - - void Block::createLog( - usse::RegisterReference source, - usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - usse::Param typeTable[] = { - 0, // Signed8 - Unsupported - 0, // Signed16 - Unsupported - 0, // Signed32 - Unsupported - 2, // Fixed10 - 1, // Float16 - 0, // Float32 - 0, // Unsigned8 - Unsupported - 0, // Unsigned16 - Unsupported - 0, // Unsigned32 - Unsupported - 0, // Output8 - Unsupported - }; - - fmt::print("{}\n", usse::disasm::disassemble("log", { source }, &destination)); - instructions.push_back(usse::makeVCOMP( - 0, // pred - 0, // skipinv - typeTable[static_cast(destination.type.type)], // dest_type - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end - srcBankLayout.extension, // src1_bank_ext - 0, // repeat_count - 0, // nosched - static_cast(usse::InstructionVCOMPOp::Logarithm), // op2 - typeTable[static_cast(source.type.type)], // src_type - 0b00, // src1_mod - static_cast(source.swizzle[0]), // src_comp - destBankLayout.number, // dest_bank - srcBankLayout.number, // src1_bank - 
destBankLayout.getIndex(destination), // dest_n - srcBankLayout.getIndex(source), // src1_n - destination.getSwizzleMask() // write_mask - )); - } - - void Block::createReverseSquareRoot( - usse::RegisterReference source, - usse::RegisterReference destination) { - usse::BankLayout srcBankLayout = usse::BankLayout::srcLayout(source.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - usse::Param typeTable[] = { - 0, // Signed8 - Unsupported - 0, // Signed16 - Unsupported - 0, // Signed32 - Unsupported - 2, // Fixed10 - 1, // Float16 - 0, // Float32 - 0, // Unsigned8 - Unsupported - 0, // Unsigned16 - Unsupported - 0, // Unsigned32 - Unsupported - 0, // Output8 - Unsupported - }; - - fmt::print("{}\n", usse::disasm::disassemble("rsq", { source }, &destination)); - instructions.push_back(usse::makeVCOMP( - 0, // pred - 0, // skipinv - typeTable[static_cast(destination.type.type)], // dest_type - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - 0, // end - srcBankLayout.extension, // src1_bank_ext - 0, // repeat_count - 0, // nosched - static_cast(usse::InstructionVCOMPOp::ReverseSquareRoot), // op2 - typeTable[static_cast(source.type.type)], // src_type - 0b00, // src1_mod - static_cast(source.swizzle[0]), // src_comp - destBankLayout.number, // dest_bank - srcBankLayout.number, // src1_bank - destBankLayout.getIndex(destination), // dest_n - srcBankLayout.getIndex(source), // src1_n - destination.getSwizzleMask() // write_mask - )); - } - - void Block::createMin( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - uint32_t shift = 0; - uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { 
- firstSwizzle |= static_cast(channel) << shift; - shift += 3; - } - - fmt::print("{}\n", usse::disasm::disassemble("min", { first, second }, &destination)); - instructions.push_back(usse::makeVNMAD32( - 0, // pred - 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - firstBankLayout.extension, // src1_bank_ext - secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz - 0, // nosched - destination.getSwizzleMask(), // dest_mask - 0b00, // src1_mod - 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 - destBankLayout.number, // dest_bank_sel - firstBankLayout.number, // src1_bank_sel - secondBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 - static_cast(usse::InstructionVNMADOp::Min), // op2 - firstBankLayout.getIndex(first), // src1_n - secondBankLayout.getIndex(second) // src2_n - )); - } - - void Block::createMax( - usse::RegisterReference first, - usse::RegisterReference second, - usse::RegisterReference destination) { - usse::BankLayout firstBankLayout = usse::BankLayout::srcLayout(first.bank); - usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); - usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - - uint32_t shift = 0; - uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; - } - - fmt::print("{}\n", usse::disasm::disassemble("max", { first, second }, &destination)); - instructions.push_back(usse::makeVNMAD32( - 0, // pred - 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 - 0, // syncstart - destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - firstBankLayout.extension, // src1_bank_ext - 
secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz - 0, // nosched - destination.getSwizzleMask(), // dest_mask - 0b00, // src1_mod - 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 - destBankLayout.number, // dest_bank_sel - firstBankLayout.number, // src1_bank_sel - secondBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n - (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 - static_cast(usse::InstructionVNMADOp::Max), // op2 - firstBankLayout.getIndex(first), // src1_n - secondBankLayout.getIndex(second) // src2_n - )); - } - - Block::Block(gxp::Builder &parent) : parent(parent) { } - usse::RegisterBank Parameter::getBank() { switch (category) { case ParameterCategory::Attribute: @@ -643,8 +81,10 @@ namespace gxp { throw std::runtime_error("Missing allocation method for bank."); } - fmt::print("Allocating {} registers of type {} (vec{}[{}]), size {} at index {}.\n", - usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); + if (printAllocations) { + fmt::print("[alloc] {} registers of type {} (vec{}[{}]), size {} at index {}.\n", + usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); + } return usse::RegisterReference(type, bank, index); } @@ -691,7 +131,8 @@ namespace gxp { - static_cast(ProgramVarying::TexCoord0); uint32_t texCoordBits = 0; - texCoordBits |= (texCoord.components - 1) & 0b11u; + assert(texCoord.components > 1); + texCoordBits |= (0b111u << (texCoord.components - 1)) >> 3u; varyings.vertex_outputs2 |= texCoordBits << (texCoordIndex * 3u); } @@ -925,7 +366,10 @@ namespace gxp { return data; } - Builder::Builder() { + Builder::Builder() : Builder(false, false) { } + + Builder::Builder(bool printDisassembly, bool printAllocations) + : printDisassembly(printDisassembly), printAllocations(printAllocations) { header.magic = gxpMagic; header.majorVersion = 1; 
header.minorVersion = 4; diff --git a/src/interface/include/interface/interface.h b/src/interface/include/interface/interface.h index 1bea41e..940dfab 100644 --- a/src/interface/include/interface/interface.h +++ b/src/interface/include/interface/interface.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -11,6 +13,8 @@ class Interface { std::string inputFilePath; std::string outputFilePath; + CompilerConfig config; + bool parseParams(int count, char **args); public: diff --git a/src/interface/src/interface.cpp b/src/interface/src/interface.cpp index f3f866c..faea79c 100644 --- a/src/interface/src/interface.cpp +++ b/src/interface/src/interface.cpp @@ -14,6 +14,10 @@ bool Interface::parseParams(int count, char **args) { ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.") outputFilePath = args[a + 1]; a++; + } else if (strcmp(args[a], "-disasm") == 0) { + config.printDisassembly = true; + } else if (strcmp(args[a], "-alloc") == 0) { + config.printAllocations = true; } else { ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.") inputFilePath = args[a]; @@ -30,16 +34,17 @@ bool Interface::parseParams(int count, char **args) { int Interface::exec(int count, char **args) { if (!parseParams(count, args)) return 1; - auto spirvData = loadFileData(inputFilePath); #ifdef NDEBUG try { #endif - CompilerGXP compiler(spirvData); + auto spirvData = loadFileData(inputFilePath); + CompilerGXP compiler(spirvData, config); std::vector gxpData = compiler.compileData(); std::ofstream stream(outputFilePath); stream.write(reinterpret_cast(gxpData.data()), gxpData.size()); stream.close(); + fmt::printf("Done."); #ifdef NDEBUG } catch (std::runtime_error &e) { fmt::print("{}\n", e.what()); diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index d0a0a94..dfdb289 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(translator + 
include/translator/config.h include/translator/translator.h src/codes.cpp diff --git a/src/translator/include/translator/config.h b/src/translator/include/translator/config.h new file mode 100644 index 0000000..848aa14 --- /dev/null +++ b/src/translator/include/translator/config.h @@ -0,0 +1,7 @@ +#pragma once + +class CompilerConfig { +public: + bool printDisassembly = false; + bool printAllocations = false; +}; diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index 6774cff..d8a6fcb 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -1,12 +1,16 @@ #pragma once -#include +#include + #include +#include + #include namespace gxp { class Block; } class CompilerGXP; +class CompilerConfig; class TranslatorArguments; typedef void(CompilerGXP::*TranslatorImplementation)(const TranslatorArguments &arguments); @@ -42,6 +46,7 @@ class TranslatorReference { class CompilerGXP : public Compiler { gxp::Builder builder; + CompilerConfig config; std::vector codes; std::map> extensions; @@ -105,5 +110,5 @@ class CompilerGXP : public Compiler { std::vector compileData(); - explicit CompilerGXP(const std::vector &data); + explicit CompilerGXP(const std::vector &data, CompilerConfig config); }; diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 6ea8c4a..872be9c 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index 9b2f347..f55214b 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -347,7 +347,8 @@ std::vector CompilerGXP::compileData() { return builder.build(); } -CompilerGXP::CompilerGXP(const std::vector &data) : Compiler(data) { +CompilerGXP::CompilerGXP(const std::vector &data, CompilerConfig config) + : Compiler(data), 
config(config), builder(config.printDisassembly, config.printAllocations) { createTranslators(); createExtensions(); } diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h index 6608452..daadc68 100644 --- a/src/util/include/util/util.h +++ b/src/util/include/util/util.h @@ -1,11 +1,16 @@ #pragma once +#include + #include #include template std::vector loadFileData(const std::string &path) { std::ifstream stream(path, std::ios::binary | std::ios::ate); + if (!stream.is_open()) + throw std::runtime_error(fmt::format("Cannot load file from '{}'.", path)); + size_t size = stream.tellg(); assert(size % sizeof(T) == 0); std::vector data(size / sizeof(T)); From e695957ae3c69075cc55470ad35edb8c7f9c3db5 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Fri, 1 Nov 2019 15:11:50 -0400 Subject: [PATCH 13/19] Basic register space optimization support --- src/gxp/include/gxp/builder.h | 20 ++- src/gxp/include/gxp/usse.h | 4 +- src/gxp/src/block.cpp | 50 ++++--- src/gxp/src/builder.cpp | 44 ++++-- src/gxp/src/usse.cpp | 16 +- src/interface/include/interface/interface.h | 4 +- src/interface/src/interface.cpp | 10 +- src/translator/include/translator/config.h | 7 - .../include/translator/translator.h | 29 +++- src/translator/src/codes.cpp | 140 +++++++++++++----- src/translator/src/translator.cpp | 90 ++++++++++- src/util/include/util/util.h | 24 ++- 12 files changed, 325 insertions(+), 113 deletions(-) delete mode 100644 src/translator/include/translator/config.h diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index ec0d1d0..d6cee19 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -14,6 +14,9 @@ namespace gxp { Fragment = 1, }; + // Should be 64 (some instructions only have 6 bit index), at most 128. Set higher for non -Oreg-space shaders. 
+ constexpr size_t maxTemporaryRegisters = 128; + class Parameter { public: std::string name; @@ -29,19 +32,26 @@ namespace gxp { usse::RegisterBank getBank(); }; + class BuilderConfig { + public: + bool printDisassembly = false; + bool printAllocations = false; + }; + class Builder { + BuilderConfig config; + ProgramHeader header; ProgramVaryings varyings; - bool printDisassembly = false; - bool printAllocations = false; - uint32_t paRegPointer = 0; uint32_t saRegPointer = 0; uint32_t oRegPointer = 0; - uint32_t tRegPointer = 0; uint32_t iRegPointer = 0; + std::array tRegSpace; + uint32_t tMaxRegs = 0; + std::vector> primaryBlocks; std::vector> secondaryBlocks; std::vector parameters; @@ -70,6 +80,6 @@ namespace gxp { std::vector build(); Builder(); - Builder(bool printDisassembly, bool printAllocations); + explicit Builder(BuilderConfig config); }; } diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index e2e565e..4f62400 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -130,8 +130,8 @@ namespace usse { uint8_t extension = 0; uint8_t number = 0; - bool isHalf(Type type); - uint32_t getIndex(RegisterReference reference, uint32_t bits = 7); + bool isHalfType(Type type); + uint32_t getIndex(RegisterReference reference, bool enableDoubleRegs = true, uint32_t bits = 6); static BankLayout destLayout(RegisterBank bank); static BankLayout src0Layout(RegisterBank bank); diff --git a/src/gxp/src/block.cpp b/src/gxp/src/block.cpp index 58105e8..d00c6ff 100644 --- a/src/gxp/src/block.cpp +++ b/src/gxp/src/block.cpp @@ -10,7 +10,7 @@ namespace gxp { void Block::printDisassembly(const std::string &name, const std::vector &sources, const usse::RegisterReference *destination) { - if (parent.printDisassembly) + if (parent.config.printDisassembly) fmt::print("[disasm] {}\n", usse::disasm::disassemble(name, sources, destination)); } @@ -97,7 +97,7 @@ namespace gxp { destBankLayout.number, // dest_bank_sel srcBankLayout.number, // 
src1_bank_sel srcBankLayout.number, // src2_bank_sel - destBankLayout.getIndex(destination), // dest_n + destBankLayout.getIndex(destination, false, 7), // dest_n destination.type.components > 3 ? static_cast(destination.swizzle[3]) & 0b11u : 0, // comp_sel_3 false, // scale static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 @@ -157,11 +157,18 @@ namespace gxp { usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - uint32_t shift = 0; + uint32_t swizzleIndex = 0; uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; + uint8_t destMask = destination.getSwizzleMask(); + for (uint32_t a = 0; a < 4; a++) { + if (destMask & (1u << a)) { + usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + + // Swizzle does not have appropriate value for writing to destination. + assert(channel != usse::SwizzleChannel::DontCare); + + firstSwizzle |= static_cast(channel) << (a * 3); + } } printDisassembly("add", { first, second }, &destination); @@ -172,8 +179,8 @@ namespace gxp { 0, // syncstart destBankLayout.extension, // dest_bank_ext (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 - secondBankLayout.extension, // src1_bank_ext - firstBankLayout.extension, // src2_bank_ext + firstBankLayout.extension, // src1_bank_ext + secondBankLayout.extension, // src2_bank_ext second.getSwizzleIndex(), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask @@ -181,13 +188,13 @@ namespace gxp { 0b0, // src2_mod (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 destBankLayout.number, // dest_bank_sel - secondBankLayout.number, // src1_bank_sel - firstBankLayout.number, // src2_bank_sel + firstBankLayout.number, // src1_bank_sel + secondBankLayout.number, // src2_bank_sel destBankLayout.getIndex(destination), // dest_n (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 
static_cast(usse::InstructionVNMADOp::Add), // op2 - secondBankLayout.getIndex(second), // src1_n - firstBankLayout.getIndex(first) // src2_n + firstBankLayout.getIndex(first), // src1_n + secondBankLayout.getIndex(second) // src2_n )); } @@ -199,11 +206,18 @@ namespace gxp { usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - uint32_t shift = 0; + uint32_t swizzleIndex = 0; uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; + uint8_t destMask = destination.getSwizzleMask(); + for (uint32_t a = 0; a < 4; a++) { + if (destMask & (1u << a)) { + usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + + // Swizzle does not have appropriate value for writing to destination. + assert(channel != usse::SwizzleChannel::DontCare); + + firstSwizzle |= static_cast(channel) << (a * 3); + } } // First/Second sources are flipped so negative effect can be applied to src1. 
-x + y = y - x @@ -396,8 +410,8 @@ namespace gxp { static_cast(source.swizzle[0]), // src_comp destBankLayout.number, // dest_bank srcBankLayout.number, // src1_bank - destBankLayout.getIndex(destination), // dest_n - srcBankLayout.getIndex(source), // src1_n + destBankLayout.getIndex(destination, false, 7), // dest_n + srcBankLayout.getIndex(source, false, 7), // src1_n destination.getSwizzleMask() // write_mask )); } diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index eac8d98..f3c5e4f 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -2,9 +2,10 @@ #include #include -#include #include +#include + #include #define OFFSET_OF(parent, child) (reinterpret_cast(&parent.child) - reinterpret_cast(&parent)) @@ -69,19 +70,23 @@ namespace gxp { oRegPointer += size + needsAllocOffset(oRegPointer, size); break; case usse::RegisterBank::Temporary: - index = tRegPointer + needsAllocOffset(tRegPointer, size); - tRegPointer += size + needsAllocOffset(tRegPointer, size); + index = allocate(tRegSpace.data(), tRegSpace.size(), size, size == 4 ? 
2 : 1); + if (index == ~0u) + throw std::runtime_error(fmt::format("Cannot allocate space for temporary register size {}.", size)); + memset(&tRegSpace[index], true, sizeof(bool) * size); + if (tMaxRegs < index + size) + tMaxRegs = index + size; break; case usse::RegisterBank::Internal: - index = iRegPointer + needsAllocOffset(iRegPointer, size); - iRegPointer += size + needsAllocOffset(iRegPointer, size); - assert(iRegPointer <= 8); + // 1 internal register = 1 vec4 + index = iRegPointer++; + assert(iRegPointer <= 4); break; default: throw std::runtime_error("Missing allocation method for bank."); } - if (printAllocations) { + if (config.printAllocations) { fmt::print("[alloc] {} registers of type {} (vec{}[{}]), size {} at index {}.\n", usse::getBankName(bank), usse::getTypeName(type.type), type.components, type.arraySize, size, index); } @@ -90,10 +95,19 @@ namespace gxp { } void Builder::freeRegister(usse::RegisterReference reg) { - if (reg.bank == usse::RegisterBank::Internal && reg.getEffectiveIndex() + reg.size == iRegPointer) { - iRegPointer -= reg.size; - } else { + switch (reg.bank) { + case usse::RegisterBank::Internal: + if (reg.index + 1 == iRegPointer) + iRegPointer--; + else + throw std::runtime_error("Latest internal register must be freed first."); + break; + case usse::RegisterBank::Temporary: + memset(&tRegSpace[reg.index], false, sizeof(bool) * reg.size); + break; + default: assert(false); + break; } } @@ -289,8 +303,8 @@ namespace gxp { // Code header.primaryRegCount = paRegPointer; header.secondaryRegCount = saRegPointer; - header.tempRegCount1 = tRegPointer; - header.tempRegCount2 = tRegPointer; // Difference between both reg counts? + header.tempRegCount1 = tMaxRegs; + header.tempRegCount2 = tMaxRegs; // Difference between both reg counts? 
{ header.secondaryProgramOffset = data.size() - OFFSET_OF(header, secondaryProgramOffset); for (const std::unique_ptr &block : secondaryBlocks) { @@ -366,10 +380,8 @@ namespace gxp { return data; } - Builder::Builder() : Builder(false, false) { } - - Builder::Builder(bool printDisassembly, bool printAllocations) - : printDisassembly(printDisassembly), printAllocations(printAllocations) { + Builder::Builder() : Builder(BuilderConfig { }) { } + Builder::Builder(BuilderConfig config) : config(config) { header.magic = gxpMagic; header.majorVersion = 1; header.minorVersion = 4; diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index 9a11d16..9d3c42c 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -151,7 +151,7 @@ namespace usse { }, }; - bool BankLayout::isHalf(Type type) { + bool BankLayout::isHalfType(Type type) { return ( type == Type::Float32 || type == Type::Float16 || @@ -161,15 +161,14 @@ namespace usse { bank != usse::RegisterBank::Immediate; } - uint32_t BankLayout::getIndex(RegisterReference reference, uint32_t bits) { + uint32_t BankLayout::getIndex(RegisterReference reference, bool enableDoubleRegs, uint32_t bits) { uint32_t index = reference.index; - bool doubleReg = isHalf(reference.type.type); + bool doubleReg = enableDoubleRegs && isHalfType(reference.type.type); - if (doubleReg) - index /= 2; - // Top Bit, is this wrong? Looks more complex in V3K source. if (bank == RegisterBank::Internal) - index += 120 + (doubleReg ? 4 : 0); + index += (60 + (doubleReg ? 0 : 2)) * (bits == 7 ? 
2 : 1); + else if (doubleReg) + index /= 2; return index; } @@ -322,7 +321,8 @@ namespace usse { RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex) : type(type), bank(bank), size(getTypeSize(type.type) * type.components * type.arraySize / 4) { bool swizzleUp = false; - if (regIndex % 2 == 1) { + + if (bank != usse::RegisterBank::Internal && regIndex % 2 == 1) { regIndex--; swizzleUp = true; } diff --git a/src/interface/include/interface/interface.h b/src/interface/include/interface/interface.h index 940dfab..0c1a374 100644 --- a/src/interface/include/interface/interface.h +++ b/src/interface/include/interface/interface.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include @@ -13,7 +13,7 @@ class Interface { std::string inputFilePath; std::string outputFilePath; - CompilerConfig config; + TranslatorConfig config; bool parseParams(int count, char **args); diff --git a/src/interface/src/interface.cpp b/src/interface/src/interface.cpp index faea79c..c35ad5d 100644 --- a/src/interface/src/interface.cpp +++ b/src/interface/src/interface.cpp @@ -14,10 +14,14 @@ bool Interface::parseParams(int count, char **args) { ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.") outputFilePath = args[a + 1]; a++; - } else if (strcmp(args[a], "-disasm") == 0) { + } else if (strcmp(args[a], "-S") == 0) { // Print Disassembly config.printDisassembly = true; - } else if (strcmp(args[a], "-alloc") == 0) { + } else if (strcmp(args[a], "-A") == 0) { // Print Register Allocations config.printAllocations = true; + } else if (strcmp(args[a], "-L") == 0) { // Print Optimization Debug Messages + config.logDebug = true; + } else if (strcmp(args[a], "-Oreg-space") == 0) { // Optimize Register Space + config.optimizeRegisterSpace = true; } else { ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.") inputFilePath = args[a]; @@ -44,7 +48,7 @@ int Interface::exec(int count, char **args) { std::ofstream 
stream(outputFilePath); stream.write(reinterpret_cast(gxpData.data()), gxpData.size()); stream.close(); - fmt::printf("Done."); + fmt::print("Done.\n"); #ifdef NDEBUG } catch (std::runtime_error &e) { fmt::print("{}\n", e.what()); diff --git a/src/translator/include/translator/config.h b/src/translator/include/translator/config.h deleted file mode 100644 index 848aa14..0000000 --- a/src/translator/include/translator/config.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -class CompilerConfig { -public: - bool printDisassembly = false; - bool printAllocations = false; -}; diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h index d8a6fcb..33e034c 100644 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include @@ -10,7 +8,6 @@ namespace gxp { class Block; } class CompilerGXP; -class CompilerConfig; class TranslatorArguments; typedef void(CompilerGXP::*TranslatorImplementation)(const TranslatorArguments &arguments); @@ -44,15 +41,29 @@ class TranslatorReference { bool isStruct(); }; +class TranslatorConfig { +public: + bool printDisassembly = false; + bool printAllocations = false; + + bool optimizeRegisterSpace = false; + + bool logDebug = false; +}; + class CompilerGXP : public Compiler { gxp::Builder builder; - CompilerConfig config; + TranslatorConfig config; std::vector codes; std::map> extensions; std::map idVaryings; std::map idRegisters; + std::map idUseCounts; + std::map idUsesLeft; + std::map idAliases; + std::vector idsCleaned; std::map varyingReferences; static usse::Type translateType(SPIRType::BaseType baseType); @@ -65,11 +76,18 @@ class CompilerGXP : public Compiler { std::vector &availableTexCoords, uint32_t components); + void createIdUseCounts(const SPIRFunction &function); + TranslatorReference createVariable(usse::RegisterBank bank, const SPIRType &type); TranslatorReference 
createParameter(gxp::ParameterCategory category, const SPIRType &type, const std::string &name); + spv::Id resolveAlias(spv::Id id); + void useRegister(spv::Id id); usse::RegisterReference getRegister(spv::Id id); + void writeRegister(spv::Id id, TranslatorReference reg); + void aliasRegister(spv::Id empty, spv::Id value); + void cleanupRegisters(); spv::Id createBlock(const SPIRBlock &block); spv::Id createFunction(const SPIRFunction &function); @@ -87,7 +105,6 @@ class CompilerGXP : public Compiler { void opStore(const TranslatorArguments &arguments); void opMatrixTimesVector(const TranslatorArguments &arguments); void opVectorTimesScalar(const TranslatorArguments &arguments); - void opConvertUToF(const TranslatorArguments &arguments); void opCompositeExtract(const TranslatorArguments &arguments); void opCompositeConstruct(const TranslatorArguments &arguments); void opAccessChain(const TranslatorArguments &arguments); @@ -110,5 +127,5 @@ class CompilerGXP : public Compiler { std::vector compileData(); - explicit CompilerGXP(const std::vector &data, CompilerConfig config); + explicit CompilerGXP(const std::vector &data, TranslatorConfig config); }; diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index 872be9c..b4ed9e5 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -6,14 +6,35 @@ #include +spv::Id CompilerGXP::resolveAlias(spv::Id id) { + while (idAliases.find(id) != idAliases.end()) + id = idAliases[id]; + + return id; +} + +void CompilerGXP::useRegister(spv::Id id) { + if (!config.optimizeRegisterSpace) + return; + + id = resolveAlias(id); + + if (idUsesLeft[id] == 0) + throw std::runtime_error(fmt::format("Id {} has no more uses.", id)); + + idUsesLeft[id]--; +} + usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { auto varying = idVaryings.find(id); if (varying != idVaryings.end()) return getOrThrow(varyingReferences, getOrThrow(idVaryings, id)); - auto reg = idRegisters.find(id); - if (reg != 
idRegisters.end()) - return getOrThrow(idRegisters, id).reference; + auto reg = idRegisters.find(resolveAlias(id)); + if (reg != idRegisters.end()) { + useRegister(id); + return reg->second.reference; + } auto *constant = maybe_get(id); if (constant) { @@ -31,6 +52,43 @@ usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { throw std::runtime_error(fmt::format("Cannot find register, varying or constant with id {}.", id)); } +void CompilerGXP::writeRegister(spv::Id id, TranslatorReference reg) { + if (idRegisters.find(id) == idRegisters.end()) { + idRegisters[id] = std::move(reg); + useRegister(id); + } else { + throw std::runtime_error(fmt::format("SSA Violation, id {} was assigned twice.", id)); + } +} + +void CompilerGXP::aliasRegister(spv::Id empty, spv::Id value) { + // Resolve aliases (so you can make aliases of aliases). + empty = resolveAlias(empty); + value = resolveAlias(value); + + // Link source -> destination. + idAliases[empty] = value; + idRegisters.erase(empty); + + if (config.optimizeRegisterSpace) { + // Keep destination alive until source is fully used. + idUsesLeft[value] += idUsesLeft[empty]; + } +} + +void CompilerGXP::cleanupRegisters() { + for (const auto &uses : idUsesLeft) { + if (uses.second == 0 && !contains(idsCleaned, uses.first) && idAliases.find(uses.first) == idAliases.end()) { + usse::RegisterReference reg = getOrThrow(idRegisters, uses.first).reference; + + if (reg.bank == usse::RegisterBank::Temporary) { + builder.freeRegister(reg); + idsCleaned.push_back(uses.first); + } + } + } +} + void CompilerGXP::unimplemented(const TranslatorArguments &arguments) { throw std::runtime_error(fmt::format("{} is not implemented.", arguments.code.name)); } @@ -44,9 +102,17 @@ void CompilerGXP::opLoad(const TranslatorArguments &arguments) { spv::Id result = arguments.instruction[1]; spv::Id pointer = arguments.instruction[2]; - // This is a redirect, but it should really load into temp. 
- // Maybe let the user chose if there want to assume redirect or copy until we can introduce analysis. - idRegisters[result] = { getRegister(pointer) }; + usse::RegisterReference reg = getRegister(pointer); + + if (config.optimizeRegisterSpace && idUsesLeft[pointer] == 0) { + // If result allocation is going to be freed right after, just alias. + aliasRegister(result, pointer); + } else { + // If not, allocate more space. + usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, reg.type); + arguments.block.createPack(reg, destination); + writeRegister(result, { destination }); + } } void CompilerGXP::opStore(const TranslatorArguments &arguments) { @@ -87,7 +153,7 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { } builder.freeRegister(internal); - idRegisters[result] = { temp }; + writeRegister(result, { temp }); } void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { @@ -106,21 +172,7 @@ void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { arguments.block.createMul(vector, scalar, destination); - idRegisters[result] = { destination }; -} - -void CompilerGXP::opConvertUToF(const TranslatorArguments &arguments) { - spv::Id type = arguments.instruction[0]; - spv::Id destination = arguments.instruction[1]; - spv::Id source = arguments.instruction[2]; - - usse::RegisterReference srcReg = getRegister(source); - usse::RegisterReference destReg = builder.allocateRegister( - usse::RegisterBank::Temporary, { usse::Type::Float32, 4, 1 }); - - arguments.block.createPack(srcReg, destReg); - - idRegisters[destination] = { destReg }; + writeRegister(result, { destination }); } void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { @@ -131,7 +183,7 @@ void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { usse::RegisterReference source = getRegister(sourceId); - idRegisters[result] = { source.getComponents(index, 1) }; 
+ writeRegister(result, { source.getComponents(index, 1) }); } void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { @@ -170,7 +222,7 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { a += size; } - idRegisters[result] = { output }; + writeRegister(result, { output }); } void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { @@ -186,7 +238,7 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { SPIRType type = get_type_from_variable(base); if (type.basetype == SPIRType::Struct && is_member_builtin(type, builtInValue, &builtIn)) { - idRegisters[result] = { getOrThrow(varyingReferences, translateVarying(builtIn)) }; + writeRegister(result, { getOrThrow(varyingReferences, translateVarying(builtIn)) }); return; } else { ref = getOrThrow(idRegisters, base); @@ -209,7 +261,7 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { } } - idRegisters[result] = ref; + writeRegister(result, ref); } @@ -240,7 +292,7 @@ void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { arguments.block.createMov(source, temp.getComponents(a, 1)); } - idRegisters[result] = { temp }; + writeRegister(result, { temp }); } void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { @@ -256,7 +308,7 @@ void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { arguments.block.createSub(zero, source, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::opFAdd(const TranslatorArguments &arguments) { @@ -273,7 +325,7 @@ void CompilerGXP::opFAdd(const TranslatorArguments &arguments) { arguments.block.createAdd(first, second, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::opFSub(const TranslatorArguments &arguments) { @@ -290,7 +342,7 @@ void CompilerGXP::opFSub(const TranslatorArguments &arguments) { 
arguments.block.createSub(first, second, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::opFMul(const TranslatorArguments &arguments) { @@ -307,7 +359,7 @@ void CompilerGXP::opFMul(const TranslatorArguments &arguments) { arguments.block.createMul(first, second, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::opDot(const TranslatorArguments &arguments) { @@ -330,7 +382,7 @@ void CompilerGXP::opDot(const TranslatorArguments &arguments) { builder.freeRegister(internal); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { @@ -341,12 +393,20 @@ void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { SPIRFunction function = get(functionId); for (size_t a = 0; a < function.arguments.size(); a++) { - idRegisters[function.arguments[a].id] = { getRegister(arguments.instruction[3 + a]) }; + // Should be alias here, not writeRegister. + spv::Id moveToId = function.arguments[a].id; + spv::Id moveFromId = arguments.instruction[3 + a]; + + // Function use counts have not been created yet (until createFunction). Allow this assignment. 
+ if (config.optimizeRegisterSpace) + idUsesLeft[moveToId]++; + + aliasRegister(moveToId, moveFromId); } spv::Id returnValue = createFunction(function); if (returnValue != 0) - idRegisters[result] = { getRegister(returnValue) }; + aliasRegister(result, returnValue); } void CompilerGXP::opExtInst(const TranslatorArguments &arguments) { @@ -381,7 +441,7 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { builder.freeRegister(magnitude); builder.freeRegister(temporary); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::extGLSLFMin(const TranslatorArguments &arguments) { @@ -397,7 +457,7 @@ void CompilerGXP::extGLSLFMin(const TranslatorArguments &arguments) { arguments.block.createMin(first, second, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::extGLSLFMax(const TranslatorArguments &arguments) { @@ -413,7 +473,7 @@ void CompilerGXP::extGLSLFMax(const TranslatorArguments &arguments) { arguments.block.createMax(first, second, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { @@ -444,7 +504,7 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { builder.freeRegister(magnitude); builder.freeRegister(internal); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { @@ -465,7 +525,7 @@ void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { arguments.block.createMul(destination, second, destination); arguments.block.createExp(destination, destination); - idRegisters[result] = { destination }; + writeRegister(result, { destination }); } TranslatorArguments::TranslatorArguments( @@ -594,7 +654,7 @@ void CompilerGXP::createTranslators() { { spv::Op::OpConvertFToU, 
"OpConvertFToU", &CompilerGXP::unimplemented }, { spv::Op::OpConvertFToS, "OpConvertFToS", &CompilerGXP::unimplemented }, { spv::Op::OpConvertSToF, "OpConvertSToF", &CompilerGXP::unimplemented }, - { spv::Op::OpConvertUToF, "OpConvertUToF", &CompilerGXP::opConvertUToF }, + { spv::Op::OpConvertUToF, "OpConvertUToF", &CompilerGXP::unimplemented }, { spv::Op::OpUConvert, "OpUConvert", &CompilerGXP::unimplemented }, { spv::Op::OpSConvert, "OpSConvert", &CompilerGXP::unimplemented }, { spv::Op::OpFConvert, "OpFConvert", &CompilerGXP::unimplemented }, diff --git a/src/translator/src/translator.cpp b/src/translator/src/translator.cpp index f55214b..cbb4787 100644 --- a/src/translator/src/translator.cpp +++ b/src/translator/src/translator.cpp @@ -1,11 +1,21 @@ #include +#include #include #include #define VERTEX_POSITION_OUTPUT_ONLY +gxp::BuilderConfig translateConfig(TranslatorConfig config) { + gxp::BuilderConfig result; + + result.printAllocations = config.printAllocations; + result.printDisassembly = config.printDisassembly; + + return result; +} + // Position, PointSize, Clip0 and Clip1 are removed so they are not allocated over. const std::vector allVaryings = { gxp::ProgramVarying::Color0, @@ -117,6 +127,55 @@ gxp::ProgramVarying CompilerGXP::allocateVarying( return selected; } +void CompilerGXP::createIdUseCounts(const SPIRFunction &function) { + for (uint32_t blockId : function.blocks) { + const SPIRBlock &block = get(blockId); + + for (Instruction instruction : block.ops) { + int32_t idCount = 0; + std::vector excludeIndices; + + switch (instruction.op) { + case spv::OpCompositeExtract: + idCount = 3; + break; + case spv::OpVectorShuffle: + idCount = 4; + break; + case spv::OpExtInst: + excludeIndices = { 3 }; + idCount = instruction.count - 1; + break; + default: + idCount = instruction.count - 1; + break; + } + + // Negative id count allows for manual analysis of ids in switch statement. 
+ if (idCount > 0) { + for (uint32_t a = 0; a < idCount; a++) { + // Should resolve alias since some function parameters are aliased before their uses are counted. + spv::Id current = resolveAlias(ir.spirv[instruction.offset + a]); + + if (contains(excludeIndices, a)) + continue; + if (ir.ids[current].get_type() == Types::TypeType) + continue; + + idUseCounts[current]++; + idUsesLeft[current]++; + } + } + } + + + if (block.terminator == SPIRBlock::Return && block.return_value != 0) { + idUseCounts[block.return_value]++; + idUsesLeft[block.return_value]++; + } + } +} + TranslatorReference CompilerGXP::createVariable(usse::RegisterBank bank, const SPIRType &type) { if (type.basetype == SPIRType::Struct) { TranslatorReference reference; @@ -163,10 +222,16 @@ spv::Id CompilerGXP::createBlock(const SPIRBlock &block) { *gxpBlock, code, &ir.spirv[instruction.offset], - instruction.count - ); + instruction.count); (this->*code.implementation)(arguments); + + // TODO: This is supposed to start a new block, but CompilerGXP::opFunctionCall should probably be responsible for that. 
+ if (instruction.op == spv::OpFunctionCall) + gxpBlock = builder.createPrimaryBlock(); + + if (config.optimizeRegisterSpace) + cleanupRegisters(); } if (block.terminator == SPIRBlock::Return) @@ -176,6 +241,9 @@ spv::Id CompilerGXP::createBlock(const SPIRBlock &block) { } spv::Id CompilerGXP::createFunction(const SPIRFunction &function) { + if (config.optimizeRegisterSpace) + createIdUseCounts(function); + for (uint32_t local : function.local_variables) { SPIRType type = get_type_from_variable(local); @@ -341,14 +409,26 @@ std::vector CompilerGXP::compileData() { // try { createFunction(entryFunction); // } catch (std::runtime_error &e) { -// fmt::print("{}\n", e.what()); +// fmt::print("Error: {}\n", e.what()); // } + if (config.logDebug) { + if (config.optimizeRegisterSpace) { + fmt::print("All Detected Usages:\n"); + for (const auto &pair : idUseCounts) { + if (idUsesLeft[pair.first] == 0) + fmt::print("[id: {}] uses: {}\n", pair.first, pair.second); + else + fmt::print("[id: {}] uses: {}, remaining: {}\n", pair.first, pair.second, idUsesLeft[pair.first]); + } + } + } + return builder.build(); } -CompilerGXP::CompilerGXP(const std::vector &data, CompilerConfig config) - : Compiler(data), config(config), builder(config.printDisassembly, config.printAllocations) { +CompilerGXP::CompilerGXP(const std::vector &data, TranslatorConfig config) + : Compiler(data), config(config), builder(translateConfig(config)) { createTranslators(); createExtensions(); } diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h index daadc68..6d526b8 100644 --- a/src/util/include/util/util.h +++ b/src/util/include/util/util.h @@ -20,7 +20,7 @@ std::vector loadFileData(const std::string &path) { return data; } -template +template typename T::mapped_type &getOrThrow(T &map, typename T::key_type key) { auto reference = map.find(key); if (reference == map.end()) @@ -28,3 +28,25 @@ typename T::mapped_type &getOrThrow(T &map, typename T::key_type key) { return 
reference->second; } + +inline size_t allocate(const bool *map, size_t mapSize, size_t size, size_t alignment = 1) { + for (size_t a = 0; a < mapSize - size + 1; a++) { + if (a % alignment == 0 && !map[a]) { + size_t start = a; + + do { + a++; + } while (a < mapSize && !map[a] && a - start < size); + + if (a - start == size) + return start; + } + } + + return ~0u; +} + +template +bool contains(std::vector array, T content) { + return std::find(array.begin(), array.end(), content) != array.end(); +} From 20e4c9c38cff073884c550b50c3b226693ccd16e Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Fri, 1 Nov 2019 15:12:31 -0400 Subject: [PATCH 14/19] Instruction parameter checking support --- generate-usse.py | 16 +- src/gxp/include/gxp/instructions.h | 832 ++++----- src/gxp/src/instructions.cpp | 2606 ++++++++++++++++++---------- 3 files changed, 2124 insertions(+), 1330 deletions(-) diff --git a/generate-usse.py b/generate-usse.py index 401b86c..96d4ab1 100644 --- a/generate-usse.py +++ b/generate-usse.py @@ -8,7 +8,7 @@ class Protection(Enum): DEBUG = 2 -protection = Protection.SAFE +protection = Protection.DEBUG bit_types = """ typedef uint64_t Instruction; @@ -39,7 +39,7 @@ class Protection(Enum): elif type(member_info) is int: index -= member_info if not first: - parameters += ',\n\t\t\t' + parameters += ',\n\t\t' parameters += 'Param/*' + str(member_info) + '*/ ' + member_name first = False if protection == Protection.DEBUG: @@ -60,7 +60,7 @@ class Protection(Enum): function += '\t\tinst |= 0b' + member_info['match'] + 'ull << ' + str(index) + 'u;\n' else: if not first: - parameters += ',\n\t\t\t' + parameters += ',\n\t\t' parameters += 'Param/*' + str(member_info['size']) + '*/ ' + member_name first = False if protection == Protection.DEBUG: @@ -74,14 +74,14 @@ class Protection(Enum): + ('1' * member_info['size']) + 'ull) << ' + str(index) + 'u;\n' if parameters: - declaration += '\n\t\t\t' + parameters + declaration += '\n\t\t' + 
parameters declaration += ')' header += '\t' + declaration + ';\n\n' source += '\t' + declaration + ' {\n' + function + '\t\treturn inst;\n\t}\n\n' - header += '}' - source += '}' - with open('instructions.h', 'w+') as header_out: + header += '}\n' + source += '}\n' + with open('src/gxp/include/gxp/instructions.h', 'w+') as header_out: header_out.write(header) - with open('instructions.cpp', 'w+') as source_out: + with open('src/gxp/src/instructions.cpp', 'w+') as source_out: source_out.write(source) diff --git a/src/gxp/include/gxp/instructions.h b/src/gxp/include/gxp/instructions.h index e4f5836..75f852a 100644 --- a/src/gxp/include/gxp/instructions.h +++ b/src/gxp/include/gxp/instructions.h @@ -6,440 +6,440 @@ namespace usse { typedef uint64_t Instruction; typedef uint64_t Param; - Instruction makeVMOV( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ test_bit_2, - Param/*1*/ src0_comp_sel, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end_or_src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ move_type, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*3*/ move_data_type, - Param/*1*/ test_bit_1, - Param/*4*/ src0_swiz, - Param/*1*/ src0_bank_sel, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*4*/ dest_mask, - Param/*6*/ dest_n, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVMOV( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + 
Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVMAD( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ gpi1_swiz_ext, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*1*/ gpi1_neg, - Param/*1*/ gpi1_abs, - Param/*1*/ gpi0_swiz_ext, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*4*/ gpi1_swiz, - Param/*2*/ gpi1_n, - Param/*1*/ gpi0_neg, - Param/*1*/ src1_swiz_ext, - Param/*4*/ src1_swiz, - Param/*6*/ src1_n); + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + Param/*4*/ src1_swiz, + Param/*6*/ src1_n); - Instruction makeVMAD2( - Param/*1*/ dat_fmt, - Param/*2*/ pred, - Param/*1*/ skipinv, - Param/*1*/ src0_swiz_bits2, - Param/*1*/ syncstart, - Param/*1*/ src0_abs, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ src2_swiz, - Param/*1*/ src1_swiz_bit2, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*2*/ src2_mod, - Param/*1*/ src0_bank, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ dest_n, - Param/*2*/ src1_swiz_bits01, - Param/*2*/ src0_swiz_bits01, - 
Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVDP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ clip_plane_enable, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*3*/ clip_plane_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*3*/ src1_swiz_w, - Param/*3*/ src1_swiz_z, - Param/*3*/ src1_swiz_y, - Param/*3*/ src1_swiz_x, - Param/*6*/ src1_n); + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n); - Instruction makeVNMAD32( - Param/*3*/ pred, - Param/*1*/ 
skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVNMAD16( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n); + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, 
+ Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n); - Instruction makeVLDST( - Param/*2*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ moe_expand, - Param/*1*/ sync_start, - Param/*1*/ cache_ext, - Param/*1*/ src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ mask_count, - Param/*2*/ addr_mode, - Param/*2*/ mode, - Param/*1*/ dest_bank_primattr, - Param/*1*/ range_enable, - Param/*2*/ data_type, - Param/*1*/ increment_or_decrement, - Param/*1*/ src0_bank, - Param/*1*/ cache_by_pass12, - Param/*1*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVTST( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ src1_neg, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*3*/ chan_cc, - Param/*2*/ pdst_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - 
Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVTST( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVTSTMSK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ test_flag_2, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*2*/ tst_mask_type, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVPCK( - Param/*3*/ pred, - Param/*1*/ skipinv, - 
Param/*1*/ nosched, - Param/*1*/ unknown, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ repeat_count, - Param/*3*/ src_fmt, - Param/*3*/ dest_fmt, - Param/*4*/ dest_mask, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*7*/ dest_n, - Param/*2*/ comp_sel_3, - Param/*1*/ scale, - Param/*2*/ comp_sel_1, - Param/*2*/ comp_sel_2, - Param/*6*/ src1_n, - Param/*1*/ comp0_sel_bit1, - Param/*6*/ src2_n, - Param/*1*/ comp_sel_0_bit0); + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0); - Instruction makeVBW( - Param/*3*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ repeat_count, - Param/*1*/ sync_start, - Param/*1*/ dest_ext, - Param/*1*/ end, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*4*/ mask_count, - Param/*1*/ src2_invert, - Param/*5*/ src2_rot, - Param/*2*/ src2_exth, - Param/*1*/ op2, - Param/*1*/ bitwise_partial, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src2_sel, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ sync_start, + Param/*1*/ dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + 
Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeSMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ syncstart, - Param/*1*/ minpack, - Param/*1*/ src0_ext, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*2*/ fconv_type, - Param/*2*/ mask_count, - Param/*2*/ dim, - Param/*2*/ lod_mode, - Param/*1*/ dest_use_pa, - Param/*2*/ sb_mode, - Param/*2*/ src0_type, - Param/*1*/ src0_bank, - Param/*2*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeVCOMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ dest_type, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*4*/ repeat_count, - Param/*1*/ nosched, - Param/*2*/ op2, - Param/*2*/ src_type, - Param/*2*/ src1_mod, - Param/*2*/ src_comp, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*7*/ dest_n, - Param/*7*/ src1_n, - Param/*4*/ write_mask); + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + 
Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask); - Instruction makeSOP2( - Param/*2*/ pred, - Param/*1*/ cmod1, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*2*/ asel1, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ cmod2, - Param/*3*/ count, - Param/*1*/ amod1, - Param/*2*/ asel2, - Param/*3*/ csel1, - Param/*3*/ csel2, - Param/*1*/ amod2, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ src1_mod, - Param/*2*/ cop, - Param/*2*/ aop, - Param/*1*/ asrc1_mod, - Param/*1*/ dest_mod, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeBR( - Param/*3*/ pred, - Param/*1*/ syncend, - Param/*1*/ exception, - Param/*1*/ pwait, - Param/*1*/ sync_ext, - Param/*1*/ nosched, - Param/*1*/ br_monitor, - Param/*1*/ save_link, - Param/*1*/ br_type, - Param/*1*/ any_inst, - Param/*1*/ all_inst, - Param/*20*/ br_off); + Instruction makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ br_monitor, + Param/*1*/ save_link, + 
Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off); - Instruction makePHAS( - Param/*1*/ sprvv, - Param/*1*/ end, - Param/*1*/ imm, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ mode, - Param/*1*/ rate_hi, - Param/*1*/ rate_lo_or_nosched, - Param/*3*/ wait_cond, - Param/*8*/ temp_count, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ exe_addr_high, - Param/*7*/ src1_n_or_exe_addr_mid, - Param/*7*/ src2_n_or_exe_addr_low); + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low); - Instruction makeNOP(); + Instruction makeNOP(); - Instruction makeSMLSI( - Param/*1*/ nosched, - Param/*4*/ temp_limit, - Param/*4*/ pa_limit, - Param/*4*/ sa_limit, - Param/*1*/ dest_inc_mode, - Param/*1*/ src0_inc_mode, - Param/*1*/ src1_inc_mode, - Param/*1*/ src2_inc_mode, - Param/*8*/ dest_inc, - Param/*8*/ src0_inc, - Param/*8*/ src1_inc, - Param/*8*/ src2_inc); + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc); - Instruction makeEMIT( - Param/*2*/ sideband_high, - Param/*1*/ src0_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ target, - Param/*1*/ task_start_or_mte_hi, - Param/*1*/ task_end_or_mte_lo, - Param/*1*/ nosched, - Param/*6*/ sideband_mid, - Param/*1*/ src0_bank, - Param/*2*/ incp, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - 
Param/*6*/ sideband_low, - Param/*1*/ freep, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n); + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n); - Instruction makeSPEC( - Param/*1*/ special, - Param/*2*/ category); + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category); } diff --git a/src/gxp/src/instructions.cpp b/src/gxp/src/instructions.cpp index e9563fc..ab1a254 100644 --- a/src/gxp/src/instructions.cpp +++ b/src/gxp/src/instructions.cpp @@ -1,930 +1,1724 @@ #include +#include + namespace usse { - Instruction makeVMOV( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ test_bit_2, - Param/*1*/ src0_comp_sel, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end_or_src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ move_type, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*3*/ move_data_type, - Param/*1*/ test_bit_1, - Param/*4*/ src0_swiz, - Param/*1*/ src0_bank_sel, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*4*/ dest_mask, - Param/*6*/ dest_n, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00111ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (test_bit_2 & 0b1ull) << 54u; - inst |= (src0_comp_sel & 0b1ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= (end_or_src0_bank_ext & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - 
inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (move_type & 0b11ull) << 46u; - inst |= (repeat_count & 0b11ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (move_data_type & 0b111ull) << 40u; - inst |= (test_bit_1 & 0b1ull) << 39u; - inst |= (src0_swiz & 0b1111ull) << 35u; - inst |= (src0_bank_sel & 0b1ull) << 34u; - inst |= (dest_bank_sel & 0b11ull) << 32u; - inst |= (src1_bank_sel & 0b11ull) << 30u; - inst |= (src2_bank_sel & 0b11ull) << 28u; - inst |= (dest_mask & 0b1111ull) << 24u; - inst |= (dest_n & 0b111111ull) << 18u; - inst |= (src0_n & 0b111111ull) << 12u; - inst |= (src1_n & 0b111111ull) << 6u; - inst |= (src2_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVMOV( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ test_bit_2, + Param/*1*/ src0_comp_sel, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end_or_src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ move_type, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*3*/ move_data_type, + Param/*1*/ test_bit_1, + Param/*4*/ src0_swiz, + Param/*1*/ src0_bank_sel, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*4*/ dest_mask, + Param/*6*/ dest_n, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00111ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VMOV out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VMOV out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((test_bit_2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_bit_2 for VMOV out of bounds."); + inst |= (test_bit_2 & 0b1ull) << 54u; + if ((src0_comp_sel & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_comp_sel for VMOV out of bounds."); + inst |= (src0_comp_sel & 0b1ull) << 53u; + 
if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VMOV out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_ext for VMOV out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((end_or_src0_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end_or_src0_bank_ext for VMOV out of bounds."); + inst |= (end_or_src0_bank_ext & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VMOV out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VMOV out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((move_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field move_type for VMOV out of bounds."); + inst |= (move_type & 0b11ull) << 46u; + if ((repeat_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VMOV out of bounds."); + inst |= (repeat_count & 0b11ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VMOV out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((move_data_type & ~0b111ull) != 0) + throw std::runtime_error("Instruction field move_data_type for VMOV out of bounds."); + inst |= (move_data_type & 0b111ull) << 40u; + if ((test_bit_1 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_bit_1 for VMOV out of bounds."); + inst |= (test_bit_1 & 0b1ull) << 39u; + if ((src0_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field src0_swiz for VMOV out of bounds."); + inst |= (src0_swiz & 0b1111ull) << 35u; + if ((src0_bank_sel & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank_sel for VMOV out of bounds."); + inst |= (src0_bank_sel & 0b1ull) << 34u; + if 
((dest_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank_sel for VMOV out of bounds."); + inst |= (dest_bank_sel & 0b11ull) << 32u; + if ((src1_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank_sel for VMOV out of bounds."); + inst |= (src1_bank_sel & 0b11ull) << 30u; + if ((src2_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank_sel for VMOV out of bounds."); + inst |= (src2_bank_sel & 0b11ull) << 28u; + if ((dest_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field dest_mask for VMOV out of bounds."); + inst |= (dest_mask & 0b1111ull) << 24u; + if ((dest_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VMOV out of bounds."); + inst |= (dest_n & 0b111111ull) << 18u; + if ((src0_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src0_n for VMOV out of bounds."); + inst |= (src0_n & 0b111111ull) << 12u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VMOV out of bounds."); + inst |= (src1_n & 0b111111ull) << 6u; + if ((src2_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VMOV out of bounds."); + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVMAD( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ gpi1_swiz_ext, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*1*/ gpi1_neg, - Param/*1*/ gpi1_abs, - Param/*1*/ gpi0_swiz_ext, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*4*/ gpi1_swiz, - Param/*2*/ gpi1_n, - Param/*1*/ gpi0_neg, - Param/*1*/ src1_swiz_ext, - Param/*4*/ src1_swiz, - Param/*6*/ 
src1_n) { - Instruction inst = 0; - inst |= 0b00011ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (gpi1_swiz_ext & 0b1ull) << 54u; - inst |= 0b1ull << 53u; - inst |= (opcode2 & 0b1ull) << 52u; - inst |= (dest_use_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (increment_mode & 0b11ull) << 47u; - inst |= (gpi0_abs & 0b1ull) << 46u; - inst |= (repeat_count & 0b11ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (write_mask & 0b1111ull) << 39u; - inst |= (src1_neg & 0b1ull) << 38u; - inst |= (src1_abs & 0b1ull) << 37u; - inst |= (gpi1_neg & 0b1ull) << 36u; - inst |= (gpi1_abs & 0b1ull) << 35u; - inst |= (gpi0_swiz_ext & 0b1ull) << 34u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (gpi0_n & 0b11ull) << 28u; - inst |= (dest_n & 0b111111ull) << 22u; - inst |= (gpi0_swiz & 0b1111ull) << 18u; - inst |= (gpi1_swiz & 0b1111ull) << 14u; - inst |= (gpi1_n & 0b11ull) << 12u; - inst |= (gpi0_neg & 0b1ull) << 11u; - inst |= (src1_swiz_ext & 0b1ull) << 10u; - inst |= (src1_swiz & 0b1111ull) << 6u; - inst |= (src1_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVMAD( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ gpi1_swiz_ext, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*1*/ gpi1_neg, + Param/*1*/ gpi1_abs, + Param/*1*/ gpi0_swiz_ext, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*4*/ gpi1_swiz, + Param/*2*/ gpi1_n, + Param/*1*/ gpi0_neg, + Param/*1*/ src1_swiz_ext, + Param/*4*/ src1_swiz, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + if ((pred & ~0b111ull) 
!= 0) + throw std::runtime_error("Instruction field pred for VMAD out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VMAD out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((gpi1_swiz_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi1_swiz_ext for VMAD out of bounds."); + inst |= (gpi1_swiz_ext & 0b1ull) << 54u; + inst |= 0b1ull << 53u; + if ((opcode2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field opcode2 for VMAD out of bounds."); + inst |= (opcode2 & 0b1ull) << 52u; + if ((dest_use_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_use_bank_ext for VMAD out of bounds."); + inst |= (dest_use_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for VMAD out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VMAD out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((increment_mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field increment_mode for VMAD out of bounds."); + inst |= (increment_mode & 0b11ull) << 47u; + if ((gpi0_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi0_abs for VMAD out of bounds."); + inst |= (gpi0_abs & 0b1ull) << 46u; + if ((repeat_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VMAD out of bounds."); + inst |= (repeat_count & 0b11ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VMAD out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((write_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field write_mask for VMAD out of bounds."); + inst |= (write_mask & 0b1111ull) << 39u; + if ((src1_neg & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_neg for 
VMAD out of bounds."); + inst |= (src1_neg & 0b1ull) << 38u; + if ((src1_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_abs for VMAD out of bounds."); + inst |= (src1_abs & 0b1ull) << 37u; + if ((gpi1_neg & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi1_neg for VMAD out of bounds."); + inst |= (gpi1_neg & 0b1ull) << 36u; + if ((gpi1_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi1_abs for VMAD out of bounds."); + inst |= (gpi1_abs & 0b1ull) << 35u; + if ((gpi0_swiz_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi0_swiz_ext for VMAD out of bounds."); + inst |= (gpi0_swiz_ext & 0b1ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VMAD out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VMAD out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((gpi0_n & ~0b11ull) != 0) + throw std::runtime_error("Instruction field gpi0_n for VMAD out of bounds."); + inst |= (gpi0_n & 0b11ull) << 28u; + if ((dest_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VMAD out of bounds."); + inst |= (dest_n & 0b111111ull) << 22u; + if ((gpi0_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field gpi0_swiz for VMAD out of bounds."); + inst |= (gpi0_swiz & 0b1111ull) << 18u; + if ((gpi1_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field gpi1_swiz for VMAD out of bounds."); + inst |= (gpi1_swiz & 0b1111ull) << 14u; + if ((gpi1_n & ~0b11ull) != 0) + throw std::runtime_error("Instruction field gpi1_n for VMAD out of bounds."); + inst |= (gpi1_n & 0b11ull) << 12u; + if ((gpi0_neg & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi0_neg for VMAD out of bounds."); + inst |= (gpi0_neg & 0b1ull) << 11u; + if ((src1_swiz_ext & ~0b1ull) != 0) + throw 
std::runtime_error("Instruction field src1_swiz_ext for VMAD out of bounds."); + inst |= (src1_swiz_ext & 0b1ull) << 10u; + if ((src1_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz for VMAD out of bounds."); + inst |= (src1_swiz & 0b1111ull) << 6u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VMAD out of bounds."); + inst |= (src1_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVMAD2( - Param/*1*/ dat_fmt, - Param/*2*/ pred, - Param/*1*/ skipinv, - Param/*1*/ src0_swiz_bits2, - Param/*1*/ syncstart, - Param/*1*/ src0_abs, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ src2_swiz, - Param/*1*/ src1_swiz_bit2, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*2*/ src2_mod, - Param/*1*/ src0_bank, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ dest_n, - Param/*2*/ src1_swiz_bits01, - Param/*2*/ src0_swiz_bits01, - Param/*6*/ src0_n, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00000ull << 59u; - inst |= (dat_fmt & 0b1ull) << 58u; - inst |= (pred & 0b11ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (src0_swiz_bits2 & 0b1ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (src0_abs & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (src2_swiz & 0b111ull) << 45u; - inst |= (src1_swiz_bit2 & 0b1ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (dest_mask & 0b1111ull) << 39u; - inst |= (src1_mod & 0b11ull) << 37u; - inst |= (src2_mod & 0b11ull) << 35u; - inst |= (src0_bank & 0b1ull) << 34u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b111111ull) << 22u; - inst |= (src1_swiz_bits01 & 0b11ull) << 20u; - inst |= (src0_swiz_bits01 & 0b11ull) << 18u; - inst |= (src0_n & 
0b111111ull) << 12u; - inst |= (src1_n & 0b111111ull) << 6u; - inst |= (src2_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVMAD2( + Param/*1*/ dat_fmt, + Param/*2*/ pred, + Param/*1*/ skipinv, + Param/*1*/ src0_swiz_bits2, + Param/*1*/ syncstart, + Param/*1*/ src0_abs, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ src2_swiz, + Param/*1*/ src1_swiz_bit2, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*2*/ src2_mod, + Param/*1*/ src0_bank, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ dest_n, + Param/*2*/ src1_swiz_bits01, + Param/*2*/ src0_swiz_bits01, + Param/*6*/ src0_n, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00000ull << 59u; + if ((dat_fmt & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dat_fmt for VMAD2 out of bounds."); + inst |= (dat_fmt & 0b1ull) << 58u; + if ((pred & ~0b11ull) != 0) + throw std::runtime_error("Instruction field pred for VMAD2 out of bounds."); + inst |= (pred & 0b11ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VMAD2 out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((src0_swiz_bits2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_swiz_bits2 for VMAD2 out of bounds."); + inst |= (src0_swiz_bits2 & 0b1ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VMAD2 out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((src0_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_abs for VMAD2 out of bounds."); + inst |= (src0_abs & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VMAD2 out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VMAD2 out of 
bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((src2_swiz & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src2_swiz for VMAD2 out of bounds."); + inst |= (src2_swiz & 0b111ull) << 45u; + if ((src1_swiz_bit2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_bit2 for VMAD2 out of bounds."); + inst |= (src1_swiz_bit2 & 0b1ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VMAD2 out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((dest_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field dest_mask for VMAD2 out of bounds."); + inst |= (dest_mask & 0b1111ull) << 39u; + if ((src1_mod & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_mod for VMAD2 out of bounds."); + inst |= (src1_mod & 0b11ull) << 37u; + if ((src2_mod & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_mod for VMAD2 out of bounds."); + inst |= (src2_mod & 0b11ull) << 35u; + if ((src0_bank & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank for VMAD2 out of bounds."); + inst |= (src0_bank & 0b1ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VMAD2 out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VMAD2 out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for VMAD2 out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VMAD2 out of bounds."); + inst |= (dest_n & 0b111111ull) << 22u; + if ((src1_swiz_bits01 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_bits01 for VMAD2 out of bounds."); + inst |= (src1_swiz_bits01 & 0b11ull) << 20u; + if 
((src0_swiz_bits01 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src0_swiz_bits01 for VMAD2 out of bounds."); + inst |= (src0_swiz_bits01 & 0b11ull) << 18u; + if ((src0_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src0_n for VMAD2 out of bounds."); + inst |= (src0_n & 0b111111ull) << 12u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VMAD2 out of bounds."); + inst |= (src1_n & 0b111111ull) << 6u; + if ((src2_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VMAD2 out of bounds."); + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVDP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ clip_plane_enable, - Param/*1*/ opcode2, - Param/*1*/ dest_use_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*2*/ increment_mode, - Param/*1*/ gpi0_abs, - Param/*2*/ repeat_count, - Param/*1*/ nosched, - Param/*4*/ write_mask, - Param/*1*/ src1_neg, - Param/*1*/ src1_abs, - Param/*3*/ clip_plane_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ gpi0_n, - Param/*6*/ dest_n, - Param/*4*/ gpi0_swiz, - Param/*3*/ src1_swiz_w, - Param/*3*/ src1_swiz_z, - Param/*3*/ src1_swiz_y, - Param/*3*/ src1_swiz_x, - Param/*6*/ src1_n) { - Instruction inst = 0; - inst |= 0b00011ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (clip_plane_enable & 0b1ull) << 54u; - inst |= 0b0ull << 53u; - inst |= (opcode2 & 0b1ull) << 52u; - inst |= (dest_use_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (increment_mode & 0b11ull) << 47u; - inst |= (gpi0_abs & 0b1ull) << 46u; - inst |= (repeat_count & 0b11ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (write_mask & 0b1111ull) << 39u; - inst |= (src1_neg & 0b1ull) << 38u; - inst |= (src1_abs & 0b1ull) << 37u; - inst |= (clip_plane_n & 0b111ull) << 34u; - inst |= 
(dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (gpi0_n & 0b11ull) << 28u; - inst |= (dest_n & 0b111111ull) << 22u; - inst |= (gpi0_swiz & 0b1111ull) << 18u; - inst |= (src1_swiz_w & 0b111ull) << 15u; - inst |= (src1_swiz_z & 0b111ull) << 12u; - inst |= (src1_swiz_y & 0b111ull) << 9u; - inst |= (src1_swiz_x & 0b111ull) << 6u; - inst |= (src1_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVDP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ clip_plane_enable, + Param/*1*/ opcode2, + Param/*1*/ dest_use_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*2*/ increment_mode, + Param/*1*/ gpi0_abs, + Param/*2*/ repeat_count, + Param/*1*/ nosched, + Param/*4*/ write_mask, + Param/*1*/ src1_neg, + Param/*1*/ src1_abs, + Param/*3*/ clip_plane_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ gpi0_n, + Param/*6*/ dest_n, + Param/*4*/ gpi0_swiz, + Param/*3*/ src1_swiz_w, + Param/*3*/ src1_swiz_z, + Param/*3*/ src1_swiz_y, + Param/*3*/ src1_swiz_x, + Param/*6*/ src1_n) { + Instruction inst = 0; + inst |= 0b00011ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VDP out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VDP out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((clip_plane_enable & ~0b1ull) != 0) + throw std::runtime_error("Instruction field clip_plane_enable for VDP out of bounds."); + inst |= (clip_plane_enable & 0b1ull) << 54u; + inst |= 0b0ull << 53u; + if ((opcode2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field opcode2 for VDP out of bounds."); + inst |= (opcode2 & 0b1ull) << 52u; + if ((dest_use_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_use_bank_ext for VDP out of bounds."); + inst |= (dest_use_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw 
std::runtime_error("Instruction field end for VDP out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VDP out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((increment_mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field increment_mode for VDP out of bounds."); + inst |= (increment_mode & 0b11ull) << 47u; + if ((gpi0_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field gpi0_abs for VDP out of bounds."); + inst |= (gpi0_abs & 0b1ull) << 46u; + if ((repeat_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VDP out of bounds."); + inst |= (repeat_count & 0b11ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VDP out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((write_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field write_mask for VDP out of bounds."); + inst |= (write_mask & 0b1111ull) << 39u; + if ((src1_neg & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_neg for VDP out of bounds."); + inst |= (src1_neg & 0b1ull) << 38u; + if ((src1_abs & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_abs for VDP out of bounds."); + inst |= (src1_abs & 0b1ull) << 37u; + if ((clip_plane_n & ~0b111ull) != 0) + throw std::runtime_error("Instruction field clip_plane_n for VDP out of bounds."); + inst |= (clip_plane_n & 0b111ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VDP out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VDP out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((gpi0_n & ~0b11ull) != 0) + throw std::runtime_error("Instruction field gpi0_n for VDP out of bounds."); + inst |= (gpi0_n & 0b11ull) << 
28u; + if ((dest_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VDP out of bounds."); + inst |= (dest_n & 0b111111ull) << 22u; + if ((gpi0_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field gpi0_swiz for VDP out of bounds."); + inst |= (gpi0_swiz & 0b1111ull) << 18u; + if ((src1_swiz_w & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_w for VDP out of bounds."); + inst |= (src1_swiz_w & 0b111ull) << 15u; + if ((src1_swiz_z & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_z for VDP out of bounds."); + inst |= (src1_swiz_z & 0b111ull) << 12u; + if ((src1_swiz_y & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_y for VDP out of bounds."); + inst |= (src1_swiz_y & 0b111ull) << 9u; + if ((src1_swiz_x & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_x for VDP out of bounds."); + inst |= (src1_swiz_x & 0b111ull) << 6u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VDP out of bounds."); + inst |= (src1_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVNMAD32( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00001ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (src1_swiz_10_11 & 0b11ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= 
(src1_swiz_9 & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (src2_swiz & 0b1111ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (dest_mask & 0b1111ull) << 39u; - inst |= (src1_mod & 0b11ull) << 37u; - inst |= (src2_mod & 0b1ull) << 36u; - inst |= (src1_swiz_7_8 & 0b11ull) << 34u; - inst |= (dest_bank_sel & 0b11ull) << 32u; - inst |= (src1_bank_sel & 0b11ull) << 30u; - inst |= (src2_bank_sel & 0b11ull) << 28u; - inst |= (dest_n & 0b111111ull) << 22u; - inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; - inst |= (op2 & 0b111ull) << 12u; - inst |= (src1_n & 0b111111ull) << 6u; - inst |= (src2_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVNMAD32( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00001ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VNMAD32 out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VNMAD32 out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((src1_swiz_10_11 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_10_11 for VNMAD32 out of bounds."); + inst |= (src1_swiz_10_11 & 0b11ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VNMAD32 out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_bank_ext 
& ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_ext for VNMAD32 out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((src1_swiz_9 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_9 for VNMAD32 out of bounds."); + inst |= (src1_swiz_9 & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VNMAD32 out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VNMAD32 out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((src2_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field src2_swiz for VNMAD32 out of bounds."); + inst |= (src2_swiz & 0b1111ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VNMAD32 out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((dest_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field dest_mask for VNMAD32 out of bounds."); + inst |= (dest_mask & 0b1111ull) << 39u; + if ((src1_mod & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_mod for VNMAD32 out of bounds."); + inst |= (src1_mod & 0b11ull) << 37u; + if ((src2_mod & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_mod for VNMAD32 out of bounds."); + inst |= (src2_mod & 0b1ull) << 36u; + if ((src1_swiz_7_8 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_7_8 for VNMAD32 out of bounds."); + inst |= (src1_swiz_7_8 & 0b11ull) << 34u; + if ((dest_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank_sel for VNMAD32 out of bounds."); + inst |= (dest_bank_sel & 0b11ull) << 32u; + if ((src1_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank_sel for VNMAD32 out of bounds."); + inst |= (src1_bank_sel & 0b11ull) << 30u; + if ((src2_bank_sel & 
~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank_sel for VNMAD32 out of bounds."); + inst |= (src2_bank_sel & 0b11ull) << 28u; + if ((dest_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VNMAD32 out of bounds."); + inst |= (dest_n & 0b111111ull) << 22u; + if ((src1_swiz_0_6 & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_0_6 for VNMAD32 out of bounds."); + inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; + if ((op2 & ~0b111ull) != 0) + throw std::runtime_error("Instruction field op2 for VNMAD32 out of bounds."); + inst |= (op2 & 0b111ull) << 12u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VNMAD32 out of bounds."); + inst |= (src1_n & 0b111111ull) << 6u; + if ((src2_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VNMAD32 out of bounds."); + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVNMAD16( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ src1_swiz_10_11, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ src1_swiz_9, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ src2_swiz, - Param/*1*/ nosched, - Param/*4*/ dest_mask, - Param/*2*/ src1_mod, - Param/*1*/ src2_mod, - Param/*2*/ src1_swiz_7_8, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*6*/ dest_n, - Param/*7*/ src1_swiz_0_6, - Param/*3*/ op2, - Param/*6*/ src1_n, - Param/*6*/ src2_n) { - Instruction inst = 0; - inst |= 0b00010ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (src1_swiz_10_11 & 0b11ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= (src1_swiz_9 & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (src2_swiz & 0b1111ull) << 44u; - inst |= (nosched & 0b1ull) 
<< 43u; - inst |= (dest_mask & 0b1111ull) << 39u; - inst |= (src1_mod & 0b11ull) << 37u; - inst |= (src2_mod & 0b1ull) << 36u; - inst |= (src1_swiz_7_8 & 0b11ull) << 34u; - inst |= (dest_bank_sel & 0b11ull) << 32u; - inst |= (src1_bank_sel & 0b11ull) << 30u; - inst |= (src2_bank_sel & 0b11ull) << 28u; - inst |= (dest_n & 0b111111ull) << 22u; - inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; - inst |= (op2 & 0b111ull) << 12u; - inst |= (src1_n & 0b111111ull) << 6u; - inst |= (src2_n & 0b111111ull) << 0u; - return inst; - } + Instruction makeVNMAD16( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ src1_swiz_10_11, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ src1_swiz_9, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ src2_swiz, + Param/*1*/ nosched, + Param/*4*/ dest_mask, + Param/*2*/ src1_mod, + Param/*1*/ src2_mod, + Param/*2*/ src1_swiz_7_8, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*6*/ dest_n, + Param/*7*/ src1_swiz_0_6, + Param/*3*/ op2, + Param/*6*/ src1_n, + Param/*6*/ src2_n) { + Instruction inst = 0; + inst |= 0b00010ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VNMAD16 out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VNMAD16 out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((src1_swiz_10_11 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_10_11 for VNMAD16 out of bounds."); + inst |= (src1_swiz_10_11 & 0b11ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VNMAD16 out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_ext for VNMAD16 out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((src1_swiz_9 & ~0b1ull) != 0) + 
throw std::runtime_error("Instruction field src1_swiz_9 for VNMAD16 out of bounds."); + inst |= (src1_swiz_9 & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VNMAD16 out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VNMAD16 out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((src2_swiz & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field src2_swiz for VNMAD16 out of bounds."); + inst |= (src2_swiz & 0b1111ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VNMAD16 out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((dest_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field dest_mask for VNMAD16 out of bounds."); + inst |= (dest_mask & 0b1111ull) << 39u; + if ((src1_mod & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_mod for VNMAD16 out of bounds."); + inst |= (src1_mod & 0b11ull) << 37u; + if ((src2_mod & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_mod for VNMAD16 out of bounds."); + inst |= (src2_mod & 0b1ull) << 36u; + if ((src1_swiz_7_8 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_7_8 for VNMAD16 out of bounds."); + inst |= (src1_swiz_7_8 & 0b11ull) << 34u; + if ((dest_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank_sel for VNMAD16 out of bounds."); + inst |= (dest_bank_sel & 0b11ull) << 32u; + if ((src1_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank_sel for VNMAD16 out of bounds."); + inst |= (src1_bank_sel & 0b11ull) << 30u; + if ((src2_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank_sel for VNMAD16 out of bounds."); + inst |= (src2_bank_sel & 0b11ull) << 28u; + if ((dest_n & ~0b111111ull) != 0) + 
throw std::runtime_error("Instruction field dest_n for VNMAD16 out of bounds."); + inst |= (dest_n & 0b111111ull) << 22u; + if ((src1_swiz_0_6 & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_swiz_0_6 for VNMAD16 out of bounds."); + inst |= (src1_swiz_0_6 & 0b1111111ull) << 15u; + if ((op2 & ~0b111ull) != 0) + throw std::runtime_error("Instruction field op2 for VNMAD16 out of bounds."); + inst |= (op2 & 0b111ull) << 12u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VNMAD16 out of bounds."); + inst |= (src1_n & 0b111111ull) << 6u; + if ((src2_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VNMAD16 out of bounds."); + inst |= (src2_n & 0b111111ull) << 0u; + return inst; + } - Instruction makeVLDST( - Param/*2*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ moe_expand, - Param/*1*/ sync_start, - Param/*1*/ cache_ext, - Param/*1*/ src0_bank_ext, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*4*/ mask_count, - Param/*2*/ addr_mode, - Param/*2*/ mode, - Param/*1*/ dest_bank_primattr, - Param/*1*/ range_enable, - Param/*2*/ data_type, - Param/*1*/ increment_or_decrement, - Param/*1*/ src0_bank, - Param/*1*/ cache_by_pass12, - Param/*1*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b111ull << 61u; - inst |= (op1 & 0b11ull) << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (nosched & 0b1ull) << 54u; - inst |= (moe_expand & 0b1ull) << 53u; - inst |= (sync_start & 0b1ull) << 52u; - inst |= (cache_ext & 0b1ull) << 51u; - inst |= (src0_bank_ext & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (mask_count & 0b1111ull) << 44u; - inst |= (addr_mode & 0b11ull) << 42u; - inst |= (mode & 
0b11ull) << 40u; - inst |= (dest_bank_primattr & 0b1ull) << 39u; - inst |= (range_enable & 0b1ull) << 38u; - inst |= (data_type & 0b11ull) << 36u; - inst |= (increment_or_decrement & 0b1ull) << 35u; - inst |= (src0_bank & 0b1ull) << 34u; - inst |= (cache_by_pass12 & 0b1ull) << 33u; - inst |= (drc_sel & 0b1ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (src0_n & 0b1111111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeVLDST( + Param/*2*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ moe_expand, + Param/*1*/ sync_start, + Param/*1*/ cache_ext, + Param/*1*/ src0_bank_ext, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*4*/ mask_count, + Param/*2*/ addr_mode, + Param/*2*/ mode, + Param/*1*/ dest_bank_primattr, + Param/*1*/ range_enable, + Param/*2*/ data_type, + Param/*1*/ increment_or_decrement, + Param/*1*/ src0_bank, + Param/*1*/ cache_by_pass12, + Param/*1*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b111ull << 61u; + if ((op1 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field op1 for VLDST out of bounds."); + inst |= (op1 & 0b11ull) << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VLDST out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VLDST out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VLDST out of bounds."); + inst |= (nosched & 0b1ull) << 54u; + if ((moe_expand & ~0b1ull) != 0) + throw std::runtime_error("Instruction field moe_expand for VLDST out of 
bounds."); + inst |= (moe_expand & 0b1ull) << 53u; + if ((sync_start & ~0b1ull) != 0) + throw std::runtime_error("Instruction field sync_start for VLDST out of bounds."); + inst |= (sync_start & 0b1ull) << 52u; + if ((cache_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field cache_ext for VLDST out of bounds."); + inst |= (cache_ext & 0b1ull) << 51u; + if ((src0_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank_ext for VLDST out of bounds."); + inst |= (src0_bank_ext & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VLDST out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VLDST out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((mask_count & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field mask_count for VLDST out of bounds."); + inst |= (mask_count & 0b1111ull) << 44u; + if ((addr_mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field addr_mode for VLDST out of bounds."); + inst |= (addr_mode & 0b11ull) << 42u; + if ((mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field mode for VLDST out of bounds."); + inst |= (mode & 0b11ull) << 40u; + if ((dest_bank_primattr & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_primattr for VLDST out of bounds."); + inst |= (dest_bank_primattr & 0b1ull) << 39u; + if ((range_enable & ~0b1ull) != 0) + throw std::runtime_error("Instruction field range_enable for VLDST out of bounds."); + inst |= (range_enable & 0b1ull) << 38u; + if ((data_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field data_type for VLDST out of bounds."); + inst |= (data_type & 0b11ull) << 36u; + if ((increment_or_decrement & ~0b1ull) != 0) + throw std::runtime_error("Instruction field increment_or_decrement for VLDST out of bounds."); + 
inst |= (increment_or_decrement & 0b1ull) << 35u; + if ((src0_bank & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank for VLDST out of bounds."); + inst |= (src0_bank & 0b1ull) << 34u; + if ((cache_by_pass12 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field cache_by_pass12 for VLDST out of bounds."); + inst |= (cache_by_pass12 & 0b1ull) << 33u; + if ((drc_sel & ~0b1ull) != 0) + throw std::runtime_error("Instruction field drc_sel for VLDST out of bounds."); + inst |= (drc_sel & 0b1ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VLDST out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for VLDST out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VLDST out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((src0_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src0_n for VLDST out of bounds."); + inst |= (src0_n & 0b1111111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VLDST out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VLDST out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVTST( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ src1_neg, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*3*/ chan_cc, - Param/*2*/ pdst_n, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - 
Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01001ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (onceonly & 0b1ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_ext & 0b1ull) << 51u; - inst |= (src1_neg & 0b1ull) << 50u; - inst |= (src1_ext & 0b1ull) << 49u; - inst |= (src2_ext & 0b1ull) << 48u; - inst |= (prec & 0b1ull) << 47u; - inst |= (src2_vscomp & 0b1ull) << 46u; - inst |= (rpt_count & 0b11ull) << 44u; - inst |= (sign_test & 0b11ull) << 42u; - inst |= (zero_test & 0b11ull) << 40u; - inst |= (test_crcomb_and & 0b1ull) << 39u; - inst |= (chan_cc & 0b111ull) << 36u; - inst |= (pdst_n & 0b11ull) << 34u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (test_wben & 0b1ull) << 20u; - inst |= (alu_sel & 0b11ull) << 18u; - inst |= (alu_op & 0b1111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeVTST( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ src1_neg, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*3*/ chan_cc, + Param/*2*/ pdst_n, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01001ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VTST out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw 
std::runtime_error("Instruction field skipinv for VTST out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((onceonly & ~0b1ull) != 0) + throw std::runtime_error("Instruction field onceonly for VTST out of bounds."); + inst |= (onceonly & 0b1ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VTST out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_ext for VTST out of bounds."); + inst |= (dest_ext & 0b1ull) << 51u; + if ((src1_neg & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_neg for VTST out of bounds."); + inst |= (src1_neg & 0b1ull) << 50u; + if ((src1_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_ext for VTST out of bounds."); + inst |= (src1_ext & 0b1ull) << 49u; + if ((src2_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_ext for VTST out of bounds."); + inst |= (src2_ext & 0b1ull) << 48u; + if ((prec & ~0b1ull) != 0) + throw std::runtime_error("Instruction field prec for VTST out of bounds."); + inst |= (prec & 0b1ull) << 47u; + if ((src2_vscomp & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_vscomp for VTST out of bounds."); + inst |= (src2_vscomp & 0b1ull) << 46u; + if ((rpt_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field rpt_count for VTST out of bounds."); + inst |= (rpt_count & 0b11ull) << 44u; + if ((sign_test & ~0b11ull) != 0) + throw std::runtime_error("Instruction field sign_test for VTST out of bounds."); + inst |= (sign_test & 0b11ull) << 42u; + if ((zero_test & ~0b11ull) != 0) + throw std::runtime_error("Instruction field zero_test for VTST out of bounds."); + inst |= (zero_test & 0b11ull) << 40u; + if ((test_crcomb_and & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_crcomb_and for VTST out of bounds."); + inst |= (test_crcomb_and & 0b1ull) << 39u; + if ((chan_cc & 
~0b111ull) != 0) + throw std::runtime_error("Instruction field chan_cc for VTST out of bounds."); + inst |= (chan_cc & 0b111ull) << 36u; + if ((pdst_n & ~0b11ull) != 0) + throw std::runtime_error("Instruction field pdst_n for VTST out of bounds."); + inst |= (pdst_n & 0b11ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VTST out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VTST out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for VTST out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VTST out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((test_wben & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_wben for VTST out of bounds."); + inst |= (test_wben & 0b1ull) << 20u; + if ((alu_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field alu_sel for VTST out of bounds."); + inst |= (alu_sel & 0b11ull) << 18u; + if ((alu_op & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field alu_op for VTST out of bounds."); + inst |= (alu_op & 0b1111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VTST out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VTST out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVTSTMSK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ onceonly, - Param/*1*/ syncstart, - Param/*1*/ dest_ext, - Param/*1*/ test_flag_2, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*1*/ prec, - Param/*1*/ src2_vscomp, - Param/*2*/ rpt_count, - 
Param/*2*/ sign_test, - Param/*2*/ zero_test, - Param/*1*/ test_crcomb_and, - Param/*2*/ tst_mask_type, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ test_wben, - Param/*2*/ alu_sel, - Param/*4*/ alu_op, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01111ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (onceonly & 0b1ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_ext & 0b1ull) << 51u; - inst |= (test_flag_2 & 0b1ull) << 50u; - inst |= (src1_ext & 0b1ull) << 49u; - inst |= (src2_ext & 0b1ull) << 48u; - inst |= (prec & 0b1ull) << 47u; - inst |= (src2_vscomp & 0b1ull) << 46u; - inst |= (rpt_count & 0b11ull) << 44u; - inst |= (sign_test & 0b11ull) << 42u; - inst |= (zero_test & 0b11ull) << 40u; - inst |= (test_crcomb_and & 0b1ull) << 39u; - inst |= (tst_mask_type & 0b11ull) << 36u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (test_wben & 0b1ull) << 20u; - inst |= (alu_sel & 0b11ull) << 18u; - inst |= (alu_op & 0b1111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeVTSTMSK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ onceonly, + Param/*1*/ syncstart, + Param/*1*/ dest_ext, + Param/*1*/ test_flag_2, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*1*/ prec, + Param/*1*/ src2_vscomp, + Param/*2*/ rpt_count, + Param/*2*/ sign_test, + Param/*2*/ zero_test, + Param/*1*/ test_crcomb_and, + Param/*2*/ tst_mask_type, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ test_wben, + Param/*2*/ alu_sel, + Param/*4*/ alu_op, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01111ull << 59u; + if ((pred & ~0b111ull) != 
0) + throw std::runtime_error("Instruction field pred for VTSTMSK out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VTSTMSK out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((onceonly & ~0b1ull) != 0) + throw std::runtime_error("Instruction field onceonly for VTSTMSK out of bounds."); + inst |= (onceonly & 0b1ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VTSTMSK out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_ext for VTSTMSK out of bounds."); + inst |= (dest_ext & 0b1ull) << 51u; + if ((test_flag_2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_flag_2 for VTSTMSK out of bounds."); + inst |= (test_flag_2 & 0b1ull) << 50u; + if ((src1_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_ext for VTSTMSK out of bounds."); + inst |= (src1_ext & 0b1ull) << 49u; + if ((src2_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_ext for VTSTMSK out of bounds."); + inst |= (src2_ext & 0b1ull) << 48u; + if ((prec & ~0b1ull) != 0) + throw std::runtime_error("Instruction field prec for VTSTMSK out of bounds."); + inst |= (prec & 0b1ull) << 47u; + if ((src2_vscomp & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_vscomp for VTSTMSK out of bounds."); + inst |= (src2_vscomp & 0b1ull) << 46u; + if ((rpt_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field rpt_count for VTSTMSK out of bounds."); + inst |= (rpt_count & 0b11ull) << 44u; + if ((sign_test & ~0b11ull) != 0) + throw std::runtime_error("Instruction field sign_test for VTSTMSK out of bounds."); + inst |= (sign_test & 0b11ull) << 42u; + if ((zero_test & ~0b11ull) != 0) + throw std::runtime_error("Instruction field zero_test for VTSTMSK out of bounds."); + inst |= (zero_test & 
0b11ull) << 40u; + if ((test_crcomb_and & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_crcomb_and for VTSTMSK out of bounds."); + inst |= (test_crcomb_and & 0b1ull) << 39u; + if ((tst_mask_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field tst_mask_type for VTSTMSK out of bounds."); + inst |= (tst_mask_type & 0b11ull) << 36u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VTSTMSK out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VTSTMSK out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for VTSTMSK out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VTSTMSK out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((test_wben & ~0b1ull) != 0) + throw std::runtime_error("Instruction field test_wben for VTSTMSK out of bounds."); + inst |= (test_wben & 0b1ull) << 20u; + if ((alu_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field alu_sel for VTSTMSK out of bounds."); + inst |= (alu_sel & 0b11ull) << 18u; + if ((alu_op & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field alu_op for VTSTMSK out of bounds."); + inst |= (alu_op & 0b1111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VTSTMSK out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VTSTMSK out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVPCK( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ unknown, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ 
end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*3*/ repeat_count, - Param/*3*/ src_fmt, - Param/*3*/ dest_fmt, - Param/*4*/ dest_mask, - Param/*2*/ dest_bank_sel, - Param/*2*/ src1_bank_sel, - Param/*2*/ src2_bank_sel, - Param/*7*/ dest_n, - Param/*2*/ comp_sel_3, - Param/*1*/ scale, - Param/*2*/ comp_sel_1, - Param/*2*/ comp_sel_2, - Param/*6*/ src1_n, - Param/*1*/ comp0_sel_bit1, - Param/*6*/ src2_n, - Param/*1*/ comp_sel_0_bit0) { - Instruction inst = 0; - inst |= 0b01000ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (nosched & 0b1ull) << 54u; - inst |= (unknown & 0b1ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (repeat_count & 0b111ull) << 44u; - inst |= (src_fmt & 0b111ull) << 41u; - inst |= (dest_fmt & 0b111ull) << 38u; - inst |= (dest_mask & 0b1111ull) << 34u; - inst |= (dest_bank_sel & 0b11ull) << 32u; - inst |= (src1_bank_sel & 0b11ull) << 30u; - inst |= (src2_bank_sel & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (comp_sel_3 & 0b11ull) << 19u; - inst |= (scale & 0b1ull) << 18u; - inst |= (comp_sel_1 & 0b11ull) << 16u; - inst |= (comp_sel_2 & 0b11ull) << 14u; - inst |= (src1_n & 0b111111ull) << 8u; - inst |= (comp0_sel_bit1 & 0b1ull) << 7u; - inst |= (src2_n & 0b111111ull) << 1u; - inst |= (comp_sel_0_bit0 & 0b1ull) << 0u; - return inst; - } + Instruction makeVPCK( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ unknown, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*3*/ repeat_count, + Param/*3*/ src_fmt, + Param/*3*/ dest_fmt, + Param/*4*/ dest_mask, + Param/*2*/ dest_bank_sel, + Param/*2*/ src1_bank_sel, + Param/*2*/ src2_bank_sel, + Param/*7*/ dest_n, + Param/*2*/ 
comp_sel_3, + Param/*1*/ scale, + Param/*2*/ comp_sel_1, + Param/*2*/ comp_sel_2, + Param/*6*/ src1_n, + Param/*1*/ comp0_sel_bit1, + Param/*6*/ src2_n, + Param/*1*/ comp_sel_0_bit0) { + Instruction inst = 0; + inst |= 0b01000ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VPCK out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VPCK out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VPCK out of bounds."); + inst |= (nosched & 0b1ull) << 54u; + if ((unknown & ~0b1ull) != 0) + throw std::runtime_error("Instruction field unknown for VPCK out of bounds."); + inst |= (unknown & 0b1ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VPCK out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_ext for VPCK out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for VPCK out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VPCK out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for VPCK out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((repeat_count & ~0b111ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VPCK out of bounds."); + inst |= (repeat_count & 0b111ull) << 44u; + if ((src_fmt & ~0b111ull) != 0) + throw std::runtime_error("Instruction field src_fmt for VPCK out of bounds."); + inst |= (src_fmt & 0b111ull) << 41u; + if ((dest_fmt & ~0b111ull) != 0) + throw 
std::runtime_error("Instruction field dest_fmt for VPCK out of bounds."); + inst |= (dest_fmt & 0b111ull) << 38u; + if ((dest_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field dest_mask for VPCK out of bounds."); + inst |= (dest_mask & 0b1111ull) << 34u; + if ((dest_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank_sel for VPCK out of bounds."); + inst |= (dest_bank_sel & 0b11ull) << 32u; + if ((src1_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank_sel for VPCK out of bounds."); + inst |= (src1_bank_sel & 0b11ull) << 30u; + if ((src2_bank_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank_sel for VPCK out of bounds."); + inst |= (src2_bank_sel & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VPCK out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((comp_sel_3 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field comp_sel_3 for VPCK out of bounds."); + inst |= (comp_sel_3 & 0b11ull) << 19u; + if ((scale & ~0b1ull) != 0) + throw std::runtime_error("Instruction field scale for VPCK out of bounds."); + inst |= (scale & 0b1ull) << 18u; + if ((comp_sel_1 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field comp_sel_1 for VPCK out of bounds."); + inst |= (comp_sel_1 & 0b11ull) << 16u; + if ((comp_sel_2 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field comp_sel_2 for VPCK out of bounds."); + inst |= (comp_sel_2 & 0b11ull) << 14u; + if ((src1_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VPCK out of bounds."); + inst |= (src1_n & 0b111111ull) << 8u; + if ((comp0_sel_bit1 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field comp0_sel_bit1 for VPCK out of bounds."); + inst |= (comp0_sel_bit1 & 0b1ull) << 7u; + if ((src2_n & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VPCK out 
of bounds."); + inst |= (src2_n & 0b111111ull) << 1u; + if ((comp_sel_0_bit0 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field comp_sel_0_bit0 for VPCK out of bounds."); + inst |= (comp_sel_0_bit0 & 0b1ull) << 0u; + return inst; + } - Instruction makeVBW( - Param/*3*/ op1, - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ repeat_count, - Param/*1*/ sync_start, - Param/*1*/ dest_ext, - Param/*1*/ end, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*4*/ mask_count, - Param/*1*/ src2_invert, - Param/*5*/ src2_rot, - Param/*2*/ src2_exth, - Param/*1*/ op2, - Param/*1*/ bitwise_partial, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src2_sel, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b01ull << 62u; - inst |= (op1 & 0b111ull) << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (nosched & 0b1ull) << 54u; - inst |= (repeat_count & 0b1ull) << 53u; - inst |= (sync_start & 0b1ull) << 52u; - inst |= (dest_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_ext & 0b1ull) << 49u; - inst |= (src2_ext & 0b1ull) << 48u; - inst |= (mask_count & 0b1111ull) << 44u; - inst |= (src2_invert & 0b1ull) << 43u; - inst |= (src2_rot & 0b11111ull) << 38u; - inst |= (src2_exth & 0b11ull) << 36u; - inst |= (op2 & 0b1ull) << 35u; - inst |= (bitwise_partial & 0b1ull) << 34u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (src2_sel & 0b1111111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeVBW( + Param/*3*/ op1, + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ repeat_count, + Param/*1*/ sync_start, + Param/*1*/ dest_ext, + Param/*1*/ end, + Param/*1*/ src1_ext, + 
Param/*1*/ src2_ext, + Param/*4*/ mask_count, + Param/*1*/ src2_invert, + Param/*5*/ src2_rot, + Param/*2*/ src2_exth, + Param/*1*/ op2, + Param/*1*/ bitwise_partial, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src2_sel, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b01ull << 62u; + if ((op1 & ~0b111ull) != 0) + throw std::runtime_error("Instruction field op1 for VBW out of bounds."); + inst |= (op1 & 0b111ull) << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VBW out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VBW out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VBW out of bounds."); + inst |= (nosched & 0b1ull) << 54u; + if ((repeat_count & ~0b1ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VBW out of bounds."); + inst |= (repeat_count & 0b1ull) << 53u; + if ((sync_start & ~0b1ull) != 0) + throw std::runtime_error("Instruction field sync_start for VBW out of bounds."); + inst |= (sync_start & 0b1ull) << 52u; + if ((dest_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_ext for VBW out of bounds."); + inst |= (dest_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for VBW out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_ext for VBW out of bounds."); + inst |= (src1_ext & 0b1ull) << 49u; + if ((src2_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_ext for VBW out of bounds."); + inst |= (src2_ext & 0b1ull) << 48u; + if ((mask_count & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field mask_count for VBW out of bounds."); + 
inst |= (mask_count & 0b1111ull) << 44u; + if ((src2_invert & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_invert for VBW out of bounds."); + inst |= (src2_invert & 0b1ull) << 43u; + if ((src2_rot & ~0b11111ull) != 0) + throw std::runtime_error("Instruction field src2_rot for VBW out of bounds."); + inst |= (src2_rot & 0b11111ull) << 38u; + if ((src2_exth & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_exth for VBW out of bounds."); + inst |= (src2_exth & 0b11ull) << 36u; + if ((op2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field op2 for VBW out of bounds."); + inst |= (op2 & 0b1ull) << 35u; + if ((bitwise_partial & ~0b1ull) != 0) + throw std::runtime_error("Instruction field bitwise_partial for VBW out of bounds."); + inst |= (bitwise_partial & 0b1ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VBW out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VBW out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for VBW out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VBW out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((src2_sel & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_sel for VBW out of bounds."); + inst |= (src2_sel & 0b1111111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VBW out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for VBW out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeSMP( - Param/*3*/ pred, - 
Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*1*/ syncstart, - Param/*1*/ minpack, - Param/*1*/ src0_ext, - Param/*1*/ src1_ext, - Param/*1*/ src2_ext, - Param/*2*/ fconv_type, - Param/*2*/ mask_count, - Param/*2*/ dim, - Param/*2*/ lod_mode, - Param/*1*/ dest_use_pa, - Param/*2*/ sb_mode, - Param/*2*/ src0_type, - Param/*1*/ src0_bank, - Param/*2*/ drc_sel, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b11100ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (nosched & 0b1ull) << 54u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (minpack & 0b1ull) << 51u; - inst |= (src0_ext & 0b1ull) << 50u; - inst |= (src1_ext & 0b1ull) << 49u; - inst |= (src2_ext & 0b1ull) << 48u; - inst |= (fconv_type & 0b11ull) << 46u; - inst |= (mask_count & 0b11ull) << 44u; - inst |= (dim & 0b11ull) << 42u; - inst |= (lod_mode & 0b11ull) << 40u; - inst |= (dest_use_pa & 0b1ull) << 39u; - inst |= (sb_mode & 0b11ull) << 37u; - inst |= (src0_type & 0b11ull) << 35u; - inst |= (src0_bank & 0b1ull) << 34u; - inst |= (drc_sel & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (src0_n & 0b1111111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeSMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*1*/ nosched, + Param/*1*/ syncstart, + Param/*1*/ minpack, + Param/*1*/ src0_ext, + Param/*1*/ src1_ext, + Param/*1*/ src2_ext, + Param/*2*/ fconv_type, + Param/*2*/ mask_count, + Param/*2*/ dim, + Param/*2*/ lod_mode, + Param/*1*/ dest_use_pa, + Param/*2*/ sb_mode, + Param/*2*/ src0_type, + Param/*1*/ src0_bank, + Param/*2*/ drc_sel, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + 
Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11100ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for SMP out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for SMP out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for SMP out of bounds."); + inst |= (nosched & 0b1ull) << 54u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for SMP out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((minpack & ~0b1ull) != 0) + throw std::runtime_error("Instruction field minpack for SMP out of bounds."); + inst |= (minpack & 0b1ull) << 51u; + if ((src0_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_ext for SMP out of bounds."); + inst |= (src0_ext & 0b1ull) << 50u; + if ((src1_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_ext for SMP out of bounds."); + inst |= (src1_ext & 0b1ull) << 49u; + if ((src2_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_ext for SMP out of bounds."); + inst |= (src2_ext & 0b1ull) << 48u; + if ((fconv_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field fconv_type for SMP out of bounds."); + inst |= (fconv_type & 0b11ull) << 46u; + if ((mask_count & ~0b11ull) != 0) + throw std::runtime_error("Instruction field mask_count for SMP out of bounds."); + inst |= (mask_count & 0b11ull) << 44u; + if ((dim & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dim for SMP out of bounds."); + inst |= (dim & 0b11ull) << 42u; + if ((lod_mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field lod_mode for SMP out of bounds."); + inst |= (lod_mode & 0b11ull) << 40u; + if ((dest_use_pa & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_use_pa for SMP out of 
bounds."); + inst |= (dest_use_pa & 0b1ull) << 39u; + if ((sb_mode & ~0b11ull) != 0) + throw std::runtime_error("Instruction field sb_mode for SMP out of bounds."); + inst |= (sb_mode & 0b11ull) << 37u; + if ((src0_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src0_type for SMP out of bounds."); + inst |= (src0_type & 0b11ull) << 35u; + if ((src0_bank & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank for SMP out of bounds."); + inst |= (src0_bank & 0b1ull) << 34u; + if ((drc_sel & ~0b11ull) != 0) + throw std::runtime_error("Instruction field drc_sel for SMP out of bounds."); + inst |= (drc_sel & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for SMP out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for SMP out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for SMP out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((src0_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src0_n for SMP out of bounds."); + inst |= (src0_n & 0b1111111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for SMP out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for SMP out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeVCOMP( - Param/*3*/ pred, - Param/*1*/ skipinv, - Param/*2*/ dest_type, - Param/*1*/ syncstart, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*4*/ repeat_count, - Param/*1*/ nosched, - Param/*2*/ op2, - Param/*2*/ src_type, - Param/*2*/ src1_mod, - Param/*2*/ src_comp, - Param/*2*/ dest_bank, - Param/*2*/ 
src1_bank, - Param/*7*/ dest_n, - Param/*7*/ src1_n, - Param/*4*/ write_mask) { - Instruction inst = 0; - inst |= 0b00110ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (dest_type & 0b11ull) << 53u; - inst |= (syncstart & 0b1ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (repeat_count & 0b1111ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (op2 & 0b11ull) << 41u; - inst |= (src_type & 0b11ull) << 39u; - inst |= (src1_mod & 0b11ull) << 37u; - inst |= (src_comp & 0b11ull) << 35u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (write_mask & 0b1111ull) << 0u; - return inst; - } + Instruction makeVCOMP( + Param/*3*/ pred, + Param/*1*/ skipinv, + Param/*2*/ dest_type, + Param/*1*/ syncstart, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*4*/ repeat_count, + Param/*1*/ nosched, + Param/*2*/ op2, + Param/*2*/ src_type, + Param/*2*/ src1_mod, + Param/*2*/ src_comp, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*7*/ dest_n, + Param/*7*/ src1_n, + Param/*4*/ write_mask) { + Instruction inst = 0; + inst |= 0b00110ull << 59u; + if ((pred & ~0b111ull) != 0) + throw std::runtime_error("Instruction field pred for VCOMP out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for VCOMP out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((dest_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_type for VCOMP out of bounds."); + inst |= (dest_type & 0b11ull) << 53u; + if ((syncstart & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncstart for VCOMP out of bounds."); + inst |= (syncstart & 0b1ull) << 52u; + if ((dest_bank_ext & ~0b1ull) 
!= 0) + throw std::runtime_error("Instruction field dest_bank_ext for VCOMP out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for VCOMP out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for VCOMP out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((repeat_count & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field repeat_count for VCOMP out of bounds."); + inst |= (repeat_count & 0b1111ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for VCOMP out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((op2 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field op2 for VCOMP out of bounds."); + inst |= (op2 & 0b11ull) << 41u; + if ((src_type & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src_type for VCOMP out of bounds."); + inst |= (src_type & 0b11ull) << 39u; + if ((src1_mod & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_mod for VCOMP out of bounds."); + inst |= (src1_mod & 0b11ull) << 37u; + if ((src_comp & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src_comp for VCOMP out of bounds."); + inst |= (src_comp & 0b11ull) << 35u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for VCOMP out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for VCOMP out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for VCOMP out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for VCOMP out of bounds."); + inst |= (src1_n & 
0b1111111ull) << 7u; + if ((write_mask & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field write_mask for VCOMP out of bounds."); + inst |= (write_mask & 0b1111ull) << 0u; + return inst; + } - Instruction makeSOP2( - Param/*2*/ pred, - Param/*1*/ cmod1, - Param/*1*/ skipinv, - Param/*1*/ nosched, - Param/*2*/ asel1, - Param/*1*/ dest_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*1*/ cmod2, - Param/*3*/ count, - Param/*1*/ amod1, - Param/*2*/ asel2, - Param/*3*/ csel1, - Param/*3*/ csel2, - Param/*1*/ amod2, - Param/*2*/ dest_bank, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*7*/ dest_n, - Param/*1*/ src1_mod, - Param/*2*/ cop, - Param/*2*/ aop, - Param/*1*/ asrc1_mod, - Param/*1*/ dest_mod, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b10000ull << 59u; - inst |= (pred & 0b11ull) << 57u; - inst |= (cmod1 & 0b1ull) << 56u; - inst |= (skipinv & 0b1ull) << 55u; - inst |= (nosched & 0b1ull) << 54u; - inst |= (asel1 & 0b11ull) << 52u; - inst |= (dest_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (cmod2 & 0b1ull) << 47u; - inst |= (count & 0b111ull) << 44u; - inst |= (amod1 & 0b1ull) << 43u; - inst |= (asel2 & 0b11ull) << 41u; - inst |= (csel1 & 0b111ull) << 38u; - inst |= (csel2 & 0b111ull) << 35u; - inst |= (amod2 & 0b1ull) << 34u; - inst |= (dest_bank & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (dest_n & 0b1111111ull) << 21u; - inst |= (src1_mod & 0b1ull) << 20u; - inst |= (cop & 0b11ull) << 18u; - inst |= (aop & 0b11ull) << 16u; - inst |= (asrc1_mod & 0b1ull) << 15u; - inst |= (dest_mod & 0b1ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeSOP2( + Param/*2*/ pred, + Param/*1*/ cmod1, + Param/*1*/ skipinv, + 
Param/*1*/ nosched, + Param/*2*/ asel1, + Param/*1*/ dest_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ cmod2, + Param/*3*/ count, + Param/*1*/ amod1, + Param/*2*/ asel2, + Param/*3*/ csel1, + Param/*3*/ csel2, + Param/*1*/ amod2, + Param/*2*/ dest_bank, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*7*/ dest_n, + Param/*1*/ src1_mod, + Param/*2*/ cop, + Param/*2*/ aop, + Param/*1*/ asrc1_mod, + Param/*1*/ dest_mod, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b10000ull << 59u; + if ((pred & ~0b11ull) != 0) + throw std::runtime_error("Instruction field pred for SOP2 out of bounds."); + inst |= (pred & 0b11ull) << 57u; + if ((cmod1 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field cmod1 for SOP2 out of bounds."); + inst |= (cmod1 & 0b1ull) << 56u; + if ((skipinv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field skipinv for SOP2 out of bounds."); + inst |= (skipinv & 0b1ull) << 55u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for SOP2 out of bounds."); + inst |= (nosched & 0b1ull) << 54u; + if ((asel1 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field asel1 for SOP2 out of bounds."); + inst |= (asel1 & 0b11ull) << 52u; + if ((dest_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_bank_ext for SOP2 out of bounds."); + inst |= (dest_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for SOP2 out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for SOP2 out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for SOP2 out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((cmod2 & ~0b1ull) != 0) + throw 
std::runtime_error("Instruction field cmod2 for SOP2 out of bounds."); + inst |= (cmod2 & 0b1ull) << 47u; + if ((count & ~0b111ull) != 0) + throw std::runtime_error("Instruction field count for SOP2 out of bounds."); + inst |= (count & 0b111ull) << 44u; + if ((amod1 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field amod1 for SOP2 out of bounds."); + inst |= (amod1 & 0b1ull) << 43u; + if ((asel2 & ~0b11ull) != 0) + throw std::runtime_error("Instruction field asel2 for SOP2 out of bounds."); + inst |= (asel2 & 0b11ull) << 41u; + if ((csel1 & ~0b111ull) != 0) + throw std::runtime_error("Instruction field csel1 for SOP2 out of bounds."); + inst |= (csel1 & 0b111ull) << 38u; + if ((csel2 & ~0b111ull) != 0) + throw std::runtime_error("Instruction field csel2 for SOP2 out of bounds."); + inst |= (csel2 & 0b111ull) << 35u; + if ((amod2 & ~0b1ull) != 0) + throw std::runtime_error("Instruction field amod2 for SOP2 out of bounds."); + inst |= (amod2 & 0b1ull) << 34u; + if ((dest_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field dest_bank for SOP2 out of bounds."); + inst |= (dest_bank & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for SOP2 out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for SOP2 out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((dest_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field dest_n for SOP2 out of bounds."); + inst |= (dest_n & 0b1111111ull) << 21u; + if ((src1_mod & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_mod for SOP2 out of bounds."); + inst |= (src1_mod & 0b1ull) << 20u; + if ((cop & ~0b11ull) != 0) + throw std::runtime_error("Instruction field cop for SOP2 out of bounds."); + inst |= (cop & 0b11ull) << 18u; + if ((aop & ~0b11ull) != 0) + throw std::runtime_error("Instruction field aop for SOP2 
out of bounds."); + inst |= (aop & 0b11ull) << 16u; + if ((asrc1_mod & ~0b1ull) != 0) + throw std::runtime_error("Instruction field asrc1_mod for SOP2 out of bounds."); + inst |= (asrc1_mod & 0b1ull) << 15u; + if ((dest_mod & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_mod for SOP2 out of bounds."); + inst |= (dest_mod & 0b1ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for SOP2 out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for SOP2 out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeBR( - Param/*3*/ pred, - Param/*1*/ syncend, - Param/*1*/ exception, - Param/*1*/ pwait, - Param/*1*/ sync_ext, - Param/*1*/ nosched, - Param/*1*/ br_monitor, - Param/*1*/ save_link, - Param/*1*/ br_type, - Param/*1*/ any_inst, - Param/*1*/ all_inst, - Param/*20*/ br_off) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= (pred & 0b111ull) << 56u; - inst |= (syncend & 0b1ull) << 55u; - inst |= 0b0ull << 54u; - inst |= 0b00ull << 52u; - inst |= (exception & 0b1ull) << 51u; - inst |= (pwait & 0b1ull) << 45u; - inst |= (sync_ext & 0b1ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (br_monitor & 0b1ull) << 42u; - inst |= (save_link & 0b1ull) << 41u; - inst |= 0b00ull << 39u; - inst |= (br_type & 0b1ull) << 38u; - inst |= (any_inst & 0b1ull) << 21u; - inst |= (all_inst & 0b1ull) << 20u; - inst |= (br_off & 0b11111111111111111111ull) << 0u; - return inst; - } + Instruction makeBR( + Param/*3*/ pred, + Param/*1*/ syncend, + Param/*1*/ exception, + Param/*1*/ pwait, + Param/*1*/ sync_ext, + Param/*1*/ nosched, + Param/*1*/ br_monitor, + Param/*1*/ save_link, + Param/*1*/ br_type, + Param/*1*/ any_inst, + Param/*1*/ all_inst, + Param/*20*/ br_off) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + if ((pred & ~0b111ull) != 0) + throw 
std::runtime_error("Instruction field pred for BR out of bounds."); + inst |= (pred & 0b111ull) << 56u; + if ((syncend & ~0b1ull) != 0) + throw std::runtime_error("Instruction field syncend for BR out of bounds."); + inst |= (syncend & 0b1ull) << 55u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + if ((exception & ~0b1ull) != 0) + throw std::runtime_error("Instruction field exception for BR out of bounds."); + inst |= (exception & 0b1ull) << 51u; + if ((pwait & ~0b1ull) != 0) + throw std::runtime_error("Instruction field pwait for BR out of bounds."); + inst |= (pwait & 0b1ull) << 45u; + if ((sync_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field sync_ext for BR out of bounds."); + inst |= (sync_ext & 0b1ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for BR out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((br_monitor & ~0b1ull) != 0) + throw std::runtime_error("Instruction field br_monitor for BR out of bounds."); + inst |= (br_monitor & 0b1ull) << 42u; + if ((save_link & ~0b1ull) != 0) + throw std::runtime_error("Instruction field save_link for BR out of bounds."); + inst |= (save_link & 0b1ull) << 41u; + inst |= 0b00ull << 39u; + if ((br_type & ~0b1ull) != 0) + throw std::runtime_error("Instruction field br_type for BR out of bounds."); + inst |= (br_type & 0b1ull) << 38u; + if ((any_inst & ~0b1ull) != 0) + throw std::runtime_error("Instruction field any_inst for BR out of bounds."); + inst |= (any_inst & 0b1ull) << 21u; + if ((all_inst & ~0b1ull) != 0) + throw std::runtime_error("Instruction field all_inst for BR out of bounds."); + inst |= (all_inst & 0b1ull) << 20u; + if ((br_off & ~0b11111111111111111111ull) != 0) + throw std::runtime_error("Instruction field br_off for BR out of bounds."); + inst |= (br_off & 0b11111111111111111111ull) << 0u; + return inst; + } - Instruction makePHAS( - Param/*1*/ sprvv, - Param/*1*/ end, - Param/*1*/ imm, - Param/*1*/ src1_bank_ext, - 
Param/*1*/ src2_bank_ext, - Param/*1*/ mode, - Param/*1*/ rate_hi, - Param/*1*/ rate_lo_or_nosched, - Param/*3*/ wait_cond, - Param/*8*/ temp_count, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ exe_addr_high, - Param/*7*/ src1_n_or_exe_addr_mid, - Param/*7*/ src2_n_or_exe_addr_low) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b010ull << 56u; - inst |= (sprvv & 0b1ull) << 55u; - inst |= 0b100ull << 52u; - inst |= (end & 0b1ull) << 51u; - inst |= (imm & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (mode & 0b1ull) << 45u; - inst |= (rate_hi & 0b1ull) << 44u; - inst |= (rate_lo_or_nosched & 0b1ull) << 43u; - inst |= (wait_cond & 0b111ull) << 40u; - inst |= (temp_count & 0b11111111ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (exe_addr_high & 0b111111ull) << 14u; - inst |= (src1_n_or_exe_addr_mid & 0b1111111ull) << 7u; - inst |= (src2_n_or_exe_addr_low & 0b1111111ull) << 0u; - return inst; - } + Instruction makePHAS( + Param/*1*/ sprvv, + Param/*1*/ end, + Param/*1*/ imm, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*1*/ mode, + Param/*1*/ rate_hi, + Param/*1*/ rate_lo_or_nosched, + Param/*3*/ wait_cond, + Param/*8*/ temp_count, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ exe_addr_high, + Param/*7*/ src1_n_or_exe_addr_mid, + Param/*7*/ src2_n_or_exe_addr_low) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + if ((sprvv & ~0b1ull) != 0) + throw std::runtime_error("Instruction field sprvv for PHAS out of bounds."); + inst |= (sprvv & 0b1ull) << 55u; + inst |= 0b100ull << 52u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for PHAS out of bounds."); + inst |= (end & 0b1ull) << 51u; + if ((imm & ~0b1ull) != 0) + throw std::runtime_error("Instruction field imm for PHAS out of bounds."); + inst |= (imm & 0b1ull) << 
50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for PHAS out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for PHAS out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((mode & ~0b1ull) != 0) + throw std::runtime_error("Instruction field mode for PHAS out of bounds."); + inst |= (mode & 0b1ull) << 45u; + if ((rate_hi & ~0b1ull) != 0) + throw std::runtime_error("Instruction field rate_hi for PHAS out of bounds."); + inst |= (rate_hi & 0b1ull) << 44u; + if ((rate_lo_or_nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field rate_lo_or_nosched for PHAS out of bounds."); + inst |= (rate_lo_or_nosched & 0b1ull) << 43u; + if ((wait_cond & ~0b111ull) != 0) + throw std::runtime_error("Instruction field wait_cond for PHAS out of bounds."); + inst |= (wait_cond & 0b111ull) << 40u; + if ((temp_count & ~0b11111111ull) != 0) + throw std::runtime_error("Instruction field temp_count for PHAS out of bounds."); + inst |= (temp_count & 0b11111111ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for PHAS out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src2_bank for PHAS out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((exe_addr_high & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field exe_addr_high for PHAS out of bounds."); + inst |= (exe_addr_high & 0b111111ull) << 14u; + if ((src1_n_or_exe_addr_mid & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n_or_exe_addr_mid for PHAS out of bounds."); + inst |= (src1_n_or_exe_addr_mid & 0b1111111ull) << 7u; + if ((src2_n_or_exe_addr_low & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n_or_exe_addr_low for PHAS out of bounds."); + 
inst |= (src2_n_or_exe_addr_low & 0b1111111ull) << 0u; + return inst; + } - Instruction makeNOP() { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b0ull << 54u; - inst |= 0b00ull << 52u; - inst |= 0b101ull << 38u; - return inst; - } + Instruction makeNOP() { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b0ull << 54u; + inst |= 0b00ull << 52u; + inst |= 0b101ull << 38u; + return inst; + } - Instruction makeSMLSI( - Param/*1*/ nosched, - Param/*4*/ temp_limit, - Param/*4*/ pa_limit, - Param/*4*/ sa_limit, - Param/*1*/ dest_inc_mode, - Param/*1*/ src0_inc_mode, - Param/*1*/ src1_inc_mode, - Param/*1*/ src2_inc_mode, - Param/*8*/ dest_inc, - Param/*8*/ src0_inc, - Param/*8*/ src1_inc, - Param/*8*/ src2_inc) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b010ull << 56u; - inst |= 0b01ull << 52u; - inst |= (nosched & 0b1ull) << 50u; - inst |= (temp_limit & 0b1111ull) << 44u; - inst |= (pa_limit & 0b1111ull) << 40u; - inst |= (sa_limit & 0b1111ull) << 36u; - inst |= (dest_inc_mode & 0b1ull) << 35u; - inst |= (src0_inc_mode & 0b1ull) << 34u; - inst |= (src1_inc_mode & 0b1ull) << 33u; - inst |= (src2_inc_mode & 0b1ull) << 32u; - inst |= (dest_inc & 0b11111111ull) << 24u; - inst |= (src0_inc & 0b11111111ull) << 16u; - inst |= (src1_inc & 0b11111111ull) << 8u; - inst |= (src2_inc & 0b11111111ull) << 0u; - return inst; - } + Instruction makeSMLSI( + Param/*1*/ nosched, + Param/*4*/ temp_limit, + Param/*4*/ pa_limit, + Param/*4*/ sa_limit, + Param/*1*/ dest_inc_mode, + Param/*1*/ src0_inc_mode, + Param/*1*/ src1_inc_mode, + Param/*1*/ src2_inc_mode, + Param/*8*/ dest_inc, + Param/*8*/ src0_inc, + Param/*8*/ src1_inc, + Param/*8*/ src2_inc) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b010ull << 56u; + inst |= 0b01ull << 52u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for SMLSI out of bounds."); + inst |= (nosched & 0b1ull) << 50u; + if ((temp_limit & ~0b1111ull) 
!= 0) + throw std::runtime_error("Instruction field temp_limit for SMLSI out of bounds."); + inst |= (temp_limit & 0b1111ull) << 44u; + if ((pa_limit & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field pa_limit for SMLSI out of bounds."); + inst |= (pa_limit & 0b1111ull) << 40u; + if ((sa_limit & ~0b1111ull) != 0) + throw std::runtime_error("Instruction field sa_limit for SMLSI out of bounds."); + inst |= (sa_limit & 0b1111ull) << 36u; + if ((dest_inc_mode & ~0b1ull) != 0) + throw std::runtime_error("Instruction field dest_inc_mode for SMLSI out of bounds."); + inst |= (dest_inc_mode & 0b1ull) << 35u; + if ((src0_inc_mode & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_inc_mode for SMLSI out of bounds."); + inst |= (src0_inc_mode & 0b1ull) << 34u; + if ((src1_inc_mode & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_inc_mode for SMLSI out of bounds."); + inst |= (src1_inc_mode & 0b1ull) << 33u; + if ((src2_inc_mode & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_inc_mode for SMLSI out of bounds."); + inst |= (src2_inc_mode & 0b1ull) << 32u; + if ((dest_inc & ~0b11111111ull) != 0) + throw std::runtime_error("Instruction field dest_inc for SMLSI out of bounds."); + inst |= (dest_inc & 0b11111111ull) << 24u; + if ((src0_inc & ~0b11111111ull) != 0) + throw std::runtime_error("Instruction field src0_inc for SMLSI out of bounds."); + inst |= (src0_inc & 0b11111111ull) << 16u; + if ((src1_inc & ~0b11111111ull) != 0) + throw std::runtime_error("Instruction field src1_inc for SMLSI out of bounds."); + inst |= (src1_inc & 0b11111111ull) << 8u; + if ((src2_inc & ~0b11111111ull) != 0) + throw std::runtime_error("Instruction field src2_inc for SMLSI out of bounds."); + inst |= (src2_inc & 0b11111111ull) << 0u; + return inst; + } - Instruction makeEMIT( - Param/*2*/ sideband_high, - Param/*1*/ src0_bank_ext, - Param/*1*/ end, - Param/*1*/ src1_bank_ext, - Param/*1*/ src2_bank_ext, - Param/*2*/ target, - 
Param/*1*/ task_start_or_mte_hi, - Param/*1*/ task_end_or_mte_lo, - Param/*1*/ nosched, - Param/*6*/ sideband_mid, - Param/*1*/ src0_bank, - Param/*2*/ incp, - Param/*2*/ src1_bank, - Param/*2*/ src2_bank, - Param/*6*/ sideband_low, - Param/*1*/ freep, - Param/*7*/ src0_n, - Param/*7*/ src1_n, - Param/*7*/ src2_n) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= 0b011ull << 56u; - inst |= (sideband_high & 0b11ull) << 54u; - inst |= 0b10ull << 52u; - inst |= (src0_bank_ext & 0b1ull) << 51u; - inst |= (end & 0b1ull) << 50u; - inst |= (src1_bank_ext & 0b1ull) << 49u; - inst |= (src2_bank_ext & 0b1ull) << 48u; - inst |= (target & 0b11ull) << 46u; - inst |= (task_start_or_mte_hi & 0b1ull) << 45u; - inst |= (task_end_or_mte_lo & 0b1ull) << 44u; - inst |= (nosched & 0b1ull) << 43u; - inst |= (sideband_mid & 0b111111ull) << 35u; - inst |= (src0_bank & 0b1ull) << 34u; - inst |= (incp & 0b11ull) << 32u; - inst |= (src1_bank & 0b11ull) << 30u; - inst |= (src2_bank & 0b11ull) << 28u; - inst |= (sideband_low & 0b111111ull) << 22u; - inst |= (freep & 0b1ull) << 21u; - inst |= (src0_n & 0b1111111ull) << 14u; - inst |= (src1_n & 0b1111111ull) << 7u; - inst |= (src2_n & 0b1111111ull) << 0u; - return inst; - } + Instruction makeEMIT( + Param/*2*/ sideband_high, + Param/*1*/ src0_bank_ext, + Param/*1*/ end, + Param/*1*/ src1_bank_ext, + Param/*1*/ src2_bank_ext, + Param/*2*/ target, + Param/*1*/ task_start_or_mte_hi, + Param/*1*/ task_end_or_mte_lo, + Param/*1*/ nosched, + Param/*6*/ sideband_mid, + Param/*1*/ src0_bank, + Param/*2*/ incp, + Param/*2*/ src1_bank, + Param/*2*/ src2_bank, + Param/*6*/ sideband_low, + Param/*1*/ freep, + Param/*7*/ src0_n, + Param/*7*/ src1_n, + Param/*7*/ src2_n) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + inst |= 0b011ull << 56u; + if ((sideband_high & ~0b11ull) != 0) + throw std::runtime_error("Instruction field sideband_high for EMIT out of bounds."); + inst |= (sideband_high & 0b11ull) << 54u; + inst |= 0b10ull << 52u; 
+ if ((src0_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank_ext for EMIT out of bounds."); + inst |= (src0_bank_ext & 0b1ull) << 51u; + if ((end & ~0b1ull) != 0) + throw std::runtime_error("Instruction field end for EMIT out of bounds."); + inst |= (end & 0b1ull) << 50u; + if ((src1_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src1_bank_ext for EMIT out of bounds."); + inst |= (src1_bank_ext & 0b1ull) << 49u; + if ((src2_bank_ext & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src2_bank_ext for EMIT out of bounds."); + inst |= (src2_bank_ext & 0b1ull) << 48u; + if ((target & ~0b11ull) != 0) + throw std::runtime_error("Instruction field target for EMIT out of bounds."); + inst |= (target & 0b11ull) << 46u; + if ((task_start_or_mte_hi & ~0b1ull) != 0) + throw std::runtime_error("Instruction field task_start_or_mte_hi for EMIT out of bounds."); + inst |= (task_start_or_mte_hi & 0b1ull) << 45u; + if ((task_end_or_mte_lo & ~0b1ull) != 0) + throw std::runtime_error("Instruction field task_end_or_mte_lo for EMIT out of bounds."); + inst |= (task_end_or_mte_lo & 0b1ull) << 44u; + if ((nosched & ~0b1ull) != 0) + throw std::runtime_error("Instruction field nosched for EMIT out of bounds."); + inst |= (nosched & 0b1ull) << 43u; + if ((sideband_mid & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field sideband_mid for EMIT out of bounds."); + inst |= (sideband_mid & 0b111111ull) << 35u; + if ((src0_bank & ~0b1ull) != 0) + throw std::runtime_error("Instruction field src0_bank for EMIT out of bounds."); + inst |= (src0_bank & 0b1ull) << 34u; + if ((incp & ~0b11ull) != 0) + throw std::runtime_error("Instruction field incp for EMIT out of bounds."); + inst |= (incp & 0b11ull) << 32u; + if ((src1_bank & ~0b11ull) != 0) + throw std::runtime_error("Instruction field src1_bank for EMIT out of bounds."); + inst |= (src1_bank & 0b11ull) << 30u; + if ((src2_bank & ~0b11ull) != 0) + throw 
std::runtime_error("Instruction field src2_bank for EMIT out of bounds."); + inst |= (src2_bank & 0b11ull) << 28u; + if ((sideband_low & ~0b111111ull) != 0) + throw std::runtime_error("Instruction field sideband_low for EMIT out of bounds."); + inst |= (sideband_low & 0b111111ull) << 22u; + if ((freep & ~0b1ull) != 0) + throw std::runtime_error("Instruction field freep for EMIT out of bounds."); + inst |= (freep & 0b1ull) << 21u; + if ((src0_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src0_n for EMIT out of bounds."); + inst |= (src0_n & 0b1111111ull) << 14u; + if ((src1_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src1_n for EMIT out of bounds."); + inst |= (src1_n & 0b1111111ull) << 7u; + if ((src2_n & ~0b1111111ull) != 0) + throw std::runtime_error("Instruction field src2_n for EMIT out of bounds."); + inst |= (src2_n & 0b1111111ull) << 0u; + return inst; + } - Instruction makeSPEC( - Param/*1*/ special, - Param/*2*/ category) { - Instruction inst = 0; - inst |= 0b11111ull << 59u; - inst |= (special & 0b1ull) << 54u; - inst |= (category & 0b11ull) << 52u; - return inst; - } + Instruction makeSPEC( + Param/*1*/ special, + Param/*2*/ category) { + Instruction inst = 0; + inst |= 0b11111ull << 59u; + if ((special & ~0b1ull) != 0) + throw std::runtime_error("Instruction field special for SPEC out of bounds."); + inst |= (special & 0b1ull) << 54u; + if ((category & ~0b11ull) != 0) + throw std::runtime_error("Instruction field category for SPEC out of bounds."); + inst |= (category & 0b11ull) << 52u; + return inst; + } } From 7ae5df0859d3f323b3f7b10d77df04f26e29dd22 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Mon, 4 Nov 2019 09:30:20 -0500 Subject: [PATCH 15/19] Improve swizzle accuracy --- src/gxp/include/gxp/usse.h | 3 +- src/gxp/src/block.cpp | 83 ++++++++++++++++++++++++-------------- src/gxp/src/usse.cpp | 29 ++++++++----- 3 files changed, 75 insertions(+), 40 deletions(-) diff 
--git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 4f62400..0fa2820 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -112,12 +112,13 @@ namespace usse { usse::SwizzleVec4 swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::DontCare); uint32_t getSwizzleMask(); - int32_t getSwizzleIndex(bool extended = false); + int32_t getSwizzleIndex(bool extended = false, int32_t components = -1); uint32_t getEffectiveIndex(); RegisterReference getHalf(uint32_t half); RegisterReference getComponents(uint32_t component, uint32_t count); RegisterReference getElement(uint32_t element); RegisterReference getExpanded(uint32_t count); + RegisterReference getAligned(uint8_t writeMask); RegisterReference() = default; RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex); diff --git a/src/gxp/src/block.cpp b/src/gxp/src/block.cpp index d00c6ff..d5fe36c 100644 --- a/src/gxp/src/block.cpp +++ b/src/gxp/src/block.cpp @@ -79,6 +79,8 @@ namespace gxp { 2, // Output8 }; + usse::RegisterReference alignedSource = source.getAligned(destination.getSwizzleMask()); + printDisassembly("pck", { source }, &destination); instructions.push_back(usse::makeVPCK( 0, // pred @@ -98,14 +100,14 @@ namespace gxp { srcBankLayout.number, // src1_bank_sel srcBankLayout.number, // src2_bank_sel destBankLayout.getIndex(destination, false, 7), // dest_n - destination.type.components > 3 ? 
static_cast(destination.swizzle[3]) & 0b11u : 0, // comp_sel_3 + static_cast(alignedSource.swizzle[3]) & 0b11u, // comp_sel_3 false, // scale - static_cast(destination.swizzle[1]) & 0b11u, // comp_sel_1 - static_cast(destination.swizzle[2]) & 0b11u, // comp_sel_2 + static_cast(alignedSource.swizzle[1]) & 0b11u, // comp_sel_1 + static_cast(alignedSource.swizzle[2]) & 0b11u, // comp_sel_2 srcBankLayout.getIndex(source.getHalf(0)), // src1_n - static_cast(destination.swizzle[0]) & 0b10u >> 1u, // comp0_sel_bit1 + (static_cast(alignedSource.swizzle[0]) & 0b10u) >> 1u, // comp0_sel_bit1 srcBankLayout.getIndex(source.getHalf(1)), // src2_n - static_cast(destination.swizzle[0]) & 0b01u // comp_sel_0_bit0 + static_cast(alignedSource.swizzle[0]) & 0b01u // comp_sel_0_bit0 )); } @@ -146,7 +148,7 @@ namespace gxp { static_cast(first.swizzle[1]), // src1_swiz_y static_cast(first.swizzle[0]), // src1_swiz_x firstBankLayout.getIndex(first) // src1_n - )); + )); } void Block::createAdd( @@ -181,7 +183,7 @@ namespace gxp { (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 firstBankLayout.extension, // src1_bank_ext secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz + second.getAligned(destination.getSwizzleMask()).getSwizzleIndex(false, 4), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask 0b00, // src1_mod @@ -207,16 +209,16 @@ namespace gxp { usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); uint32_t swizzleIndex = 0; - uint32_t firstSwizzle = 0; + uint32_t secondSwizzle = 0; uint8_t destMask = destination.getSwizzleMask(); for (uint32_t a = 0; a < 4; a++) { if (destMask & (1u << a)) { - usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + usse::SwizzleChannel channel = second.swizzle[swizzleIndex++]; // Swizzle does not have appropriate value for writing to destination. 
assert(channel != usse::SwizzleChannel::DontCare); - firstSwizzle |= static_cast(channel) << (a * 3); + secondSwizzle |= static_cast(channel) << (a * 3); } } @@ -225,23 +227,23 @@ namespace gxp { instructions.push_back(usse::makeVNMAD32( 0, // pred 0, // skipinv - (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 + (secondSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext - (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 + (secondSwizzle >> 9u) & 0b1u, // src1_swiz_9 secondBankLayout.extension, // src1_bank_ext firstBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz + first.getAligned(destination.getSwizzleMask()).getSwizzleIndex(false, 4), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask 0b01, // src1_mod 0b0, // src2_mod - (firstSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 + (secondSwizzle >> 7u) & 0b11u, // src1_swiz_7_8 destBankLayout.number, // dest_bank_sel secondBankLayout.number, // src1_bank_sel firstBankLayout.number, // src2_bank_sel destBankLayout.getIndex(destination), // dest_n - (firstSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 + (secondSwizzle >> 0u) & 0b1111111u, // src1_swiz_0_6 static_cast(usse::InstructionVNMADOp::Add), // op2 secondBankLayout.getIndex(second), // src1_n firstBankLayout.getIndex(first) // src2_n @@ -256,11 +258,18 @@ namespace gxp { usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - uint32_t shift = 0; + uint32_t swizzleIndex = 0; uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; + uint8_t destMask = destination.getSwizzleMask(); + for (uint32_t a = 0; a < 4; a++) { + if (destMask & (1u << a)) { + usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + + // Swizzle does not have appropriate value for writing to destination. 
+ assert(channel != usse::SwizzleChannel::DontCare); + + firstSwizzle |= static_cast(channel) << (a * 3); + } } printDisassembly("mul", { first, second }, &destination); @@ -273,7 +282,7 @@ namespace gxp { (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 firstBankLayout.extension, // src1_bank_ext secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz + second.getAligned(destination.getSwizzleMask()).getSwizzleIndex(false, 4), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask 0b00, // src1_mod @@ -424,11 +433,18 @@ namespace gxp { usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - uint32_t shift = 0; + uint32_t swizzleIndex = 0; uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; + uint8_t destMask = destination.getSwizzleMask(); + for (uint32_t a = 0; a < 4; a++) { + if (destMask & (1u << a)) { + usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + + // Swizzle does not have appropriate value for writing to destination. 
+ assert(channel != usse::SwizzleChannel::DontCare); + + firstSwizzle |= static_cast(channel) << (a * 3); + } } printDisassembly("min", { first, second }, &destination); @@ -441,7 +457,7 @@ namespace gxp { (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 firstBankLayout.extension, // src1_bank_ext secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz + second.getAligned(destination.getSwizzleMask()).getSwizzleIndex(false, 4), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask 0b00, // src1_mod @@ -466,11 +482,18 @@ namespace gxp { usse::BankLayout secondBankLayout = usse::BankLayout::srcLayout(second.bank); usse::BankLayout destBankLayout = usse::BankLayout::destLayout(destination.bank); - uint32_t shift = 0; + uint32_t swizzleIndex = 0; uint32_t firstSwizzle = 0; - for (usse::SwizzleChannel channel : first.swizzle) { - firstSwizzle |= static_cast(channel) << shift; - shift += 3; + uint8_t destMask = destination.getSwizzleMask(); + for (uint32_t a = 0; a < 4; a++) { + if (destMask & (1u << a)) { + usse::SwizzleChannel channel = first.swizzle[swizzleIndex++]; + + // Swizzle does not have appropriate value for writing to destination. 
+ assert(channel != usse::SwizzleChannel::DontCare); + + firstSwizzle |= static_cast(channel) << (a * 3); + } } printDisassembly("max", { first, second }, &destination); @@ -483,7 +506,7 @@ namespace gxp { (firstSwizzle >> 9u) & 0b1u, // src1_swiz_9 firstBankLayout.extension, // src1_bank_ext secondBankLayout.extension, // src2_bank_ext - second.getSwizzleIndex(), // src2_swiz + second.getAligned(destination.getSwizzleMask()).getSwizzleIndex(false, 4), // src2_swiz 0, // nosched destination.getSwizzleMask(), // dest_mask 0b00, // src1_mod diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp index 9d3c42c..b07dcc9 100644 --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -243,8 +243,8 @@ namespace usse { return mask; } - int32_t RegisterReference::getSwizzleIndex(bool extended) { - switch (type.components) { + int32_t RegisterReference::getSwizzleIndex(bool extended, int32_t components) { + switch (components == -1 ? type.components : components) { case 4: { usse::SwizzleVec4 vec; std::copy(swizzle.begin(), swizzle.end(), vec.begin()); @@ -252,7 +252,8 @@ namespace usse { } case 3: { usse::SwizzleVec3 vec; - std::copy(swizzle.begin(), swizzle.end(), vec.begin()); + bool firstIsBlank = swizzle[0] == usse::SwizzleChannel::DontCare; + std::copy(swizzle.begin() + firstIsBlank, swizzle.end() - !firstIsBlank, vec.begin()); return usse::getSwizzleVec3Index(vec, extended); } case 1: @@ -299,13 +300,8 @@ namespace usse { RegisterReference RegisterReference::getElement(uint32_t element) { if (element >= type.arraySize) throw std::runtime_error("Register reference array out of bounds."); - RegisterReference reg = *this; - - reg.type.arraySize = 1; - reg.size = size / type.arraySize; - reg.index += reg.size * element; - return reg; + return RegisterReference({ type.type, type.components, 1 }, bank, index + size / type.arraySize * element); } RegisterReference RegisterReference::getExpanded(uint32_t count) { @@ -318,6 +314,21 @@ namespace usse { return reg; } + 
RegisterReference RegisterReference::getAligned(uint8_t writeMask) { + RegisterReference reg = *this; + + reg.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::DontCare); + + uint32_t swizzleIndex = 0; + for (uint32_t a = 0; a < 4; a++) { + if (writeMask & (1u << a)) { + reg.swizzle[a] = swizzle[swizzleIndex++]; + } + } + + return reg; + } + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex) : type(type), bank(bank), size(getTypeSize(type.type) * type.components * type.arraySize / 4) { bool swizzleUp = false; From 8ee642d2bd1834c67c7d8f5eeacece2fce0f9f22 Mon Sep 17 00:00:00 2001 From: Taylor Whatley <1whatleytay@hdsb.ca> Date: Wed, 6 Nov 2019 08:43:34 -0500 Subject: [PATCH 16/19] Literals support --- src/gxp/include/gxp/builder.h | 2 + src/gxp/include/gxp/gxp.h | 24 +++++++++++- src/gxp/src/builder.cpp | 72 ++++++++++++++++++++++++++--------- src/gxp/src/gxp.cpp | 4 +- src/translator/CMakeLists.txt | 1 - src/translator/src/codes.cpp | 23 ++++++++--- src/util/include/util/util.h | 4 +- 7 files changed, 103 insertions(+), 27 deletions(-) diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h index d6cee19..7a28dc2 100644 --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -55,6 +55,7 @@ namespace gxp { std::vector> primaryBlocks; std::vector> secondaryBlocks; std::vector parameters; + std::vector literals; std::vector fragmentInputs; friend class Block; @@ -69,6 +70,7 @@ namespace gxp { void freeRegister(usse::RegisterReference reg); usse::RegisterReference registerParameter(const Parameter ¶meter); + usse::RegisterReference registerLiteral(const std::vector &literal); std::map registerVertexVaryings( const std::vector &outputs, const std::vector &texCoords); diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h index c85c4b9..27da53f 100644 --- a/src/gxp/include/gxp/gxp.h +++ b/src/gxp/include/gxp/gxp.h @@ -128,6 +128,28 @@ namespace gxp { uint32_t 
getVertexVaryingBits(ProgramVarying varying); uint32_t getFragmentVaryingBits(ProgramVarying varying); + enum class ContainerIndex : uint16_t { + Buffer0, + Buffer1, + Buffer2, + Buffer3, + Buffer4, + Buffer5, + Buffer6, + Buffer7, + Buffer8, + Buffer9, + Buffer10, + Buffer12, + Buffer13, + Default, + Texture, + Literal, + Scratch, + Thread, + Data, + }; + class ProgramVectorInfo { public: ProgramVarying varying; @@ -239,5 +261,5 @@ namespace gxp { }; uint16_t createParameterConfig(ParameterCategory category, ParameterType type, - uint32_t components, uint32_t containerIndex); + uint32_t components, ContainerIndex containerIndex); } diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp index f3c5e4f..be847d5 100644 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -11,8 +11,6 @@ #define OFFSET_OF(parent, child) (reinterpret_cast(&parent.child) - reinterpret_cast(&parent)) namespace gxp { - constexpr uint16_t containerIndexSA = 14; - void Builder::setType(gxp::ShaderType type) { header.type = static_cast(type); } @@ -122,6 +120,18 @@ namespace gxp { return reg; } + usse::RegisterReference Builder::registerLiteral(const std::vector &literal) { + assert(!literal.empty() && literal.size() <= 4); + + usse::RegisterReference reg({ usse::Type::Float32, static_cast(literal.size()), 1 }, + usse::RegisterBank::Secondary, literals.size()); + + for (float a : literal) + literals.push_back(a); + + return reg; + } + std::map Builder::registerVertexVaryings( const std::vector &outputs, const std::vector &texCoords) { varyings.varyings_count = outputs.size() + texCoords.size(); @@ -249,20 +259,47 @@ namespace gxp { stringDB.push_back(entry); } + std::vector containers; + + // Default Container + containers.push_back({ + static_cast(ContainerIndex::Default), // Container Index + 0, // ?? 
+ 0, // Register Index + static_cast(saRegPointer) // Register Count + }); + + // Literals + if (!literals.empty()) { + containers.push_back({ + static_cast(ContainerIndex::Literal), + 0, // ?? + static_cast(saRegPointer), // Register Index + static_cast(literals.size()) + }); + + header.literalsCount = literals.size(); + header.literalsOffset = data.size() - OFFSET_OF(header, literalsOffset); + for (uint32_t a = 0; a < literals.size(); a++) { + float literal = literals[a]; + + data.insert(data.end(), + reinterpret_cast(&a), + reinterpret_cast(&a) + sizeof(uint32_t)); + data.insert(data.end(), + reinterpret_cast(&literal), + reinterpret_cast(&literal) + sizeof(float)); + } + } + // Containers - header.containerCount = 1; + header.containerCount = containers.size(); header.containerOffset = data.size() - OFFSET_OF(header, containerOffset); { - ProgramContainerInfo info = { - containerIndexSA, // Container Index - 0, // ?? - 0, // Register Index - static_cast(saRegPointer) // Register Count - }; data.insert(data.end(), - reinterpret_cast(&info), - reinterpret_cast(&info) - + sizeof(ProgramContainerInfo)); + reinterpret_cast(containers.data()), + reinterpret_cast(containers.data()) + + containers.size() * sizeof(ProgramContainerInfo)); } // Parameters @@ -274,7 +311,7 @@ namespace gxp { parameter.arraySize = param.type.arraySize; parameter.semantic = static_cast(param.semantic); parameter.config = createParameterConfig(param.category, getParameterTypeFromUSSEType(param.type.type), - param.type.components, containerIndexSA); + param.type.components, ContainerIndex::Default); auto stringEntry = std::find_if(stringDB.begin(), stringDB.end(), [param](const StringEntry &entry) { return entry.text == param.name; @@ -284,9 +321,10 @@ namespace gxp { parameter.nameOffset = stringEntry->index - (data.size() + OFFSET_OF(parameter, nameOffset)); - std::vector paramData(sizeof(parameter)); - std::memcpy(paramData.data(), ¶meter, sizeof(parameter)); - data.insert(data.end(), 
paramData.begin(), paramData.end()); + data.insert(data.end(), + reinterpret_cast(¶meter), + reinterpret_cast(¶meter) + + sizeof(parameter)); } // Varyings @@ -302,7 +340,7 @@ namespace gxp { // Code header.primaryRegCount = paRegPointer; - header.secondaryRegCount = saRegPointer; + header.secondaryRegCount = saRegPointer + literals.size(); header.tempRegCount1 = tMaxRegs; header.tempRegCount2 = tMaxRegs; // Difference between both reg counts? { diff --git a/src/gxp/src/gxp.cpp b/src/gxp/src/gxp.cpp index 9a2df74..f98f681 100644 --- a/src/gxp/src/gxp.cpp +++ b/src/gxp/src/gxp.cpp @@ -141,9 +141,9 @@ namespace gxp { } uint16_t createParameterConfig(ParameterCategory category, ParameterType type, - uint32_t components, uint32_t containerIndex) { + uint32_t components, ContainerIndex containerIndex) { uint16_t config = 0; - config |= (containerIndex & 0b1111u) << 12u; + config |= (static_cast(containerIndex) & 0b1111u) << 12u; config |= (components & 0b1111u) << 8u; config |= (static_cast(type) & 0b1111u) << 4u; config |= static_cast(category) & 0b1111u; diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index dfdb289..d0a0a94 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -1,5 +1,4 @@ add_library(translator - include/translator/config.h include/translator/translator.h src/codes.cpp diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp index b4ed9e5..e134fcb 100644 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -26,6 +26,8 @@ void CompilerGXP::useRegister(spv::Id id) { } usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { + id = resolveAlias(id); + auto varying = idVaryings.find(id); if (varying != idVaryings.end()) return getOrThrow(varyingReferences, getOrThrow(idVaryings, id)); @@ -40,13 +42,24 @@ usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { if (constant) { // FP Constant only auto type = get_type(constant->constant_type); - 
assert(type.vecsize <= 1 - && type.columns <= 1 + assert(type.columns <= 1 && type.basetype == SPIRType::Float); - int32_t regIndex = usse::getFPConstantIndex(constant->m.c[0].r[0].f32); - if (regIndex != -1) { - return usse::RegisterReference({ usse::Type::Float32, 1, 1 }, usse::RegisterBank::FloatConstant, regIndex); + + if (type.vecsize == 1) { + int32_t regIndex = usse::getFPConstantIndex(constant->m.c[0].r[0].f32); + if (regIndex != -1) { + return usse::RegisterReference({usse::Type::Float32, 1, 1}, usse::RegisterBank::FloatConstant, + regIndex); + } } + + std::vector literal(type.vecsize); + for (uint32_t a = 0; a < type.vecsize; a++) + literal[a] = constant->m.c[0].r[a].f32; + + usse::RegisterReference literalReg = builder.registerLiteral(literal); + writeRegister(id, { literalReg }); + return literalReg; } throw std::runtime_error(fmt::format("Cannot find register, varying or constant with id {}.", id)); diff --git a/src/util/include/util/util.h b/src/util/include/util/util.h index 6d526b8..5b41b36 100644 --- a/src/util/include/util/util.h +++ b/src/util/include/util/util.h @@ -12,7 +12,9 @@ std::vector loadFileData(const std::string &path) { throw std::runtime_error(fmt::format("Cannot load file from '{}'.", path)); size_t size = stream.tellg(); - assert(size % sizeof(T) == 0); + if (size % sizeof(T) != 0) + throw std::runtime_error(fmt::format("Cannot load file from '{}' due to invalid type.", path)); + std::vector data(size / sizeof(T)); stream.seekg(0, std::ios::beg); stream.read(reinterpret_cast(data.data()), size); From 85ccbfb62c456e1f5daa58b83ff84cd2e0d20294 Mon Sep 17 00:00:00 2001 From: 1whatleytay <1whatleytay@hdsb.ca> Date: Thu, 5 Dec 2019 14:08:28 -0500 Subject: [PATCH 17/19] Stability improvements --- src/gxp/include/gxp/block.h | 5 +- src/gxp/include/gxp/builder.h | 2 +- src/gxp/include/gxp/gxp.h | 17 +- src/gxp/include/gxp/usse.h | 1 + src/gxp/src/block.cpp | 53 +++- src/gxp/src/builder.cpp | 278 ++++++++++-------- src/gxp/src/gxp.cpp | 21 
+- src/gxp/src/usse.cpp | 150 +++++----- src/interface/src/interface.cpp | 10 +- .../include/translator/translator.h | 8 +- src/translator/src/codes.cpp | 194 ++++++------ 11 files changed, 410 insertions(+), 329 deletions(-) mode change 100644 => 100755 src/gxp/include/gxp/block.h mode change 100644 => 100755 src/gxp/include/gxp/builder.h mode change 100644 => 100755 src/gxp/include/gxp/gxp.h mode change 100644 => 100755 src/gxp/src/block.cpp mode change 100644 => 100755 src/gxp/src/builder.cpp mode change 100644 => 100755 src/gxp/src/gxp.cpp mode change 100644 => 100755 src/gxp/src/usse.cpp mode change 100644 => 100755 src/interface/src/interface.cpp mode change 100644 => 100755 src/translator/include/translator/translator.h mode change 100644 => 100755 src/translator/src/codes.cpp diff --git a/src/gxp/include/gxp/block.h b/src/gxp/include/gxp/block.h old mode 100644 new mode 100755 index 7c108bc..28573eb --- a/src/gxp/include/gxp/block.h +++ b/src/gxp/include/gxp/block.h @@ -20,7 +20,10 @@ namespace gxp { public: Builder &parent; - void createNop(); + void moveData( + usse::RegisterReference source, + usse::RegisterReference destination); + void createMov( usse::RegisterReference source, usse::RegisterReference destination); diff --git a/src/gxp/include/gxp/builder.h b/src/gxp/include/gxp/builder.h old mode 100644 new mode 100755 index 7a28dc2..df23cff --- a/src/gxp/include/gxp/builder.h +++ b/src/gxp/include/gxp/builder.h @@ -49,7 +49,7 @@ namespace gxp { uint32_t oRegPointer = 0; uint32_t iRegPointer = 0; - std::array tRegSpace; + std::array tRegSpace = { }; uint32_t tMaxRegs = 0; std::vector> primaryBlocks; diff --git a/src/gxp/include/gxp/gxp.h b/src/gxp/include/gxp/gxp.h old mode 100644 new mode 100755 index 27da53f..7f97e49 --- a/src/gxp/include/gxp/gxp.h +++ b/src/gxp/include/gxp/gxp.h @@ -30,7 +30,6 @@ namespace gxp { }; ParameterType getParameterTypeFromUSSEType(usse::Type type); - uint32_t getParameterTypeSize(ParameterType type); enum class 
ParameterSemantic : uint16_t { None, @@ -59,10 +58,11 @@ namespace gxp { Clip5 = 0x0020, Clip6 = 0x0040, Clip7 = 0x0080, - PointSize = 0x100, + PointSize = 0x0100, Fog = 0x0200, Color1 = 0x0400, Color0 = 0x0800, + Position = 0x1000, }; enum class ProgramTexCoordVertexMasks : uint32_t { @@ -140,6 +140,7 @@ namespace gxp { Buffer8, Buffer9, Buffer10, + Buffer11, Buffer12, Buffer13, Default, @@ -184,13 +185,13 @@ namespace gxp { class ProgramVaryings { public: std::uint8_t unk0[10] = { }; - std::uint8_t output_param_type = 0; - std::uint8_t output_comp_count = 0; + std::uint8_t outputParamType = 0; + std::uint8_t outputCompCount = 0; - std::uint16_t varyings_count = 0; + std::uint16_t varyingsCount = 0; std::uint16_t pad0 = 0; - std::uint32_t vertex_outputs1 = 0; - std::uint32_t vertex_outputs2 = 0; + std::uint32_t vertexOutputs1 = 0; + std::uint32_t vertexOutputs2 = 0; }; // Vita3K's SceGxmProgram structure @@ -258,6 +259,8 @@ namespace gxp { uint32_t unk8C = 0; uint32_t containerCount = 0; uint32_t containerOffset = 0; + + uint32_t padding = 0; }; uint16_t createParameterConfig(ParameterCategory category, ParameterType type, diff --git a/src/gxp/include/gxp/usse.h b/src/gxp/include/gxp/usse.h index 0fa2820..20ba3d9 100644 --- a/src/gxp/include/gxp/usse.h +++ b/src/gxp/include/gxp/usse.h @@ -119,6 +119,7 @@ namespace usse { RegisterReference getElement(uint32_t element); RegisterReference getExpanded(uint32_t count); RegisterReference getAligned(uint8_t writeMask); + RegisterReference getWithSwizzle(usse::SwizzleVec4 newSwizzle); RegisterReference() = default; RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex); diff --git a/src/gxp/src/block.cpp b/src/gxp/src/block.cpp old mode 100644 new mode 100755 index d5fe36c..324432a --- a/src/gxp/src/block.cpp +++ b/src/gxp/src/block.cpp @@ -14,9 +14,32 @@ namespace gxp { fmt::print("[disasm] {}\n", usse::disasm::disassemble(name, sources, destination)); } - void Block::createNop() { - 
usse::disasm::disassemble("nop", { }); - instructions.push_back(usse::makeNOP()); + void Block::moveData( + usse::RegisterReference source, + usse::RegisterReference destination) { + assert(source.type.components == destination.type.components); + + // Temporary solution for Oreg-space, sometimes instructions will be generated that move itself to itself. + // This is meant to discard some of these instructions. + // TODO: Allow Oreg-space to reuse space from inst. params. if possible is better. + if (source.index == destination.index + && source.bank == destination.bank + && source.type.components == destination.type.components) + return; + + if (destination.type.components <= 2) { + // We might want to use a mov instruction over a pack for a couple of reasons when possible. + // Right now pck is weird on vita3k so I am using mov to get around it. + createMov(source, destination); + } else { + // Needs to use pack to move > 2 components in one instruction. + // NO I DONT, packing seems broken on hardware so lets do 2 movs +// createPack(source, destination); + createMov(source.getComponents(0, 2), + destination.getComponents(0, 2)); + createMov(source.getComponents(2, destination.type.components - 2), + destination.getComponents(2, destination.type.components - 2)); + } } void Block::createMov( @@ -30,7 +53,7 @@ namespace gxp { printDisassembly("mov", { source }, &destination); instructions.push_back(usse::makeVMOV( 0, // pred - 0, // skipinv + true, // skipinv 0, // test_bit_2 0, // src0_comp_sel 0, // syncstart @@ -43,7 +66,7 @@ namespace gxp { 0, // nosched static_cast(destination.type.type) & 0b111u, // move_data_type 0, // test_bit_1 - source.getSwizzleIndex(), // src0_swiz + source.getSwizzleIndex(false, 4), // src0_swiz 0, // src0_bank_sel destBankLayout.number, // dest_bank_sel srcBankLayout.number, // src1_bank_sel @@ -84,7 +107,7 @@ namespace gxp { printDisassembly("pck", { source }, &destination); instructions.push_back(usse::makeVPCK( 0, // pred - 0, // 
skipinv + true, // skipinv 0, // nosched 0, // unknown 0, // syncstart @@ -124,7 +147,7 @@ namespace gxp { printDisassembly("dot", { first, second }, &destination); instructions.push_back(usse::makeVDP( 0, // pred - 0, // skipinv + true, // skipinv 0, // clip_plane_enable first.type.components == 4, // opcode2 destBankLayout.extension, // dest_use_bank_ext @@ -176,7 +199,7 @@ namespace gxp { printDisassembly("add", { first, second }, &destination); instructions.push_back(usse::makeVNMAD32( 0, // pred - 0, // skipinv + true, // skipinv (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -226,7 +249,7 @@ namespace gxp { printDisassembly("sub", { first, second }, &destination); instructions.push_back(usse::makeVNMAD32( 0, // pred - 0, // skipinv + true, // skipinv (secondSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -275,7 +298,7 @@ namespace gxp { printDisassembly("mul", { first, second }, &destination); instructions.push_back(usse::makeVNMAD32( 0, // pred - 0, // skipinv + true, // skipinv (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -321,7 +344,7 @@ namespace gxp { printDisassembly("exp", { source }, &destination); instructions.push_back(usse::makeVCOMP( 0, // pred - 0, // skipinv + true, // skipinv typeTable[static_cast(destination.type.type)], // dest_type 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -363,7 +386,7 @@ namespace gxp { printDisassembly("log", { source }, &destination); instructions.push_back(usse::makeVCOMP( 0, // pred - 0, // skipinv + true, // skipinv typeTable[static_cast(destination.type.type)], // dest_type 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -405,7 +428,7 @@ namespace gxp { printDisassembly("rsq", { source }, &destination); instructions.push_back(usse::makeVCOMP( 0, // pred - 0, // skipinv + true, // skipinv 
typeTable[static_cast(destination.type.type)], // dest_type 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -450,7 +473,7 @@ namespace gxp { printDisassembly("min", { first, second }, &destination); instructions.push_back(usse::makeVNMAD32( 0, // pred - 0, // skipinv + true, // skipinv (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext @@ -499,7 +522,7 @@ namespace gxp { printDisassembly("max", { first, second }, &destination); instructions.push_back(usse::makeVNMAD32( 0, // pred - 0, // skipinv + true, // skipinv (firstSwizzle >> 10u) & 0b11u, // src1_swiz_10_11 0, // syncstart destBankLayout.extension, // dest_bank_ext diff --git a/src/gxp/src/builder.cpp b/src/gxp/src/builder.cpp old mode 100644 new mode 100755 index be847d5..1d8be59 --- a/src/gxp/src/builder.cpp +++ b/src/gxp/src/builder.cpp @@ -8,9 +8,14 @@ #include +#include + #define OFFSET_OF(parent, child) (reinterpret_cast(&parent.child) - reinterpret_cast(&parent)) namespace gxp { + constexpr uint32_t paAlignment = 4; + constexpr uint32_t saAlignment = 4; + void Builder::setType(gxp::ShaderType type) { header.type = static_cast(type); } @@ -46,8 +51,8 @@ namespace gxp { // Odd index can only reference vec3 (.yzw?) // For vec4, index must be even. - static bool needsAllocOffset(uint32_t index, uint32_t size) { - return size == 4 && index % 2 == 1; + static int needsAllocOffset(uint32_t index, uint32_t size, uint32_t alignment) { + return (index % alignment == 0) ? 
0 : (alignment - index % alignment); } usse::RegisterReference Builder::allocateRegister(usse::RegisterBank bank, usse::DataType type) { @@ -56,16 +61,16 @@ namespace gxp { switch (bank) { case usse::RegisterBank::Primary: - index = paRegPointer + needsAllocOffset(paRegPointer, size); - paRegPointer += size + needsAllocOffset(paRegPointer, size); + index = paRegPointer + needsAllocOffset(paRegPointer, size, paAlignment); + paRegPointer += size + needsAllocOffset(paRegPointer, size, paAlignment); break; case usse::RegisterBank::Secondary: - index = saRegPointer + needsAllocOffset(saRegPointer, size); - saRegPointer += size + needsAllocOffset(saRegPointer, size); + index = saRegPointer + needsAllocOffset(saRegPointer, size, saAlignment); + saRegPointer += size + needsAllocOffset(saRegPointer, size, saAlignment); break; case usse::RegisterBank::Output: - index = oRegPointer + needsAllocOffset(oRegPointer, size); - oRegPointer += size + needsAllocOffset(oRegPointer, size); + index = oRegPointer; + oRegPointer += size; break; case usse::RegisterBank::Temporary: index = allocate(tRegSpace.data(), tRegSpace.size(), size, size == 4 ? 2 : 1); @@ -113,10 +118,24 @@ namespace gxp { size_t index = parameters.size(); parameters.push_back(parameter); - usse::RegisterReference reg = allocateRegister(parameters[index].getBank(), parameter.type); - parameters[index].resourceIndex = reg.index; + usse::DataType type = parameter.type; + if (parameter.category == ParameterCategory::Attribute) { + type.components = 4; // black screen/gpu crash on clear_v when position is not vec4 + } + + usse::RegisterReference reg = allocateRegister(parameters[index].getBank(), type); + // TODO: Which one is causing the crash!? pa size or param size? 
+ parameters[index].type.components = type.components; + parameters[index].resourceIndex = reg.getEffectiveIndex(); parameters[index].containerIndex = 0; + if (parameter.category == ParameterCategory::Uniform) { + if (parameter.type.arraySize > 1) + header.defaultUniformBufferCount += parameter.type.arraySize * 4; + else + header.defaultUniformBufferCount += parameter.type.components; + } + return reg; } @@ -134,17 +153,19 @@ namespace gxp { std::map Builder::registerVertexVaryings( const std::vector &outputs, const std::vector &texCoords) { - varyings.varyings_count = outputs.size() + texCoords.size(); + varyings.varyingsCount = outputs.size() + texCoords.size(); - varyings.vertex_outputs1 = 0; - varyings.vertex_outputs2 = 0; + varyings.vertexOutputs1 = 0; + varyings.vertexOutputs2 = 0; for (ProgramVarying output : outputs) { - if (output == ProgramVarying::Position) continue; if (isTexCoordVarying(output)) throw std::runtime_error("TexCoord passed as regular output to createVaryings."); - varyings.vertex_outputs1 |= getVertexVaryingBits(output); + if (output == ProgramVarying::Position) + varyings.varyingsCount--; + + varyings.vertexOutputs1 |= getVertexVaryingBits(output); } for (ProgramVectorInfo texCoord : texCoords) { @@ -158,7 +179,7 @@ namespace gxp { assert(texCoord.components > 1); texCoordBits |= (0b111u << (texCoord.components - 1)) >> 3u; - varyings.vertex_outputs2 |= texCoordBits << (texCoordIndex * 3u); + varyings.vertexOutputs2 |= texCoordBits << (texCoordIndex * 3u); } std::map references; @@ -167,7 +188,7 @@ namespace gxp { a < static_cast(ProgramVarying::TexCoord0); a++) { auto varying = static_cast(a); - if (varying == ProgramVarying::Position || varyings.vertex_outputs1 & getVertexVaryingBits(varying)) { + if (varying == ProgramVarying::Position || varyings.vertexOutputs1 & getVertexVaryingBits(varying)) { references[varying] = allocateRegister(usse::RegisterBank::Output, { usse::Type::Float32, 4, 1 }); } @@ -190,7 +211,7 @@ namespace gxp { a 
<= static_cast(ProgramVarying::Clip7); a++) { auto varying = static_cast(a); - if (varyings.vertex_outputs1 & getVertexVaryingBits(varying)) { + if (varyings.vertexOutputs1 & getVertexVaryingBits(varying)) { uint32_t varyingSize = varying == ProgramVarying::PointSize ? 1 : 4; references[varying] = allocateRegister(usse::RegisterBank::Output, @@ -198,6 +219,8 @@ namespace gxp { } } + varyings.vertexOutputs1 |= oRegPointer << 24u; + return references; } @@ -211,12 +234,14 @@ namespace gxp { // What the heck is going on with fragment inputs!?!? ProgramFragmentInputInfo input; - input.size = (reference.size - 1) << 4u; + input.size = (paAlignment - 1) << 4u; input.componentInfo = 0b11u << 4u; // 0b11 = Float, 0b10 = Half? input.resourceIndex = reference.index; input.attributeInfo |= getFragmentVaryingBits(varying.varying); // Id - input.attributeInfo |= 0x10A000u; // 0x20000000 = Half, 0x10000000 = Fixed, 0x10A000 = Float... +// input.attributeInfo |= 0; // 0x20000000 = Half, 0x10000000 = Fixed, Float is assumed I think + if (varying.varying == ProgramVarying::Color0 || varying.varying == ProgramVarying::Color1) + input.attributeInfo |= 0x100000u; // required for Float type for color varyings input.attributeInfo |= (reference.type.components - 1) << 22u; // Component Count input.attributeInfo |= 0xFu; // Not a Sampler! 
@@ -231,43 +256,29 @@ namespace gxp { } usse::RegisterReference Builder::createFragmentOutput(usse::Type type, uint32_t components) { - varyings.output_comp_count = components; - varyings.output_param_type = static_cast(getParameterTypeFromUSSEType(type)); + type = usse::Type::Float16; - return usse::RegisterReference({ type, components, 1 }, usse::RegisterBank::Primary, 0); + varyings.outputCompCount = components; + varyings.outputParamType = static_cast(getParameterTypeFromUSSEType(type)); + + return allocateRegister(usse::RegisterBank::Primary, { type, components, 1 }); } std::vector Builder::build() { std::vector data(sizeof(ProgramHeader) + sizeof(ProgramVaryings)); header.varyingsOffset = sizeof(ProgramHeader) - OFFSET_OF(header, varyingsOffset); - // Strings - class StringEntry { - public: - size_t index = 0; - std::string text; - }; - - std::vector stringDB; - - for (const Parameter ¶m : parameters) { - StringEntry entry; - entry.index = data.size(); - entry.text = param.name; - data.insert(data.end(), entry.text.begin(), entry.text.end()); - data.insert(data.end(), '\0'); - stringDB.push_back(entry); - } - std::vector containers; - // Default Container - containers.push_back({ - static_cast(ContainerIndex::Default), // Container Index - 0, // ?? - 0, // Register Index - static_cast(saRegPointer) // Register Count - }); + if (header.defaultUniformBufferCount > 0) { + // Default Container + containers.push_back({ + static_cast(ContainerIndex::Default), // Container Index + 0, // ?? 
+ 0, // Register Index + static_cast(saRegPointer) // Register Count + }); + } // Literals if (!literals.empty()) { @@ -280,7 +291,7 @@ namespace gxp { header.literalsCount = literals.size(); header.literalsOffset = data.size() - OFFSET_OF(header, literalsOffset); - for (uint32_t a = 0; a < literals.size(); a++) { + for (uint32_t a = 0; a < static_cast(literals.size()); a++) { float literal = literals[a]; data.insert(data.end(), @@ -302,41 +313,6 @@ namespace gxp { + containers.size() * sizeof(ProgramContainerInfo)); } - // Parameters - header.parameterCount = parameters.size(); - header.parametersOffset = data.size() - OFFSET_OF(header, parametersOffset); - for (const Parameter ¶m : parameters) { - ProgramParameterInfo parameter; - parameter.resourceIndex = param.resourceIndex; - parameter.arraySize = param.type.arraySize; - parameter.semantic = static_cast(param.semantic); - parameter.config = createParameterConfig(param.category, getParameterTypeFromUSSEType(param.type.type), - param.type.components, ContainerIndex::Default); - - auto stringEntry = std::find_if(stringDB.begin(), stringDB.end(), [param](const StringEntry &entry) { - return entry.text == param.name; - }); - if (stringEntry == stringDB.end()) - throw std::runtime_error(fmt::format("String \"{}\" is missing from gxp db.", param.name)); - - parameter.nameOffset = stringEntry->index - (data.size() + OFFSET_OF(parameter, nameOffset)); - - data.insert(data.end(), - reinterpret_cast(¶meter), - reinterpret_cast(¶meter) - + sizeof(parameter)); - } - - // Varyings - if (getType() == ShaderType::Fragment && !fragmentInputs.empty()) { - varyings.varyings_count = fragmentInputs.size(); - varyings.vertex_outputs1 = data.size() - - (sizeof(ProgramHeader) + OFFSET_OF(varyings, varyings_count) + sizeof(uint32_t)); - data.insert(data.end(), - reinterpret_cast(fragmentInputs.data()), - reinterpret_cast(fragmentInputs.data()) - + fragmentInputs.size() * sizeof(ProgramFragmentInputInfo)); - } // Code 
header.primaryRegCount = paRegPointer; @@ -344,26 +320,38 @@ namespace gxp { header.tempRegCount1 = tMaxRegs; header.tempRegCount2 = tMaxRegs; // Difference between both reg counts? { + if (getType() == ShaderType::Vertex) { + // official has 12 bytes of nothing + for (uint32_t a = 0; a < 12; a++) { + data.push_back(0); + } + } + header.secondaryProgramOffset = data.size() - OFFSET_OF(header, secondaryProgramOffset); for (const std::unique_ptr &block : secondaryBlocks) { data.insert(data.end(), - reinterpret_cast(block->instructions.data()), - reinterpret_cast(block->instructions.data()) - + block->instructions.size() * sizeof(usse::Instruction)); + reinterpret_cast(block->instructions.data()), + reinterpret_cast(block->instructions.data()) + + block->instructions.size() * sizeof(usse::Instruction)); } header.secondaryProgramOffsetEnd = data.size() - OFFSET_OF(header, secondaryProgramOffsetEnd); + if (getType() == ShaderType::Vertex) { + header.secondaryProgramOffset -= 4; + header.secondaryProgramOffsetEnd -= 4; + } + header.primaryProgramOffset = data.size() - OFFSET_OF(header, primaryProgramOffset); usse::Instruction phase = usse::makePHAS( 0, // sprvv - true, // end + false, // end true, // imm 0, // src1_bank_ext 0, // src2_bank_ext 0, // mode 0, // rate_hi 0, // rate_lo_or_nosched - 0, // wait_cond + 0b111, // wait_cond 0, // temp_count 0, // src1_bank 0, // src2_bank @@ -372,46 +360,100 @@ namespace gxp { 0 // src2_n_or_exe_addr_low ); data.insert(data.end(), - reinterpret_cast(&phase), - reinterpret_cast(&phase) + sizeof(phase)); + reinterpret_cast(&phase), + reinterpret_cast(&phase) + sizeof(phase)); header.primaryProgramInstructionCount++; for (const std::unique_ptr &block : primaryBlocks) { header.primaryProgramInstructionCount += block->instructions.size(); data.insert(data.end(), - reinterpret_cast(block->instructions.data()), - reinterpret_cast(block->instructions.data()) - + block->instructions.size() * sizeof(usse::Instruction)); + 
reinterpret_cast(block->instructions.data()), reinterpret_cast(block->instructions.data()) + block->instructions.size() * sizeof(usse::Instruction)); } usse::BankLayout emitLayout = usse::BankLayout::srcLayout(usse::RegisterBank::Immediate); - usse::Instruction emit = usse::makeEMIT( - 0, // sideband_high - 0, // src0_bank_ext - true, // end - 0, // src1_bank_ext - emitLayout.extension, // src2_bank_ext - 1, /* Target = MTE */ // target - 0, // task_start_or_mte_hi - 1, /* MTE = Vertex */ // task_end_or_mte_lo - 0, // nosched - 0, // sideband_mid - 0, // src0_bank - 0, // incp - 0, // src1_bank - emitLayout.number, // src2_bank - 0, // sideband_low - true, // freep - 0, // src0_n - 0, // src1_n - 0 // src2_n - ); + if (getType() == ShaderType::Vertex) { + header.primaryProgramInstructionCount++; + usse::Instruction emit = usse::makeEMIT( + 0, // sideband_high + 0, // src0_bank_ext + true, // end + emitLayout.extension, // src1_bank_ext + emitLayout.extension, // src2_bank_ext + 1, /* Target = MTE */ // target + 0, // task_start_or_mte_hi + 1, /* MTE = Vertex */ // task_end_or_mte_lo + 0, // nosched + 0, // sideband_mid + 0, // src0_bank + 0, // incp + emitLayout.number, // src1_bank + emitLayout.number, // src2_bank + 0, // sideband_low + true, // freep + 0, // src0_n + 0, // src1_n + 0 // src2_n + ); + data.insert(data.end(), + reinterpret_cast(&emit), + reinterpret_cast(&emit) + sizeof(emit)); + } + } + + std::map stringDB; + + for (const Parameter ¶m : parameters) { + stringDB[param.name] = data.size(); + data.insert(data.end(), param.name.begin(), param.name.end()); + data.insert(data.end(), '\0'); + } + + uint32_t textPadding = (4 - data.size() % sizeof(uint32_t)) % 4; + for (uint32_t a = 0; a < textPadding; a++) + data.push_back(0); + + // Parameters + header.parameterCount = parameters.size(); + header.parametersOffset = data.size() - OFFSET_OF(header, parametersOffset); + header.alternativeParametersOffset = data.size() - OFFSET_OF(header, 
alternativeParametersOffset); + for (const Parameter ¶m : parameters) { + ContainerIndex container = ContainerIndex::Buffer0; + if (param.category == ParameterCategory::Uniform) + container = ContainerIndex::Default; + + ProgramParameterInfo parameter; + parameter.resourceIndex = param.resourceIndex; + parameter.arraySize = param.type.arraySize; + parameter.semantic = static_cast(param.semantic); + parameter.config = createParameterConfig(param.category, getParameterTypeFromUSSEType(param.type.type), + param.type.components, container); + + parameter.nameOffset = stringDB[param.name] - (data.size() + OFFSET_OF(parameter, nameOffset)); + data.insert(data.end(), - reinterpret_cast(&emit), - reinterpret_cast(&emit) + sizeof(emit)); + reinterpret_cast(¶meter), + reinterpret_cast(¶meter) + + sizeof(parameter)); } + // Fragment Varyings + if (getType() == ShaderType::Fragment && !fragmentInputs.empty()) { + varyings.varyingsCount = fragmentInputs.size(); + varyings.vertexOutputs1 = data.size() + - (sizeof(ProgramHeader) + OFFSET_OF(varyings, varyingsCount) + sizeof(uint32_t)); + data.insert(data.end(), + reinterpret_cast(fragmentInputs.data()), + reinterpret_cast(fragmentInputs.data()) + + fragmentInputs.size() * sizeof(ProgramFragmentInputInfo)); + } + + header.size = data.size(); + + uint32_t padding = (4 - data.size() % sizeof(uint32_t)) % 4; + for (uint32_t a = 0; a < padding; a++) + data.push_back(0); + std::memcpy(data.data(), &header, sizeof(ProgramHeader)); std::memcpy(data.data() + sizeof(ProgramHeader), &varyings, sizeof(ProgramVaryings)); @@ -422,6 +464,6 @@ namespace gxp { Builder::Builder(BuilderConfig config) : config(config) { header.magic = gxpMagic; header.majorVersion = 1; - header.minorVersion = 4; + header.minorVersion = 5; } } diff --git a/src/gxp/src/gxp.cpp b/src/gxp/src/gxp.cpp old mode 100644 new mode 100755 index f98f681..d59cae9 --- a/src/gxp/src/gxp.cpp +++ b/src/gxp/src/gxp.cpp @@ -19,25 +19,6 @@ namespace gxp { } } - uint32_t 
getParameterTypeSize(ParameterType type) { - switch (type) { - case ParameterType::Unsigned32: - case ParameterType::Signed32: - case ParameterType::Float32: - return 4; - case ParameterType::Unsigned16: - case ParameterType::Signed16: - case ParameterType::Float16: - return 2; - case ParameterType::Unsigned8: - case ParameterType::Signed8: - return 1; - default: - throw std::runtime_error( - fmt::format("Parameter type {} has no defined size.", static_cast(type))); - } - } - std::string getVaryingName(ProgramVarying varying) { switch (varying) { case ProgramVarying::Position: return "Position"; @@ -82,6 +63,8 @@ namespace gxp { uint32_t getVertexVaryingBits(ProgramVarying varying) { switch (varying) { + case ProgramVarying::Position: return static_cast(ProgramVaryingVertexBits::Position); + case ProgramVarying::Fog: return static_cast(ProgramVaryingVertexBits::Fog); case ProgramVarying::Color0: return static_cast(ProgramVaryingVertexBits::Color0); case ProgramVarying::Color1: return static_cast(ProgramVaryingVertexBits::Color1); diff --git a/src/gxp/src/usse.cpp b/src/gxp/src/usse.cpp old mode 100644 new mode 100755 index b07dcc9..4e9744d --- a/src/gxp/src/usse.cpp +++ b/src/gxp/src/usse.cpp @@ -7,70 +7,70 @@ namespace usse { float fpConstants[] = { - 0.0f, - 0.0f, - 1.0f, - 1.0f, - 2.0f, - 8.0f, - 32.0f, - 128.0f, - 512.0f, - 2048.0f, - 8192.0f, - 32768.0f, - 0.5f, - 0.125f, - 0.03125f, - 0.0078125f, - 0.001953125f, - 0.00048828125f, - 0.0001220703125f, - 3.0517578125e-05f, - 2.7182817459106445f, - 1.4142135381698608f, - 3.1415927410125732f, - 0.7853981852531433f, - 6.2831854820251465f, - 25.132741928100586f, - 1.52587890625e-05f, - 1.5259021893143654e-05f, - 1.5500992276429315e-06f, - 0.0002604166802484542f, - 0.02083333395421505f, - 0.5f, - 0.0f, - 0.0f, - 0.007826805114746094f, - 513.0f, - 2.204391964672e+12f, - 9.472403081849855e+21f, - 4.07034684917033e+31f, - 1.1941301636397839e-07f, - 2.7789456933519086e-17f, - 6.467081701001855e-27f, - 
1.50500011103716e-36f, - 1.68573558312346e-06f, - 0.0003208939451724291f, - 0.1955653429031372f, - 3281298.0f, - 0.0f, - 0.0f, - 4.661918073800564e-10f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - -NAN, - -NAN, - -NAN, - -NAN, - NAN, - NAN, - NAN, - NAN, + 0.0f, 0.0, + 0.0f, 1.0, + 1.0f, 0.0, + 1.0f, 1.0, + 2.0f, 4.0, + 8.0f, 16.0, + 32.0f, 64.0, + 128.0f, 256.0, + 512.0f, 1024.0, + 2048.0f, 4096.0, + 8192.0f, 16384.0, + 32768.0f, 65536.0, + 0.5f, 0.25, + 0.125f, 0.0625, + 0.03125f, 0.015625, + 0.0078125f, 0.00390625, + 0.001953125f, 0.0009765625, + 0.00048828125f, 0.000244140625, + 0.0001220703125f, 6.103515625e-05, + 3.0517578125e-05f, 1.52587890625e-05, + 2.7182817459106445f, 0.3678794503211975, + 1.4142135381698608f, 0.7071067690849304, + 3.1415927410125732f, 1.5707963705062866, + 0.7853981852531433f, 0.39269909262657166, + 6.2831854820251465f, 12.566370964050293, + 25.132741928100586f, 0.0, + 1.52587890625e-05f, 3.0517578125e-05, + 1.5259021893143654e-05f, 3.0518509447574615e-05, + 1.5500992276429315e-06f, 2.1701389414374717e-05, + 0.0002604166802484542f, 0.0026041667442768812, + 0.02083333395421505f, 0.125, + 0.5f, 1.0, + 0.0f, 0.0078125, + 0.0f, 0.0, + 0.007826805114746094f, 0.007826805114746094, + 513.0f, 33628160.0, + 2.204391964672e+12f, 1.4450221616883302e+17, + 9.472403081849855e+21f, 6.209345240995639e+26, + 4.07034684917033e+31f, 4.304788882405838e-41, + 1.1941301636397839e-07f, 1.821653938804957e-12, + 2.7789456933519086e-17f, 4.239300639334267e-22, + 6.467081701001855e-27f, 9.865576140123616e-32, + 1.50500011103716e-36f, 0.0, + 1.68573558312346e-06f, 0.0, + 0.0003208939451724291f, 0.0, + 0.1955653429031372f, 2.9836246540071443e-06, + 3281298.0f, 2.8082021225069334e-41, + 0.0f, 0.0, + 0.0f, 1.1014318927854575e-23, + 4.661918073800564e-10f, 0.007825851440429687, + 0.0f, 0.0, + 0.0f, 0.0, + 0.0f, 0.0, + 0.0f, 0.0, + 0.0f, 0.0, + 0.0f, 0.0, + -NAN, 0.0, + -NAN, 0.0, + -NAN, 0.0, + -NAN, 0.0, + NAN, 0.0, + NAN, 0.0, + NAN, 0.0, + NAN, 0.0, }; const 
uint32_t swizzleStandardSize = 16; @@ -167,7 +167,7 @@ namespace usse { if (bank == RegisterBank::Internal) index += (60 + (doubleReg ? 0 : 2)) * (bits == 7 ? 2 : 1); - else if (doubleReg) + else if (bank != RegisterBank::FloatConstant && doubleReg) index /= 2; return index; @@ -268,7 +268,8 @@ namespace usse { } RegisterReference RegisterReference::getHalf(uint32_t half) { - uint32_t width = (type.components - 1) / 2 + 1; +// uint32_t width = (type.components - 1) / 2 + 1; + uint32_t width = 2; return getComponents(width * half, width); } @@ -301,7 +302,8 @@ namespace usse { if (element >= type.arraySize) throw std::runtime_error("Register reference array out of bounds."); - return RegisterReference({ type.type, type.components, 1 }, bank, index + size / type.arraySize * element); + // Array element padding is always 4 (at least for matrices I think) + return RegisterReference({ type.type, type.components, 1 }, bank, index + 4 * element); } RegisterReference RegisterReference::getExpanded(uint32_t count) { @@ -329,13 +331,23 @@ namespace usse { return reg; } + RegisterReference RegisterReference::getWithSwizzle(usse::SwizzleVec4 newSwizzle) { + RegisterReference reg = *this; + + reg.swizzle = newSwizzle; + + return reg; + } + RegisterReference::RegisterReference(DataType type, RegisterBank bank, uint32_t regIndex) : type(type), bank(bank), size(getTypeSize(type.type) * type.components * type.arraySize / 4) { bool swizzleUp = false; - if (bank != usse::RegisterBank::Internal && regIndex % 2 == 1) { - regIndex--; + if (bank == RegisterBank::FloatConstant) { + lockSwizzle = true; + } else if (bank != RegisterBank::Internal && regIndex % 2 == 1) { swizzleUp = true; + regIndex--; } for (uint32_t a = 0; a < type.components; a++) { diff --git a/src/interface/src/interface.cpp b/src/interface/src/interface.cpp old mode 100644 new mode 100755 index c35ad5d..d55dfd7 --- a/src/interface/src/interface.cpp +++ b/src/interface/src/interface.cpp @@ -10,8 +10,8 @@ bool 
Interface::parseParams(int count, char **args) { for (int a = 1; a < count; a++) { if (strcmp(args[a], "-o") == 0) { - ERROR_RETURN_IF(a + 1 >= count, "Error, no output specified.") - ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.") + ERROR_RETURN_IF(a + 1 >= count, "Error, no output specified.\n") + ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.\n") outputFilePath = args[a + 1]; a++; } else if (strcmp(args[a], "-S") == 0) { // Print Disassembly @@ -23,13 +23,13 @@ bool Interface::parseParams(int count, char **args) { } else if (strcmp(args[a], "-Oreg-space") == 0) { // Optimize Register Space config.optimizeRegisterSpace = true; } else { - ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.") + ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.\n") inputFilePath = args[a]; } } - ERROR_RETURN_IF(inputFilePath.empty(), "You must specify an input.") - ERROR_RETURN_IF(outputFilePath.empty(), "You must specify an output.") + ERROR_RETURN_IF(inputFilePath.empty(), "You must specify an input.\n") + ERROR_RETURN_IF(outputFilePath.empty(), "You must specify an output.\n") return true; } diff --git a/src/translator/include/translator/translator.h b/src/translator/include/translator/translator.h old mode 100644 new mode 100755 index 33e034c..bf5ba96 --- a/src/translator/include/translator/translator.h +++ b/src/translator/include/translator/translator.h @@ -83,10 +83,10 @@ class CompilerGXP : public Compiler { const std::string &name); spv::Id resolveAlias(spv::Id id); - void useRegister(spv::Id id); - usse::RegisterReference getRegister(spv::Id id); - void writeRegister(spv::Id id, TranslatorReference reg); - void aliasRegister(spv::Id empty, spv::Id value); + void useReference(spv::Id id); + usse::RegisterReference getReference(spv::Id id); + void writeReference(spv::Id id, TranslatorReference reg); + void aliasReference(spv::Id empty, spv::Id value); void cleanupRegisters(); 
spv::Id createBlock(const SPIRBlock &block); diff --git a/src/translator/src/codes.cpp b/src/translator/src/codes.cpp old mode 100644 new mode 100755 index e134fcb..1796558 --- a/src/translator/src/codes.cpp +++ b/src/translator/src/codes.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -13,7 +14,7 @@ spv::Id CompilerGXP::resolveAlias(spv::Id id) { return id; } -void CompilerGXP::useRegister(spv::Id id) { +void CompilerGXP::useReference(spv::Id id) { if (!config.optimizeRegisterSpace) return; @@ -25,7 +26,7 @@ void CompilerGXP::useRegister(spv::Id id) { idUsesLeft[id]--; } -usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { +usse::RegisterReference CompilerGXP::getReference(spv::Id id) { id = resolveAlias(id); auto varying = idVaryings.find(id); @@ -34,7 +35,7 @@ usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { auto reg = idRegisters.find(resolveAlias(id)); if (reg != idRegisters.end()) { - useRegister(id); + useReference(id); return reg->second.reference; } @@ -48,8 +49,12 @@ usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { if (type.vecsize == 1) { int32_t regIndex = usse::getFPConstantIndex(constant->m.c[0].r[0].f32); if (regIndex != -1) { - return usse::RegisterReference({usse::Type::Float32, 1, 1}, usse::RegisterBank::FloatConstant, - regIndex); + usse::RegisterReference floatReference( + {usse::Type::Float32, 1, 1}, usse::RegisterBank::FloatConstant, 0); + floatReference.index = regIndex / 2; + // x or y bank + floatReference.swizzle = usse::getSwizzleVec4All(static_cast(regIndex % 2)); + return floatReference; } } @@ -58,23 +63,24 @@ usse::RegisterReference CompilerGXP::getRegister(spv::Id id) { literal[a] = constant->m.c[0].r[a].f32; usse::RegisterReference literalReg = builder.registerLiteral(literal); - writeRegister(id, { literalReg }); +// writeReference(id, {literalReg}); +// idUsesLeft[id] = ~0u; return literalReg; } throw std::runtime_error(fmt::format("Cannot find register, varying or constant with 
id {}.", id)); } -void CompilerGXP::writeRegister(spv::Id id, TranslatorReference reg) { +void CompilerGXP::writeReference(spv::Id id, TranslatorReference reg) { if (idRegisters.find(id) == idRegisters.end()) { idRegisters[id] = std::move(reg); - useRegister(id); + useReference(id); } else { throw std::runtime_error(fmt::format("SSA Violation, id {} was assigned twice.", id)); } } -void CompilerGXP::aliasRegister(spv::Id empty, spv::Id value) { +void CompilerGXP::aliasReference(spv::Id empty, spv::Id value) { // Resolve aliases (so you can make aliases of aliases). empty = resolveAlias(empty); value = resolveAlias(value); @@ -115,16 +121,16 @@ void CompilerGXP::opLoad(const TranslatorArguments &arguments) { spv::Id result = arguments.instruction[1]; spv::Id pointer = arguments.instruction[2]; - usse::RegisterReference reg = getRegister(pointer); + usse::RegisterReference reg = getReference(pointer); if (config.optimizeRegisterSpace && idUsesLeft[pointer] == 0) { // If result allocation is going to be freed right after, just alias. - aliasRegister(result, pointer); + aliasReference(result, pointer); } else { // If not, allocate more space. 
usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, reg.type); - arguments.block.createPack(reg, destination); - writeRegister(result, { destination }); + arguments.block.moveData(reg, destination); + writeReference(result, {destination}); } } @@ -132,23 +138,19 @@ void CompilerGXP::opStore(const TranslatorArguments &arguments) { spv::Id destination = arguments.instruction[0]; spv::Id source = arguments.instruction[1]; - usse::RegisterReference sourceRegister = getRegister(source); - usse::RegisterReference destinationRegister = getRegister(destination); + usse::RegisterReference sourceRegister = getReference(source); + usse::RegisterReference destinationRegister = getReference(destination); - if (sourceRegister.type.components == 1) - arguments.block.createMov(sourceRegister, destinationRegister); - else - arguments.block.createPack(sourceRegister, destinationRegister); + arguments.block.moveData(sourceRegister, destinationRegister); } void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id matrix = arguments.instruction[2]; spv::Id vector = arguments.instruction[3]; - usse::RegisterReference matrixRegister = getRegister(matrix); - usse::RegisterReference vectorRegister = getRegister(vector); + usse::RegisterReference matrixRegister = getReference(matrix); + usse::RegisterReference vectorRegister = getReference(vector); assert(matrixRegister.type.type == vectorRegister.type.type); assert(matrixRegister.type.arraySize == vectorRegister.type.components); @@ -159,24 +161,23 @@ void CompilerGXP::opMatrixTimesVector(const TranslatorArguments &arguments) { usse::RegisterReference temp = builder.allocateRegister( usse::RegisterBank::Temporary, vectorRegister.type); - arguments.block.createPack(vectorRegister, internal); + arguments.block.moveData(vectorRegister, internal); for (uint32_t a = 0; a < 
vectorRegister.type.components; a++) { arguments.block.createDot(matrixRegister.getElement(a), internal, temp.getComponents(a, 1)); } builder.freeRegister(internal); - writeRegister(result, { temp }); + writeReference(result, {temp}); } void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id vectorId = arguments.instruction[2]; spv::Id scalarId = arguments.instruction[3]; - usse::RegisterReference vector = getRegister(vectorId); - usse::RegisterReference scalar = getRegister(scalarId); + usse::RegisterReference vector = getReference(vectorId); + usse::RegisterReference scalar = getReference(scalarId); usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, vector.type); scalar.swizzle = usse::getSwizzleVec4All(usse::SwizzleChannel::X); @@ -185,18 +186,17 @@ void CompilerGXP::opVectorTimesScalar(const TranslatorArguments &arguments) { arguments.block.createMul(vector, scalar, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opCompositeExtract(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id sourceId = arguments.instruction[2]; uint32_t index = arguments.instruction[3]; - usse::RegisterReference source = getRegister(sourceId); + usse::RegisterReference source = getReference(sourceId); - writeRegister(result, { source.getComponents(index, 1) }); + writeReference(result, {source.getComponents(index, 1)}); } void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { @@ -210,11 +210,11 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { for (size_t a = 0; a < type.vecsize;) { spv::Id sourceId = arguments.instruction[2 + a]; - usse::RegisterReference source = getRegister(sourceId); + usse::RegisterReference source = 
getReference(sourceId); uint32_t size = 1; while (a + size < type.vecsize) { - usse::RegisterReference next = getRegister(arguments.instruction[2 + a + size]); + usse::RegisterReference next = getReference(arguments.instruction[2 + a + size]); bool matchingBanks = source.bank == next.bank; bool matchingIndices = (source.index + static_cast(source.swizzle[0]) + size) == (next.index + static_cast(next.swizzle[0])); @@ -225,21 +225,16 @@ void CompilerGXP::opCompositeConstruct(const TranslatorArguments &arguments) { } } - if (size == 1) - arguments.block.createMov(usse::RegisterReference( - { source.type.type, size, 1 }, source.bank, source.index), output.getComponents(a, size)); - else - arguments.block.createPack(usse::RegisterReference( - { source.type.type, size, 1 }, source.bank, source.index), output.getComponents(a, size)); + source.type.components = size; + arguments.block.moveData(source.getComponents(0, size), output.getComponents(a, size)); a += size; } - writeRegister(result, { output }); + writeReference(result, {output}); } void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id base = arguments.instruction[2]; @@ -251,7 +246,7 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { SPIRType type = get_type_from_variable(base); if (type.basetype == SPIRType::Struct && is_member_builtin(type, builtInValue, &builtIn)) { - writeRegister(result, { getOrThrow(varyingReferences, translateVarying(builtIn)) }); + writeReference(result, {getOrThrow(varyingReferences, translateVarying(builtIn))}); return; } else { ref = getOrThrow(idRegisters, base); @@ -274,7 +269,7 @@ void CompilerGXP::opAccessChain(const TranslatorArguments &arguments) { } } - writeRegister(result, ref); + writeReference(result, ref); } @@ -286,13 +281,13 @@ void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { SPIRType type = get_type(typeId); - 
usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference temp = builder.allocateRegister( usse::RegisterBank::Temporary, translateType(type)); - for (uint32_t a = 0; a < type.vecsize; a++) { + for (uint32_t a = 0; a < type.vecsize;) { uint32_t index = arguments.instruction[4 + a]; usse::RegisterReference source; @@ -302,18 +297,39 @@ void CompilerGXP::opVectorShuffle(const TranslatorArguments &arguments) { else source = second.getComponents(index - first.type.components, 1); - arguments.block.createMov(source, temp.getComponents(a, 1)); + uint32_t size = 1; + while (a + size < type.vecsize) { + uint32_t nextIndex = arguments.instruction[4 + a + size]; + usse::RegisterReference next; + if (index < first.type.components) + next = first.getComponents(nextIndex, 1); + else + next = second.getComponents(nextIndex - first.type.components, 1); + bool matchingBanks = source.bank == next.bank; + bool matchingIndices = (source.index + static_cast(source.swizzle[0]) + size) == + (next.index + static_cast(next.swizzle[0])); + if (matchingBanks && matchingIndices) { + size++; + } else { + break; + } + } + + arguments.block.moveData( + usse::RegisterReference({ source.type.type, size, 1 }, source.bank, source.index), + temp.getComponents(a, size)); + + a += size; } - writeRegister(result, { temp }); + writeReference(result, {temp}); } void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id sourceId = arguments.instruction[2]; - usse::RegisterReference source = getRegister(sourceId); + usse::RegisterReference source = getReference(sourceId); usse::RegisterReference destination = builder.allocateRegister(usse::RegisterBank::Temporary, source.type); int32_t zeroFP = 
usse::getFPConstantIndex(0); @@ -321,62 +337,58 @@ void CompilerGXP::opFNegate(const TranslatorArguments &arguments) { arguments.block.createSub(zero, source, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opFAdd(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); arguments.block.createAdd(first, second, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opFSub(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); arguments.block.createSub(first, second, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opFMul(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; - usse::RegisterReference first = getRegister(firstId); - 
usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); arguments.block.createMul(first, second, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opDot(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[2]; spv::Id secondId = arguments.instruction[3]; @@ -384,42 +396,47 @@ void CompilerGXP::opDot(const TranslatorArguments &arguments) { usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference internal = builder.allocateRegister( usse::RegisterBank::Internal, second.type); - arguments.block.createPack(second, internal); + arguments.block.moveData(second, internal); arguments.block.createDot(first, internal, destination); builder.freeRegister(internal); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::opFunctionCall(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id functionId = arguments.instruction[2]; SPIRFunction function = get(functionId); + fmt::print("Calling {}(", get_name(functionId)); + for (size_t a = 0; a < function.arguments.size(); a++) { // Should be alias here, not writeRegister. 
spv::Id moveToId = function.arguments[a].id; spv::Id moveFromId = arguments.instruction[3 + a]; + fmt::print("{}{}", a != 0 ? ", " : "", usse::disasm::disassembleReference(idRegisters[moveFromId].reference)); + // Function use counts have not been created yet (until createFunction). Allow this assignment. if (config.optimizeRegisterSpace) idUsesLeft[moveToId]++; - aliasRegister(moveToId, moveFromId); + aliasReference(moveToId, moveFromId); } + fmt::print(")...\n"); + spv::Id returnValue = createFunction(function); if (returnValue != 0) - aliasRegister(result, returnValue); + aliasReference(result, returnValue); } void CompilerGXP::opExtInst(const TranslatorArguments &arguments) { @@ -432,11 +449,10 @@ void CompilerGXP::opExtInst(const TranslatorArguments &arguments) { } void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id sourceId = arguments.instruction[4]; - usse::RegisterReference source = getRegister(sourceId); + usse::RegisterReference source = getReference(sourceId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, source.type); @@ -445,7 +461,7 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { usse::RegisterReference magnitude = builder.allocateRegister( usse::RegisterBank::Internal, { source.type.type, 1, 1 }); - arguments.block.createPack(source, temporary); + arguments.block.moveData(source, temporary); arguments.block.createDot(temporary, temporary, magnitude); arguments.block.createReverseSquareRoot(magnitude, magnitude); @@ -454,39 +470,37 @@ void CompilerGXP::extGLSLNormalize(const TranslatorArguments &arguments) { builder.freeRegister(magnitude); builder.freeRegister(temporary); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::extGLSLFMin(const TranslatorArguments &arguments) { - spv::Id typeId = 
arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); arguments.block.createMin(first, second, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::extGLSLFMax(const TranslatorArguments &arguments) { - spv::Id typeId = arguments.instruction[0]; spv::Id result = arguments.instruction[1]; spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, first.type); arguments.block.createMax(first, second, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { @@ -494,8 +508,8 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference internal = builder.allocateRegister( usse::RegisterBank::Internal, second.type); @@ -508,7 +522,7 @@ void 
CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { usse::RegisterReference two = usse::RegisterReference({ usse::Type::Float32, 1, 1 }, usse::RegisterBank::FloatConstant, usse::getFPConstantIndex(2)).getExpanded(1); - arguments.block.createPack(second, internal); + arguments.block.moveData(second, internal); arguments.block.createDot(first, internal, magnitude); arguments.block.createMul(two, magnitude, magnitude); arguments.block.createMul(magnitude, first, destination); @@ -517,7 +531,7 @@ void CompilerGXP::extGLSLReflect(const TranslatorArguments &arguments) { builder.freeRegister(magnitude); builder.freeRegister(internal); - writeRegister(result, { destination }); + writeReference(result, {destination}); } void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { @@ -525,8 +539,8 @@ void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { spv::Id firstId = arguments.instruction[4]; spv::Id secondId = arguments.instruction[5]; - usse::RegisterReference first = getRegister(firstId); - usse::RegisterReference second = getRegister(secondId); + usse::RegisterReference first = getReference(firstId); + usse::RegisterReference second = getReference(secondId); usse::RegisterReference destination = builder.allocateRegister( usse::RegisterBank::Temporary, { usse::Type::Float32, 1, 1 }); @@ -538,7 +552,7 @@ void CompilerGXP::extGLSLPow(const TranslatorArguments &arguments) { arguments.block.createMul(destination, second, destination); arguments.block.createExp(destination, destination); - writeRegister(result, { destination }); + writeReference(result, {destination}); } TranslatorArguments::TranslatorArguments( From 02d9a5c8af11bd434bf87dd185338545f629fba5 Mon Sep 17 00:00:00 2001 From: 1whatleytay <1whatleytay@hdsb.ca> Date: Sun, 5 Jan 2020 16:00:58 -0500 Subject: [PATCH 18/19] Update interface with help message --- src/interface/include/interface/interface.h | 3 ++ src/interface/src/interface.cpp | 50 ++++++++++++++++----- 2 files 
changed, 42 insertions(+), 11 deletions(-) diff --git a/src/interface/include/interface/interface.h b/src/interface/include/interface/interface.h index 0c1a374..3f71fc1 100644 --- a/src/interface/include/interface/interface.h +++ b/src/interface/include/interface/interface.h @@ -13,8 +13,11 @@ class Interface { std::string inputFilePath; std::string outputFilePath; + bool help = false; + TranslatorConfig config; + static void printHelp(); bool parseParams(int count, char **args); public: diff --git a/src/interface/src/interface.cpp b/src/interface/src/interface.cpp index d55dfd7..73e8f77 100755 --- a/src/interface/src/interface.cpp +++ b/src/interface/src/interface.cpp @@ -5,14 +5,33 @@ #include -#define ERROR_RETURN_IF(cond, text) if (cond) { fmt::print(text); return false; } +void Interface::printHelp() { + std::string helpMessage = R"(Usage: psp2spvc [options] path/to/input.spv -o path/to/output.gxp + +Options: + -h, --help Shows help message. + -S Print shader assembly to console. + -A Print register allocation messages to console. + -L Print other debug messages to console. + -Oreg-space Enable register space optimization. Required for larger shaders. 
+)"; + + fmt::print(helpMessage); +} bool Interface::parseParams(int count, char **args) { for (int a = 1; a < count; a++) { if (strcmp(args[a], "-o") == 0) { - ERROR_RETURN_IF(a + 1 >= count, "Error, no output specified.\n") - ERROR_RETURN_IF(!outputFilePath.empty(), "Multiple output files specified.\n") - outputFilePath = args[a + 1]; + if (a + 1 >= count) { + fmt::print("-o option is missing an output path.\n"); + break; + } + + if (!outputFilePath.empty()) + fmt::print("An output path has already been specified, ignoring \"{}\".\n", args[a + 1]); + else + outputFilePath = args[a + 1]; + a++; } else if (strcmp(args[a], "-S") == 0) { // Print Disassembly config.printDisassembly = true; @@ -20,22 +39,31 @@ bool Interface::parseParams(int count, char **args) { config.printAllocations = true; } else if (strcmp(args[a], "-L") == 0) { // Print Optimization Debug Messages config.logDebug = true; + } else if (strcmp(args[a], "-h") == 0 || strcmp(args[a], "--help") == 0) { + help = true; } else if (strcmp(args[a], "-Oreg-space") == 0) { // Optimize Register Space config.optimizeRegisterSpace = true; } else { - ERROR_RETURN_IF(!inputFilePath.empty(), "Multiple input files specified.\n") - inputFilePath = args[a]; + if (!inputFilePath.empty()) + fmt::print("Unknown argument {}. 
Ignoring.", args[a]); + else + inputFilePath = args[a]; } } - ERROR_RETURN_IF(inputFilePath.empty(), "You must specify an input.\n") - ERROR_RETURN_IF(outputFilePath.empty(), "You must specify an output.\n") + if (!help) { + if (inputFilePath.empty()) + fmt::print("You must specify an input.\n"); + else if (outputFilePath.empty()) + fmt::print("You must specify an output path.\n"); + else + return true; + } - return true; + printHelp(); + return false; } -#undef ERROR_RETURN_IF - int Interface::exec(int count, char **args) { if (!parseParams(count, args)) return 1; From a060f498a54e3883f1e449a25f94f08c21c7e50f Mon Sep 17 00:00:00 2001 From: 1whatleytay <1whatleytay@hdsb.ca> Date: Sun, 5 Jan 2020 16:01:09 -0500 Subject: [PATCH 19/19] Add README.md --- README.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b2884ef --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +# psp2spvc - An experimental shader compiler for the Playstation Vita. + +psp2spvc is a program that translates between the SPIRV and GXP formats. +The goal of the project is to provide an easy, open-source way to generate GXP shaders for the Playstation Vita. + +gxmfun shader compatibility: + - [x] clear_f + - [x] clear_v + - [ ] color_f + - [ ] color_v + - [ ] cube_f + - [ ] cube_v + +The project is still early into its development, shaders can be unstable and many features are not yet implemented. +If you found an issue or would like to request a feature, feel free to make an [issue](https://github.com/1whatleytay/psp2spvc/issues). + +### GLSL +[glslangValidator](https://github.com/KhronosGroup/glslang) is the only GLSL compiler verified to work. +Other GLSL compilers may be supported in the future. + +Compile your GLSL as follows: +```shell script +glslangValidator path/to/input.glsl -V -o path/to/output.spv +``` +A Vulkan target is recommended. 
+ +#### Uniforms +As a workaround for the Vulkan target, uniforms in the default buffer should be defined as follows: +```glsl +uniform export_name { type self; } reference_name; +``` +`export_name` is the name that will be visible in the binary, and the name to use with `sceGxmProgramFindParameterByName`. + +`reference_name` will be the name used in the rest of the shader. + +`type` is the type of the uniform. It can be a struct. + +You can refer to the uniform as follows: `reference_name.self`. +OpenGL target/uniform syntax will be provided in the future (minor tweaking is needed). + +## Usage +psp2spvc must be provided with one input SPIRV file and one output GXP path. +The path to the input file should be provided as an argument. +The path to the output file must also be provided as an argument, preceded by `-o`. +```shell script +psp2spvc path/to/input.spv -o path/to/output.gxp +``` + +### Options +Options can be provided as arguments to modify the build or to print debug information. + +Option | Description +--- | --- +-S | Print shader assembly. +-A | Print allocation messages. +-L | Print other debug messages. +-Oreg-space | Enable register space optimization. Required for larger shaders. + +## Building +You need [CMake](https://cmake.org/) to build the project. +The following commands will prepare and build the project on UNIX-based systems. +```shell script +git submodule update --init --recursive +mkdir build && cd build +cmake .. +make +``` + +No prebuilt binaries are provided.