diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 9f84396f..b40e460b 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -2,17 +2,17 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute to this The following people from multiple organizations have contributed to this project: -* (Ventana Micro Systems)[https://www.ventanamicro.com] - * (Arup Chakraborty)[https://github.com/arupc] +* [Ventana Micro Systems](https://www.ventanamicro.com) + * [Arup Chakraborty](https://github.com/arupc) -* (MIPS)[https://mips.com] - * (Knute Lingaard)[https://github.com/klingaard] - * (Kathlene Magnus)[https://github.com/kathlenemagnus] +* [MIPS](https://mips.com) + * [Knute Lingaard](https://github.com/klingaard) + * [Kathlene Magnus](https://github.com/kathlenemagnus) -* (Condor Computing)[https://condorcomputing.com] - * (Jeff Nye)[https://github.com/jeffnye-gh] +* [Condor Computing](https://condorcomputing.com) + * [Jeff Nye](https://github.com/jeffnye-gh) -* (InCore Semiconductors)[https://incoresemi.com/] - * (Sai Govardhan)[https://github.com/govardhnn] +* [InCore Semiconductors](https://incoresemi.com/) + * [Sai Govardhan](https://github.com/govardhnn) List is incomplete and more contributor names/organizations to be added. 
diff --git a/arches/isa_json/olympia_uarch_rv64v.json b/arches/isa_json/olympia_uarch_rv64v.json index 23dda4c2..70fb3758 100644 --- a/arches/isa_json/olympia_uarch_rv64v.json +++ b/arches/isa_json/olympia_uarch_rv64v.json @@ -104,7 +104,7 @@ { "mnemonic": "vcompress.vm", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "COMPRESS", "latency": 4 }, { @@ -302,13 +302,13 @@ { "mnemonic": "vfmv.f.s", "pipe": "v2s", - "uop_gen": "NONE", + "uop_gen": "SCALAR_MOVE", "latency": 1 }, { "mnemonic": "vfmv.s.f", "pipe": "vmv", - "uop_gen": "NONE", + "uop_gen": "SCALAR_MOVE", "latency": 1 }, { @@ -1328,7 +1328,7 @@ { "mnemonic": "vmv.s.x", "pipe": "vmv", - "uop_gen": "NONE", + "uop_gen": "SCALAR_MOVE", "latency": 1 }, { @@ -1352,7 +1352,7 @@ { "mnemonic": "vmv.x.s", "pipe": "v2s", - "uop_gen": "NONE", + "uop_gen": "SCALAR_MOVE", "latency": 1 }, { @@ -1586,25 +1586,25 @@ { "mnemonic": "vrgather.vi", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "RGATHER", "latency": 4 }, { "mnemonic": "vrgather.vv", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "RGATHER", "latency": 4 }, { "mnemonic": "vrgather.vx", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "RGATHER", "latency": 4 }, { "mnemonic": "vrgatherei16.vv", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "RGATHER", "latency": 4 }, { @@ -1766,25 +1766,25 @@ { "mnemonic": "vslidedown.vi", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "SLIDEDOWN", "latency": 4 }, { "mnemonic": "vslidedown.vx", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "SLIDEDOWN", "latency": 4 }, { "mnemonic": "vslideup.vi", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "SLIDEUP", "latency": 4 }, { "mnemonic": "vslideup.vx", "pipe": "vpermute", - "uop_gen": "PERMUTE", + "uop_gen": "SLIDEUP", "latency": 4 }, { diff --git a/core/InstArchInfo.cpp b/core/InstArchInfo.cpp index 0f618984..3600a06a 100644 --- a/core/InstArchInfo.cpp +++ b/core/InstArchInfo.cpp @@ -77,8 +77,12 @@ namespace 
olympia {"INT_EXT", InstArchInfo::UopGenType::INT_EXT}, {"SLIDE1UP", InstArchInfo::UopGenType::SLIDE1UP}, {"SLIDE1DOWN", InstArchInfo::UopGenType::SLIDE1DOWN}, - {"PERMUTE", InstArchInfo::UopGenType::PERMUTE}, - {"NONE", InstArchInfo::UopGenType::NONE}}; + {"SCALAR_MOVE", InstArchInfo::UopGenType::SCALAR_MOVE}, + {"RGATHER", InstArchInfo::UopGenType::RGATHER}, + {"COMPRESS", InstArchInfo::UopGenType::COMPRESS}, + {"WHOLE_REG_MOVE", InstArchInfo::UopGenType::WHOLE_REG_MOVE}, + {"NONE", InstArchInfo::UopGenType::NONE}, + }; void InstArchInfo::update(const nlohmann::json & jobj) { diff --git a/core/InstArchInfo.hpp b/core/InstArchInfo.hpp index e1ab1e15..8b315f24 100644 --- a/core/InstArchInfo.hpp +++ b/core/InstArchInfo.hpp @@ -90,11 +90,16 @@ namespace olympia REDUCTION, REDUCTION_WIDE, INT_EXT, + SLIDEUP, + SLIDEDOWN, SLIDE1UP, SLIDE1DOWN, - PERMUTE, - NONE, - UNKNOWN + SCALAR_MOVE, + RGATHER, + COMPRESS, + WHOLE_REG_MOVE, + UNKNOWN, + NONE }; static constexpr uint32_t N_UOP_GEN_TYPES = static_cast(UopGenType::NONE); diff --git a/core/vector/VectorUopGenerator.cpp b/core/vector/VectorUopGenerator.cpp index 153421b6..cca25a65 100644 --- a/core/vector/VectorUopGenerator.cpp +++ b/core/vector/VectorUopGenerator.cpp @@ -169,8 +169,15 @@ namespace olympia // Exe Uop 2: vrgather.vv v21, v9 // Exe Uop 3: vrgather.vv v22, v10 // Exe Uop 4: vrgather.vv v23, v11 - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::PERMUTE, - &VectorUopGenerator::generatePermuteUops_); + // uop_gen_function_map_.emplace(InstArchInfo::UopGenType::RGATHER, + // &VectorUopGenerator::generatePermuteUops_); + + // Vector scalar move uop generator + // Integer Scalar Move + // Floating-Point Scalar Move + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::SCALAR_MOVE, + &VectorUopGenerator::generateScalarMoveUops_); } void VectorUopGenerator::onBindTreeLate_() { mavis_facade_ = getMavis(getContainer()); } @@ -359,7 +366,7 @@ namespace olympia } } - // For narrowing insturction, + // 
For narrowing instruction, if constexpr (Type == InstArchInfo::UopGenType::NARROWING) { sparta_assert(src_rs3.field_id != mavis::InstMetaData::OperandFieldID::NONE, @@ -479,9 +486,10 @@ namespace olympia return makeInst_(srcs, dests); } - InstPtr VectorUopGenerator::generatePermuteUops_() + template + InstPtr VectorUopGenerator::generateScalarMoveUops_() { - sparta_assert(false, "Vector permute uop generation is currently not supported!"); + sparta_assert(false, "Vector Scalar move implementation TODO ..."); } InstPtr VectorUopGenerator::makeInst_(const mavis::OperandInfo::ElementList & srcs, diff --git a/core/vector/VectorUopGenerator.hpp b/core/vector/VectorUopGenerator.hpp index 0af3b9bb..30de93fa 100644 --- a/core/vector/VectorUopGenerator.hpp +++ b/core/vector/VectorUopGenerator.hpp @@ -89,7 +89,9 @@ namespace olympia template InstPtr generateSlideUops_(); - InstPtr generatePermuteUops_(); +// InstPtr generatePermuteUops_(); + + template InstPtr generateScalarMoveUops_(); InstPtr makeInst_(const mavis::OperandInfo::ElementList & srcs, const mavis::OperandInfo::ElementList & dests); diff --git a/docs/vector_permutation.adoc b/docs/vector_permutation.adoc new file mode 100644 index 00000000..58db32a7 --- /dev/null +++ b/docs/vector_permutation.adoc @@ -0,0 +1,418 @@ +:doctitle: Olympia Vector Permutation Design Document + +:toc: + +[[Document_Information]] +== Document Information + +TODO + +[[Revision_History]] +=== Revision History + +[width="100%",cols="11%,11%,16%,62%",options="header",] +|=== +|*Revision* |*Date* |*Author* |*Summary of Changes* +|0.1 | 2025.04.TODO | Sai Govardhan | Initial Vector Permutations +Design Document +|=== + +[[Conventions_and_Terminology]] +=== Conventions and Terminology + + +[width="100%",cols="17%,83%",options="header",] +|=== +|Label |Description +| VLSU | Vector Load Store Unit +| VLEN | Vector Register Length (1024 bits in Olympia) +| SEW | Selected Element Width +| LMUL | Vector Register Group Multiplier +| ELEN | Maximum 
Vector Element Width +| VTA | Vector Tail Agnostic +// TODO MORE +|=== +[[Related_Documents]] +=== Related Documents + +// + +[width="100%",cols="25%,75%",options="header",] +|=== +|*Title* |*Description* +| The RISC-V Vector ISA (v1.0) | TODO +// | Saturn Vectors | TODO +// | The vector thesis | TODO +// Cray reference? +// Chipsalliance T1? +// Tenstorrent Ocelot? +// Barcaelona Supercomputing Group slides? +|=== + +[[Notes_Open_Issues]] +=== Notes/Open Issues + +// + +* Note1 +* Note 2 + +[[OVERVIEW]] +== OVERVIEW +The following is the directory structure of olympia, for reference: + +```bash +. +├── arches +├── CMakeLists.txt +├── CodingStyle.md +├── conda +├── CONTRIBUTING.md +├── CONTRIBUTORS.md +├── core ## Consists of the vector/ directory +├── docs +├── fsl +├── layouts +├── LICENSE +├── mavis +├── mss +├── README.md +├── release +├── reports +├── sim +├── stf_lib +├── test +├── test.json +└── traces +``` + +We shall implement the Vector Permutation instructions in the `core/vector/`, make some modifications to the `core/InstGenerator.cpp` and run tests in the `test/core/vector/` directory. The following document lists down the instructions we have implemented, the changes we have made to the existing files and the architecture of these new vector perumute implementations. + +=== Configuring the Vector Unit + +Olympia implements the Vector Unit in the `core/vector/` directory where: + + - `VLEN` is the width of the vector register statically set to 1024 + + - `ELEN`, the Maximum Vector Element Width is specified based on `sew_` + (Selected Element Width) + +Within the `core/vector/VectorConfig.hpp` file, the `VectorConfig` class is +defined to configure the Vector Unit. 
+ +``` +VectorConfig(uint32_t vl, uint32_t sew, uint32_t lmul, uint32_t vta) +``` + +A sample assembly instruction is: + +``` +vsetvli t0, a0, e32, m1 # Configure vector unit where a0 specifies the vector +length (vl_), sew_=32, lmul_=1 + +``` + +The `vlmax_`, the maximum vector length is set to `((VLEN / sew_) * lmul_)`. + +We would be using a subset of `vlmax_` by specifying the `vl_` in the vector +configuration. + +Take an example where VLEN is set to 1024, `sew_` is 32 bits and `lmul_` is 1. +Then `vlmax_` is ((1024/32)*1) = 32. Which means that there is one logical +Vector register is divided into 32 elements of 32 bits each. + +If we set Vector Length (that we would use) `vl_` to 16, then we are using 16 +elements of 32 `vlmax_` elements we could use in the logical vector register +file instance. + +Note that the `vta_` (Vector Tail Agnostic) parameter is set to false by +default, which indicates that it is undisturbed. + + +// + +=== How are the Vector Uops generated? + +We decode and determine the instructions as Vector instructions in the +`core/decode/Decode.cpp` file. + +```cpp +vector_enabled_(true), + vector_config_(new VectorConfig(p->init_vl, p->init_sew, p->init_lmul, p->init_vta)), +``` + +We feed Mavis with the Vector Permutation instructions in json format as specified in the +`mavis/json/isa_rv64v.json` and the `mavis/json/isa_rv64vf.json` files for both +the Base Vector instructions and the Vector Floating Point instructions. + +The `core/vector/VectorUopGenerator.hpp` file implements the Vector Uop +Generator. + + +### Adding Support to Vector Permutation instructions + +- Instruction Architecture Info: + + . `core/InstArchInfo.{hpp}/{cpp}`: + .. Already has `VPERMUTE` in TargetPipe enum + .. Need to ensure proper UopGenType for permutation, to add: + ... `SCALAR_MOVE` + ... `SLIDE1UP` + ... `SLIDE1DOWN` + ... `SLIDEUP` + ... `SLIDEDOWN` + ... `RGATHER` + ... `COMPRESS` + ... `WHOLE_REG_MOVE` + + . `mavis/json/isa_rv64v.json`: + .. 
Define vector permutation instruction encodings + .. Specify operand types and fields + + . `core/execute/IssueQueue.hpp`: + .. Configure scheduler for vector permute operations + + . `core/execute/Execute.cpp`: + .. Handle execution of permute operations + + . `core/vector/VectorConfig.hpp`: + .. Already has basic vector config (VLEN, SEW, LMUL) + .. May need updates for permute-specific settings + +The files we shall be modifying: + +. `core/InstArchInfo.hpp` + - UopGenType to be updated to specific implementations of Vector Permutation instructions, to remove the `PERMUTE` entry + +. `core/vector/VectorUopGenerator.hpp` + - Currently has stub for `generatePermuteUops_` + +. `core/vector/VectorUopGenerator.cpp` + - Add implementation for specific permutation ops and replace `generatePermuteUops_` + +```cpp + uop_gen_function_map_.emplace(InstArchInfo::UopGenType::PERMUTE, + &VectorUopGenerator::generatePermuteUops_); +``` + +. `test/core/vector/Vector_test.cpp`: + - Add test cases for vector permutation instructions + + +#### List of all the Vector Permutation Instructions to be implemented: + +##### Vector Scalar Move Instructions + +Integer Scalar Move + + . vmv.x.s rd, vs2 # x[rd] = vs2[0] + . vmv.s.x vd, rs1 # vd[0] = x[rs1] + +Floating-Point Scalar Move + + . vfmv.f.s rd, vs2 # f[rd] = vs2[0] (rs1=0) + . vfmv.s.f vd, rs1 # vd[0] = f[rs1] (vs2=0) + + +Key points: + + - Ignores LMUL and vector register groups + - Operates even if vstart ≥ vl or vl=0 + - Handles SEW vs XLEN width differences + + +##### Vector Slide Instructions + + . vslideup.vx/vi # vd[i+OFFSET] = vs2[i] + . vslidedown.vx/vi # vd[i] = vs2[i+OFFSET] + . vslide1up.vx # vd[0]=x[rs1], vd[i+1]=vs2[i] + . vslide1down.vx # vd[i]=vs2[i+1], vd[vl-1]=x[rs1] + +Critical behaviors: + + - No operation if vstart ≥ vl + - Follows tail/mask policies + - Source/dest register groups cannot overlap + - OFFSET from x-reg or immediatte +// - TODO MORE + +##### Vector Register Gather + + . 
vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; + . vrgatherei16.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; + . vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] + . vrgather.vi vd, vs2, uimm, vm # vd[i] = (uimm >= VLMAX) ? 0 : vs2[uimm] + +Requirements: + + - Out-of-range indices return 0 + - No source/dest overlap allowed + - Handles different element widths +// - TODO MORE + +##### Vector Compress + + . vcompress.vm # Pack masked elements contiguously + +[[Overview_Block_Diagram]] + +##### Whole Vector Register Move + + . vmv1r.v v1, v2 # Copy v1=v2 + . vmv2r.v v10, v12 # Copy v10=v12; v11=v13 + . vmv4r.v v4, v8 # Copy v4=v8; v5=v9; v6=v10; v7=v11 + . vmv8r.v v0, v8 # Copy v0=v8; v1=v9; ...; v7=v15 + + +=== Overview Block Diagram + +// +. TODO + +[[Functional_Description]] +== Functional Description + +// +. TODO + +=== Taking an example of implementing the vector move instructions + +. vmv.x.s rd, vs2 # x[rd] = vs2[0] + +.. We add the instruction in `mavis/json/isa_rv64v.json` file + +.. Add the instruction to `core/InstArchInfo.hpp` file + +```cpp + enum class UopGenType + { + ... + SCALAR_MOVE + ... + } +``` + +.. Add a new function for SCALAR_MOVE and declare it in the header. + +```cpp + InstPtr generateScalarMoveUops_(); +``` + +.. Add the new function in the `core/vector/VectorUopGenerator.cpp` file. + +```cpp + + InstPtr VectorUopGenerator::generateScalarMoveUops_() + { + } +``` + +5. Add the tests to the `test/core/vector/Vector_test.cpp` file. + +[[Unit_Block_Diagram]] +=== Unit Block Diagram + +// +// image:media/image1.png[image,width=576,height=366] +// Figure 1 - Sample Figure +1. Vector Scalar Move Instruction + + +[[Block_Diagram_Description]] +=== Block Diagram Description + + +// + +// [[Description_of_Block_B1]] +// == Description of Block + +// +. TODO + +[[Operation]] +=== Operation + +// + +1. Vector Scalar Move Instruction + +. 
`vmv.x.s rd, vs2 # x[rd] = vs2[0] (vs1=0)` +- Performs its operation even if vstart ≥ vl or vl=0. +- If SEW > XLEN, the least-significant XLEN bits are transferred and the upper SEW-XLEN bits are ignored. +- If SEW < XLEN, the value is sign-extended to XLEN bits. + +[[Interfaces]] +=== Interfaces + +// +. TODO + +[width="100%",cols="18%,21%,61%",options="header",] +|=== +|*Name* |*C++ Type* |*Purpose/Description* +| | | +| | | +| | | +|=== + +[[CPP_Class_Description]] +=== C++ Class Description + +//