micro54 artifact commit

harvard-acc · Sep 25, 2021 · ad305b0 · ad305b0
commit ad305b0
Show file tree

Hide file tree

Showing 259 changed files with 139,678 additions and 0 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "hw/matchlib"]
+	path = hw/matchlib
+	url = https://github.com/NVlabs/matchlib.git
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,13 @@
+# Harvard University - all rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/README.md b/README.md
@@ -0,0 +1,34 @@
+EdgeBERT
+========
+
+EdgeBERT is a HW/SW co-design enabling sentence-level energy optimizations for latency-aware multi-task NLP inference. In this repo, we provide both the software and hardware models. For full details, please check out [this paper](https://arxiv.org/pdf/2011.14203.pdf).
+
+<img src="images/edgebert_overview.png" width="3000" height="330">
+
+## Directory structure
+
+* `hw/cmod/include/*.h` contains header files for functions and classes from EdgeBERT
+* `hw/cmod/<module>` sub-directories contain EdgeBERT SystemC modules
+* `hw/matchlib` library of synthesizable hardware components from NVIDIA
+
+* `sw/Entropy_LUT` contains the entropy datasets and a notebook to train the entropy prediction LUT (as well as a sample pretrained LUT)
+* `sw/EdgeBERT/transformers` contains a modified version of the HuggingFace Transformers library
+* `sw/EdgeBERT/examples` contains python scripts for training and evaluating models
+* `sw/EdgeBERT/scripts` contains shell scripts to run our software workflow
+
+## Citation
+
+If you find this resource useful, please consider citing the following paper:
+```
+@inproceedings{edgebert_micro2021, 
+  author = {Tambe, Thierry and Hooper, Coleman and Pentecost, Lillian and Jia, Tianyu 
+            and Yang, En-Yu and Donato, Marco and Sanh, Victor and Whatmough, Paul N.
+            and Rush, Alexander M. and Brooks, David and Wei, Gu-Yeon}, 
+  title = {EdgeBERT: Sentence-Level Energy Optimizations for Latency-Aware Multi-Task NLP Inference},
+  publisher = {Association for Computing Machinery},
+  booktitle = {Proceedings of the 54th Annual IEEE/ACM International Symposium on Microarchitecture},
+  year = {2021},
+}
+```
+## Contact Us
+For any further questions please contact [email protected]
diff --git a/hw/README.md b/hw/README.md
@@ -0,0 +1,22 @@
+## Tool versions and environment setup
+
+C++ simulation and HLS of the EdgeBERT top-level hardware accelerator and its child modules have been verified to work with the following tool versions:
+
+* `gcc` - 4.9.3 (with C++11)
+* `systemc` - 2.3.1
+* `boost` - 1.55.0 
+* `catapult` - 10.5a or newer
+
+In `cmod/cmod_Makefile`, please set the correct tool installation paths for `BOOST_HOME`, `SYSTEMC_HOME` and `CATAPULT_HOME`.
+
+
+## Build and run
+
+### C++ compile and simulation of SystemC module
+
+The following commands compile and simulate the EdgeBERT accelerator top level, which executes memory storage, followed by sparse PE execution, layer normalization, softmax, entropy assessment and finally DVFS control.
+
+    git clone --recursive https://github.com/harvard-acc/EdgeBERT.git
+    cd EdgeBERT/hw/cmod/TopAccel
+    make
+    make run
diff --git a/hw/cmod/Accum/Accum.h b/hw/cmod/Accum/Accum.h
@@ -0,0 +1,130 @@
+
+#ifndef __ACCUM_H__
+#define __ACCUM_H__
+
+#include <systemc.h>
+#include <nvhls_int.h>
+#include <nvhls_types.h>
+#include <nvhls_vector.h>
+#include <nvhls_connections.h>
+//#include <ac_std_float.h>
+#include <ArbitratedScratchpad.h>
+#include "../include/Spec.h"
+#include "../include/AdpfloatSpec.h"
+#include "../include/AdpfloatUtils.h"
+
+/*
+ * Accum: clocked SystemC module that accumulates incoming vectors into a
+ * matrix and, on request, post-processes and emits that matrix row by row.
+ *
+ * Behaviour of Run(), one main-loop iteration per clock:
+ *   - accel_config : if a config is available, latches is_relu, is_bias,
+ *                    weight_bias, adf_accum_bias and accum_right_shift.
+ *   - vec_in       : if a vector is available, adds it element-wise into row
+ *                    in_ctr of accum_mat (skipped when the vector's raw bits
+ *                    are all zero), then advances in_ctr, wrapping to 0 after
+ *                    row spec::kVectorSize-1.
+ *   - send_out     : if a non-zero flag is available, drains every row. Each
+ *                    element is right-shifted by accum_right_shift, optionally
+ *                    biased, optionally passed through ReLU, clamped to
+ *                    [spec::kActWordMin, spec::kActWordMax], converted with
+ *                    AdpfloatType<8,3>::set_value_fixed<16, 12> and stored as
+ *                    raw bits; the accumulator entry is then set to 0 and the
+ *                    finished row is pushed on vec_out.
+ *
+ * NOTE(review): accum_mat is not explicitly cleared before the main loop;
+ *               entries are only zeroed as they are drained - confirm that
+ *               spec::AccumMatrixType zero-initializes.
+ * NOTE(review): in_ctr is not reset by a drain, so the next vec_in lands in
+ *               whatever row the counter last reached - confirm intended.
+ * NOTE(review): accel_config and vec_in are not polled while the drain loop
+ *               is running.
+ *
+ * TODO may need to add input channels (fixed config pattern is OK)
+ */
+SC_MODULE(Accum)
+{
+  public:
+  sc_in_clk     clk;
+  sc_in<bool>   rst;  // reset input, registered via NVHLS_NEG_RESET_SIGNAL_IS in the constructor
+  static const int kDebugLevel = 0;
+  // Configuration state; Run() overwrites it whenever accel_config delivers a value.
+  bool    is_relu;      // when set, negative results are replaced by 0 while draining
+  bool    is_bias;      // when set, weight_bias is added to each drained element
+  NVINT8 weight_bias;
+  spec::AdpfloatBiasType adf_accum_bias;  // passed to set_value_fixed() during output conversion
+  NVUINT5 accum_right_shift;              // right shift applied to each accumulator entry before output
+
+  Connections::In<spec::AccelConfig>   accel_config;
+  Connections::In<spec::AccumVectorType>  vec_in;  // accum data in adpfloat format (per original note)
+  Connections::In<bool> send_out;  // a non-zero value triggers draining of accum_mat
+  Connections::Out<spec::VectorType>  vec_out; // drained rows, one vector per Push (original note: randomly set array size)
+
+  SC_HAS_PROCESS(Accum);
+  Accum(sc_module_name name_) : sc_module(name_) {
+    SC_THREAD (Run);  // Run() is the only process this module registers
+    sensitive << clk.pos();  // Run() resumes on rising clock edges
+    NVHLS_NEG_RESET_SIGNAL_IS(rst);
+  }
+  // Accumulator storage, indexed [row][element]; the input row is selected by in_ctr.
+  spec::AccumMatrixType accum_mat;
+
+  void Run() {
+    accel_config.Reset();  // before the main loop: reset every channel endpoint
+    vec_in.Reset();
+    send_out.Reset();
+    vec_out.Reset();
+
+    NVUINT5 in_ctr = 0;  // row of accum_mat that the next vec_in vector is added to
+
+    is_relu = 0;
+    is_bias = 0;
+    weight_bias = 0;
+    adf_accum_bias = 0;
+    accum_right_shift = 0;
+
+    #pragma hls_pipeline_init_interval 1
+    while(1) {
+      spec::AccelConfig accel_config_tmp;
+      spec::AccumVectorType vec_in_reg;
+      spec::AccumVectorType accum_vector_out;  // scratch row used while draining
+      bool send_out_reg;
+      spec::VectorType vec_out_reg;  // converted row that is pushed on vec_out
+      // 1) Configuration update: latch new settings only when a config is available.
+      if (accel_config.PopNB(accel_config_tmp)) {
+        is_relu = accel_config_tmp.is_relu;
+        is_bias = accel_config_tmp.is_bias;
+        weight_bias = accel_config_tmp.weight_bias;
+        adf_accum_bias = accel_config_tmp.adf_accum_bias;
+        accum_right_shift = accel_config_tmp.accum_right_shift;
+      }
+      // 2) Accumulate: add the incoming vector into row in_ctr of accum_mat.
+      if (vec_in.PopNB(vec_in_reg)) {
+        //CDCOUT(sc_time_stamp()  << name() << " - DUT: Accum - in_ctr is: " << in_ctr << endl, 0);
+        CDCOUT(sc_time_stamp()  << name() << " - DUT: Accum - Received Datapath Output wth in_ctr: " << in_ctr << endl, 0);
+        if (vec_in_reg.to_rawbits() != 0) {  // all-zero vector: skip the add, but in_ctr still advances below
+          #pragma hls_unroll yes
+          for (int i = 0; i < spec::kVectorSize; i++) {
+            accum_mat[in_ctr][i] += vec_in_reg[i];
+          }
+        }
+        if (in_ctr == spec::kVectorSize-1) {  // wrap to row 0 after the last row
+          in_ctr = 0;
+        } else {
+          in_ctr += 1;
+        }
+      }
+      // 3) Drain: on a non-zero send_out flag, post-process and emit every row.
+      if (send_out.PopNB(send_out_reg)) {
+         if (send_out_reg != 0) {
+             CDCOUT(sc_time_stamp()  << name() << " - DUT: Accum - Received send_out from Datapath with is_relu: " << is_relu << endl, 0);
+             #pragma hls_pipeline_init_interval 1
+             for (int i = 0; i < spec::kVectorSize; i++) {  // one row of accum_mat per iteration
+                 #pragma hls_unroll yes
+                 for (int j = 0; j < spec::kVectorSize; j++) {   
+                     // AdaptivFloat right shift
+                     accum_vector_out[j] = accum_mat[i][j] >> accum_right_shift;
+
+                     // Bias addition (same weight_bias for every element)
+                     if (is_bias==1) accum_vector_out[j] += weight_bias;
+
+                     // ReLU: replace negative values by 0
+                     if ((is_relu==1) && (accum_vector_out[j] < 0)) accum_vector_out[j] = 0;
+
+                     // Saturate to the range [spec::kActWordMin, spec::kActWordMax]
+                     if (accum_vector_out[j] > spec::kActWordMax)
+                        accum_vector_out[j] = spec::kActWordMax;
+                     else if (accum_vector_out[j] < spec::kActWordMin) 
+                        accum_vector_out[j] = spec::kActWordMin;
+
+                     // quantize back to AdaptivFloat (8 cycles)
+                     AdpfloatType<8,3> _tmp;
+                     _tmp.set_value_fixed<16, 12>(accum_vector_out[j], adf_accum_bias);
+                     vec_out_reg[j] = _tmp.to_rawbits();
+                     accum_mat[i][j] = 0;  // clear the entry once it has been read out
+                 }  
+                 vec_out.Push(vec_out_reg);  // emit the finished row
+                 CDCOUT(sc_time_stamp()  << name() << " - DUT: Accum - pushing vectors to Encoder: " << endl, 0);
+                 wait();  // wait for the next clock edge before handling the next row
+             }
+         } // if send_out != 0
+      } // if send_out
+      wait();
+    } // while
+  }  // Run
+
+
+#endif
diff --git a/hw/cmod/Accum/Makefile b/hw/cmod/Accum/Makefile
@@ -0,0 +1,34 @@
+#
+#  All rights reserved - Harvard University. 
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the "License"); 
+#  you may not use this file except in compliance with the License.  
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+# 
+
+# Preprocessor defines for this module's build (HLS_ALGORITHMICC, VECTOR_SIZE=16).
+# CC, USER_FLAGS, BOOSTLIBS and LIBS are expected to come from ../cmod_Makefile.
+CFLAGS = -DHLS_ALGORITHMICC -DVECTOR_SIZE=16
+include ../cmod_Makefile
+
+
+# These targets do not produce a file of the same name; declaring them phony
+# keeps a stray file called "all", "run" or "sim_clean" from disabling them.
+.PHONY: all run sim_clean
+
+all: sim_test
+
+# Depend on sim_test so that `make run` builds the simulator first if needed.
+run: sim_test
+	./sim_test
+
+# Rebuild when local sources or the shared headers in ../include change.
+# The recipe still compiles only the local *.cpp files.
+sim_test: $(wildcard *.h) $(wildcard *.cpp) $(wildcard ../include/*.h)
+	$(CC) -o sim_test $(CFLAGS) $(USER_FLAGS) $(wildcard *.cpp) $(BOOSTLIBS) $(LIBS)
+
+# Remove object files and everything matching sim_* (including sim_test).
+sim_clean:
+	rm -rf *.o sim_*