pulp-platform · lukamac · Dec 30, 2024 · Dec 18, 2024 · Dec 18, 2024 · Nov 11, 2024
diff --git a/.github/workflows/test-neureka.yml b/.github/workflows/test-neureka.yml
@@ -12,4 +12,5 @@ jobs:
         working-directory: test
         run: |
           source /pulp-sdk/configs/siracusa.sh
-          pytest test.py -T tests -R -A neureka
+          pytest test.py -T tests -R -A neureka --build-flow=make --wmem=tcdm
+          pytest test.py -T tests -R -A neureka --build-flow=make --wmem=sram
diff --git a/.github/workflows/test-neureka_v2.yml b/.github/workflows/test-neureka_v2.yml
@@ -0,0 +1,14 @@
+# CI workflow for the Neureka v2 accelerator.
+# Triggered on every push; runs the pytest suite inside the pulp-nnx test
+# container.
+name: Test Neureka v2
+on: push
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    container: ghcr.io/pulp-platform/pulp-nnx:main-test
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # Invokes pytest from the test/ directory for the neureka_v2 accelerator,
+      # using the cmake build flow and mram as weight memory.
+      - name: Run test
+        shell: bash
+        working-directory: test
+        run: |
+          pytest test.py -T tests -R -A neureka_v2 --build-flow=cmake --wmem=mram
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,9 +1,11 @@
 # Changelog
 
-## [Unreleased]
+## [0.4.0] - 2024-12-30
 
 ### Added
 
+- NnxBuildFlow and CmakeBuildFlow
+- Neureka V2 support
 - github action for testing neureka
 - add NnxMapping dictionary that maps accelerator name to the accelerator specific classes
 - choice of data generation method (ones, incremented, or random)
@@ -18,6 +20,8 @@
 
 ### Changed
 
+- wmem is no longer a test configuration argument; it is now a command line argument
+- neureka is now tested with a more recent gcc version
 - python requirements are changed into requirements-pip and requirements-conda
 - conftest now passes only strings to test.py to improve readability of pytest logs
 - NnxMemoryLayout is now NnxWeight and also has a method for source generation

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -12,8 +12,9 @@ target_include_directories(pulp-nnx PUBLIC inc util)
 
 option(USE_NE16 "Use the NE16 accelerator.")
 option(USE_NEUREKA "Use the N-EUREKA accelerator.")
+option(USE_NEUREKA_V2 "Use the N-EUREKA v2 accelerator.")
 
-if (NOT ${USE_NE16} AND NOT ${USE_NEUREKA})
+if (NOT ${USE_NE16} AND NOT ${USE_NEUREKA} AND NOT ${USE_NEUREKA_V2})
 	message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_<acc> option.")
 endif()
 
@@ -50,3 +51,20 @@ if (${USE_NEUREKA})
 			neureka/gvsoc
 		)
 endif()
+
+# N-EUREKA v2 backend: when the option is enabled, publish its header
+# directories to consumers of pulp-nnx and compile its BSP, HAL and
+# PULP-NNX glue sources into the library.
+if (${USE_NEUREKA_V2})
+	message(STATUS "[PULP-NNX] Using the N-EUREKA v2 accelerator.")
+	target_include_directories(pulp-nnx PUBLIC
+		neureka_v2/bsp
+		neureka_v2/hal
+		neureka_v2/gvsoc
+	)
+	target_sources(pulp-nnx PRIVATE
+		neureka_v2/bsp/neureka_v2_siracusa_bsp.c
+		neureka_v2/hal/neureka_v2.c
+		neureka_v2/hal/neureka_v2_task.c
+		src/pulp_nnx_neureka_v2.c
+	)
+endif()
diff --git a/README.md b/README.md
@@ -41,6 +41,7 @@ _Note: The accelerator can provide additional helper functions if needed._
 
 - [NE16](ne16/README.md)
 - [Neureka](neureka/README.md)
+- [Neureka v2](neureka_v2/README.md)
 
 ## Testing
 
@@ -50,15 +51,15 @@ You can find information about testing in the dedicated [README](test/README.md)
 
 The library was tested with following pairs of SDKs and compilers:
 
-| SDK | SDK Commit Hash | Compiler | Compiler Commit Hash |
-| --- | --------------- | -------- | -------------------- |
-| gap\_sdk (obtainable from GreenWaves Technologies) | 90df4ce219 | [gap\_gnu\_toolchain](https://github.com/GreenWaves-Technologies/gap_gnu_toolchain) | 360fd4f9d6 |
-| [pulp-sdk](https://github.com/Scheremo/pulp-sdk) | c216298881 | [pulp-riscv-gnu-toolchain](https://github.com/GreenWaves-Technologies/gap_gnu_toolchain) | 9938bd8fcf (release v1.0.16) |
+| Accelerator | SDK | SDK Commit Hash | Compiler | Compiler Commit Hash |
+| ----------- | --- | --------------- | -------- | -------------------- |
+| NE16 | gap\_sdk (obtainable from GreenWaves Technologies) | 90df4ce219 | [gap\_gnu\_toolchain](https://github.com/GreenWaves-Technologies/gap_gnu_toolchain) | 360fd4f9d6 |
+| Neureka, Neureka v2 | [pulp-sdk](https://github.com/Scheremo/pulp-sdk) | c216298881 | [riscv-gnu-toolchain](https://github.com/pulp-platform/riscv-gnu-toolchain.git) | 11ba51e (release v2.6.0) |
 
 ## Contributing
 
 Bug reports and feature requests should be reported through issues.
-All the development should be done through forks and merged onto the `dev` branch with pull requests.
+All the development should be done through forks and merged onto the `main` branch with pull requests.
 
 ## Versioning
 

diff --git a/inc/pulp_nnx_neureka_v2.h b/inc/pulp_nnx_neureka_v2.h
@@ -0,0 +1,65 @@
+/*
+ * Luka Macan <[email protected]>
+ *
+ * Copyright 2023 ETH Zurich and University of Bologna
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __PULP_NNX_NEUREKA_V2_H__
+#define __PULP_NNX_NEUREKA_V2_H__
+
+#include "neureka_v2.h"
+#include "neureka_v2_siracusa_bsp.h"
+#include "neureka_v2_task.h"
+#include <stdint.h>
+
+/* PULP-NNX interface */
+
+/** neureka_v2_nnx_init
+ *
+ * Initialize the accelerator for use through the PULP-NNX interface.
+ * NOTE(review): the implementation is not visible from this header;
+ * presumably `conf` is forwarded to the Siracusa BSP - confirm.
+ */
+void neureka_v2_nnx_init(const neureka_v2_dev_t *dev,
+                         neureka_v2_siracusa_conf_t *conf);
+
+/** neureka_v2_nnx_term
+ *
+ * Terminate the use of the accelerator; counterpart of neureka_v2_nnx_init.
+ */
+void neureka_v2_nnx_term(const neureka_v2_dev_t *dev);
+
+/** neureka_v2_nnx_dispatch_check
+ *
+ * Check whether you can dispatch to the accelerator.
+ */
+int neureka_v2_nnx_dispatch_check(const neureka_v2_dev_t *dev);
+
+/** neureka_v2_nnx_dispatch_wait
+ *
+ * Block until you can dispatch to the accelerator.
+ */
+void neureka_v2_nnx_dispatch_wait(const neureka_v2_dev_t *dev);
+
+/** neureka_v2_nnx_dispatch
+ *
+ * Dispatch a task to the accelerator.
+ * Fails with return code 1 if the task cannot be dispatched. Otherwise returns
+ * 0.
+ */
+int neureka_v2_nnx_dispatch(const neureka_v2_dev_t *dev,
+                            neureka_v2_task_t *task);
+
+/** neureka_v2_nnx_resolve_check
+ *
+ * Check whether the task has been resolved.
+ */
+int neureka_v2_nnx_resolve_check(const neureka_v2_dev_t *dev,
+                                 neureka_v2_task_t *task);
+
+/** neureka_v2_nnx_resolve_wait
+ *
+ * Block until you can resolve the task.
+ */
+void neureka_v2_nnx_resolve_wait(const neureka_v2_dev_t *dev,
+                                 neureka_v2_task_t *task);
+
+#endif // !__PULP_NNX_NEUREKA_V2_H__
diff --git a/neureka_v2/README.md b/neureka_v2/README.md
@@ -0,0 +1,37 @@
+# Neureka v2
+
+## Docs
+
+Gvsoc model repo [link](https://github.com/lukamac/gvsoc-pulp/tree/fix-vectorload).
+
+## Implemented features
+
+- [x] Convolution w/ kernel shape 1x1
+- [x] Convolution w/ kernel shape 3x3
+- [x] Depthwise convolution w/ kernel shape 3x3
+- [ ] Normalization and quantization
+    - [x] With
+    - [ ] Without
+    - [x] Relu (w/ and w/o)
+    - [x] Bias (w/ and w/o)
+    - [ ] Per-channel shift
+    - [x] Per-layer shift
+- [x] Input type
+    - [x] uint8
+    - [x] int8
+- [x] Output type
+    - [x] int8
+    - [x] uint8 (only w/ Relu)
+    - [ ] int32
+- [x] Scale type
+    - [x] int8
+    - [ ] int32
+- [x] Bias type
+    - [x] int32
+- [ ] Weight type
+    - [x] int8
+    - [ ] int2-7
+- [ ] Weight memory
+    - [ ] Shared TCDM
+    - [x] Private SRAM
+    - [x] Private MRAM
diff --git a/neureka_v2/bsp/neureka_v2_siracusa_bsp.c b/neureka_v2/bsp/neureka_v2_siracusa_bsp.c
@@ -0,0 +1,78 @@
+/*
+ * Luka Macan <[email protected]>
+ *
+ * Copyright 2023 ETH Zurich and University of Bologna
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "neureka_v2_siracusa_bsp.h"
+#include <pmsis.h>
+
+#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_BASE_ADDR (0x00200000)
+#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_OFFS 0x18
+#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR                             \
+  (NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_BASE_ADDR +                                \
+   NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_OFFS)
+#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_PRIO 0x100
+#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_MAXSTALL 0xff
+#define NEUREKA_V2_SIRACUSA_MAX_STALL (8)
+#define NEUREKA_V2_SIRACUSA_EVENT (1 << 12)
+#define NEUREKA_V2_SIRACUSA_BASE_ADDR (0x00201000)
+#define NEUREKA_V2_SIRACUSA_WEIGHT_MEM_BASE_ADDR (0x10400000)
+#define NEUREKA_V2_SIRACUSA_WEIGHT_MEM_MRAM_OFFSET (0x00000000)
+#define NEUREKA_V2_SIRACUSA_WEIGHT_MEM_SRAM_OFFSET (0x00400000)
+
+/* Set the HCI priority bit in the cluster HWPE control register so that the
+ * accelerator is prioritized; all other bits are written back as read. */
+void neureka_v2_siracusa_hci_setpriority_neureka_v2() {
+  volatile uint32_t *const hwpe_ctrl =
+      (volatile uint32_t *)NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR;
+  *hwpe_ctrl =
+      *hwpe_ctrl | NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_PRIO;
+}
+
+/* Clear the HCI priority bit in the cluster HWPE control register so that the
+ * cores are prioritized; all other bits are written back as read. */
+void neureka_v2_siracusa_hci_setpriority_core() {
+  volatile uint32_t *const hwpe_ctrl =
+      (volatile uint32_t *)NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR;
+  *hwpe_ctrl =
+      *hwpe_ctrl & ~NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_PRIO;
+}
+
+/* Zero the max-stall field of the cluster HWPE control register; bits outside
+ * the field mask are written back as read. */
+void neureka_v2_siracusa_hci_reset_max_stall() {
+  volatile uint32_t *const hwpe_ctrl =
+      (volatile uint32_t *)NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR;
+  *hwpe_ctrl =
+      *hwpe_ctrl & ~NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_MAXSTALL;
+}
+
+/* Program the max-stall field of the cluster HWPE control register.
+ *
+ * max_stall: new field value; bits outside the field mask are discarded.
+ *
+ * The field is cleared before the new value is inserted. A plain OR into the
+ * register would merge the new value with whatever was programmed before, so
+ * calling this twice with different values would leave their bitwise OR in
+ * the field instead of the latest value.
+ */
+void neureka_v2_siracusa_hci_set_max_stall(uint32_t max_stall) {
+  volatile uint32_t *const hwpe_ctrl =
+      (volatile uint32_t *)NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR;
+  const uint32_t maxstall_mask =
+      NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_MAXSTALL;
+  /* One read and one write: keep every other bit, replace only the field. */
+  *hwpe_ctrl = (*hwpe_ctrl & ~maxstall_mask) | (max_stall & maxstall_mask);
+}
+
+/* Prepare the HCI for accelerator use: first give neureka_v2 bus priority,
+ * then program the max-stall value taken from conf->max_stall.
+ * conf is dereferenced unconditionally, so it must not be NULL. */
+void neureka_v2_siracusa_open(neureka_v2_siracusa_conf_t *conf) {
+  neureka_v2_siracusa_hci_setpriority_neureka_v2();
+  neureka_v2_siracusa_hci_set_max_stall(conf->max_stall);
+}
+
+/* Counterpart of neureka_v2_siracusa_open: clear the max-stall field and then
+ * hand HCI priority back to the cores. */
+void neureka_v2_siracusa_close() {
+  neureka_v2_siracusa_hci_reset_max_stall();
+  neureka_v2_siracusa_hci_setpriority_core();
+}
+
+/* Thin wrapper that passes the accelerator event mask
+ * (NEUREKA_V2_SIRACUSA_EVENT, bit 12) to eu_evt_maskWaitAndClr.
+ * NOTE(review): presumably blocks until that event is raised and then clears
+ * it - confirm against the event unit API. */
+void neureka_v2_siracusa_event_wait_and_clear() {
+  eu_evt_maskWaitAndClr(NEUREKA_V2_SIRACUSA_EVENT);
+}
+
+/* File-local, read-only device descriptor for the Siracusa neureka_v2
+ * instance; its HWPE base address is NEUREKA_V2_SIRACUSA_BASE_ADDR. */
+static const neureka_v2_dev_t neureka_v2_siracusa_dev = {
+    .hwpe_dev = (struct hwpe_dev_t){
+        .base_addr = (volatile uint32_t *)NEUREKA_V2_SIRACUSA_BASE_ADDR}};
+
+/* Return a pointer to the static Siracusa device descriptor. The pointee is
+ * const and has static storage duration, so the pointer never dangles. */
+const neureka_v2_dev_t *neureka_v2_siracusa_get_dev() {
+  return &neureka_v2_siracusa_dev;
+}
diff --git a/neureka_v2/bsp/neureka_v2_siracusa_bsp.h b/neureka_v2/bsp/neureka_v2_siracusa_bsp.h
@@ -0,0 +1,67 @@
+/*
+ * Luka Macan <[email protected]>
+ *
+ * Copyright 2023 ETH Zurich and University of Bologna
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __NEUREKA_V2_SIRACUSA_BSP_H__
+#define __NEUREKA_V2_SIRACUSA_BSP_H__
+
+#include "neureka_v2.h"
+#include <stdint.h>
+
+/**
+ * neureka_v2_siracusa_hci_setpriority_neureka_v2
+ *
+ * Set HCI interconnect bus priority to prioritize neureka_v2.
+ */
+void neureka_v2_siracusa_hci_setpriority_neureka_v2();
+
+/**
+ * neureka_v2_siracusa_hci_setpriority_core
+ *
+ * Set HCI interconnect bus priority to prioritize the cores.
+ */
+void neureka_v2_siracusa_hci_setpriority_core();
+
+/**
+ * neureka_v2_siracusa_hci_reset_max_stall
+ *
+ * Reset the HCI bus maxstall parameter.
+ * TODO: Check whether this also disables maxstall or only resets it.
+ */
+void neureka_v2_siracusa_hci_reset_max_stall();
+
+/**
+ * neureka_v2_siracusa_hci_set_max_stall
+ *
+ * Set the HCI bus maxstall. Maxstall defines how many cycles the HCI bus
+ * will stall the lower priority master, i.e. neureka_v2 or core, before
+ * letting it do a transaction.
+ */
+void neureka_v2_siracusa_hci_set_max_stall(uint32_t max_stall);
+
+/** Siracusa BSP configuration consumed by neureka_v2_siracusa_open. */
+typedef struct neureka_v2_siracusa_conf_t {
+  /* HCI maxstall value, in cycles (see neureka_v2_siracusa_hci_set_max_stall). */
+  int max_stall;
+} neureka_v2_siracusa_conf_t;
+
+/* Prioritize neureka_v2 on the HCI bus and apply conf->max_stall. */
+void neureka_v2_siracusa_open(neureka_v2_siracusa_conf_t *conf);
+/* Reset maxstall and return HCI bus priority to the cores. */
+void neureka_v2_siracusa_close();
+/* Wait for the neureka_v2 event and clear it. */
+void neureka_v2_siracusa_event_wait_and_clear();
+/* Get the device descriptor of the Siracusa neureka_v2 instance. */
+const neureka_v2_dev_t *neureka_v2_siracusa_get_dev();
+
+#endif // !__NEUREKA_V2_SIRACUSA_BSP_H__