From d30c027714400e5bf4dc2dfb4ed977693cf8d8ea Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Fri, 27 Nov 2020 21:16:30 +0100 Subject: [PATCH 001/105] Disable python bindings for faster build - while working on NMODL + LLVM, we don't worry that much about Python bindings by default - so lets disable them by default --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9c5942c33..91a366bfcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) # ============================================================================= # Build options for NMODL # ============================================================================= -option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" ON) +option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" OFF) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. instead of 2019 nist constants" OFF) if(NMODL_ENABLE_LEGACY_UNITS) add_definitions(-DUSE_LEGACY_UNITS) From ebe6539a365b2a6954d639007b16ff2d80864ad1 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sat, 28 Nov 2020 06:22:04 +0100 Subject: [PATCH 002/105] Integrate LLVM into CMake build system * added NMODL_ENABLE_LLVM option to enable/disable llvm support in nmodl * LLVMHelper.cmake added to help with linking LLVM libraries - clang might need to use libstdc++ or libc++ linking - on BB5, using GCC with LLVM libraries is fine. But using clang results into lots of link error. 
Adding -stdlib=libstd++ solves the issue - use check_cxx_source_compiles to find out which cxx flag is needed --- CMakeLists.txt | 9 +++++++++ cmake/LLVMHelper.cmake | 45 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 cmake/LLVMHelper.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 91a366bfcf..75ed658841 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) # ============================================================================= option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" OFF) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. instead of 2019 nist constants" OFF) +option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON) + if(NMODL_ENABLE_LEGACY_UNITS) add_definitions(-DUSE_LEGACY_UNITS) endif() @@ -174,6 +176,13 @@ cpp_cc_find_python_module(sympy 1.3 REQUIRED) cpp_cc_find_python_module(textwrap 0.9 REQUIRED) cpp_cc_find_python_module(yaml 3.12 REQUIRED) +# ============================================================================= +# Find LLVM dependencies +# ============================================================================= +if(NMODL_ENABLE_LLVM) + include(LLVMHelper) +endif() + # ============================================================================= # Compiler specific flags for external submodules # ============================================================================= diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake new file mode 100644 index 0000000000..a22cf4c835 --- /dev/null +++ b/cmake/LLVMHelper.cmake @@ -0,0 +1,45 @@ +# ============================================================================= +# LLVM/Clang needs to be linked with either libc++ or libstdc++ +# ============================================================================= +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) + 
find_package(LLVM REQUIRED CONFIG) + include(CheckCXXSourceCompiles) + + # test by including LLVM header and core library + llvm_map_components_to_libnames(LLVM_CORE_LIB core) + set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LIBRARIES ${LLVM_CORE_LIB}) + + # simple code to test LLVM library linking + set(CODE_TO_TEST + " + #include + using namespace llvm; + int main(int argc, char* argv[]) { + std::unique_ptr> Builder; + }") + + # first compile without any flags + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIB_LINK_TEST) + + # if standard compilation fails + if(NOT LLVM_LIB_LINK_TEST) + # try libstdc++ first + set(CMAKE_REQUIRED_FLAGS "-stdlib=libstdc++") + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIBSTDCPP_TEST) + # on failure, try libc++ + if(NOT LLVM_LIBSTDCPP_TEST) + set(CMAKE_REQUIRED_FLAGS "-stdlib=libc++") + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIBCPP_TEST) + endif() + # if either library works then add it to CXX flags + if(LLVM_LIBSTDCPP_TEST OR LLVM_LIBCPP_TEST) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_REQUIRED_FLAGS}") + message( + STATUS + "Adding ${CMAKE_REQUIRED_FLAGS} to CMAKE_CXX_FLAGS, required to link with LLVM libraries") + else() + message(STATUS "WARNING : -stdlib=libstdcx++ or -stdlib=libc++ didn't work to link with LLVM library") + endif() + endif() +endif() From 8994f8e38f9983f8abda6f29cade48d91592a46b Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 29 Nov 2020 00:50:16 +0100 Subject: [PATCH 003/105] Code infrastructure for LLVM code generation backend - added llvm dir under codegen where LLVM code generation work will live - llvm codegen visitor created that can be used as template for initial work - cmake adapted to enable llvm codegen based on CMake option - simple procedure.mod added that can be initial target for testing - new CLI option --llvm that runs LLVM codegen visitor - Enable CXX 14 because new LLVM versions require it --- CMakeLists.txt | 1 + 
cmake/LLVMHelper.cmake | 4 +- src/CMakeLists.txt | 6 ++ src/codegen/CMakeLists.txt | 5 ++ src/codegen/llvm/CMakeLists.txt | 13 +++++ src/codegen/llvm/codegen_llvm_visitor.cpp | 46 ++++++++++++++++ src/codegen/llvm/codegen_llvm_visitor.hpp | 67 +++++++++++++++++++++++ src/main.cpp | 18 ++++++ test/integration/mod/procedure.mod | 15 +++++ 9 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 src/codegen/llvm/CMakeLists.txt create mode 100644 src/codegen/llvm/codegen_llvm_visitor.cpp create mode 100644 src/codegen/llvm/codegen_llvm_visitor.hpp create mode 100644 test/integration/mod/procedure.mod diff --git a/CMakeLists.txt b/CMakeLists.txt index 75ed658841..c89515b331 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,7 @@ cpp_cc_find_python_module(yaml 3.12 REQUIRED) # ============================================================================= if(NMODL_ENABLE_LLVM) include(LLVMHelper) + add_definitions(-DNMODL_LLVM_BACKEND) endif() # ============================================================================= diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index a22cf4c835..de078be7b5 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -6,9 +6,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) include(CheckCXXSourceCompiles) # test by including LLVM header and core library - llvm_map_components_to_libnames(LLVM_CORE_LIB core) + llvm_map_components_to_libnames(LLVM_CORE_LIBS core) set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) - set(CMAKE_REQUIRED_LIBRARIES ${LLVM_CORE_LIB}) + set(CMAKE_REQUIRED_LIBRARIES ${LLVM_CORE_LIBS}) # simple code to test LLVM library linking set(CODE_TO_TEST diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a81895ff3..d30fa8220f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -36,6 +36,12 @@ target_link_libraries( ${NMODL_WRAPPER_LIBS}) cpp_cc_configure_sanitizers(TARGET nmodl) +if(NMODL_ENABLE_LLVM) + # LLVM core libraries to link + 
llvm_map_components_to_libnames(LLVM_CORE_LIBS core) + target_link_libraries(nmodl llvm_codegen ${LLVM_CORE_LIBS}) +endif() + # ============================================================================= # Add dependency with nmodl pytnon module (for consumer projects) # ============================================================================= diff --git a/src/codegen/CMakeLists.txt b/src/codegen/CMakeLists.txt index d261fa7acf..214d38c077 100644 --- a/src/codegen/CMakeLists.txt +++ b/src/codegen/CMakeLists.txt @@ -21,6 +21,11 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fast_math.ispc configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fast_math.hpp ${CMAKE_BINARY_DIR}/include/nmodl/fast_math.hpp COPYONLY) +# build llvm visitor if enabled +if(NMODL_ENABLE_LLVM) + add_subdirectory(llvm) +endif() + # ============================================================================= # Install include files # ============================================================================= diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt new file mode 100644 index 0000000000..71ecca338c --- /dev/null +++ b/src/codegen/llvm/CMakeLists.txt @@ -0,0 +1,13 @@ +# ============================================================================= +# Codegen sources +# ============================================================================= +set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp) + +# ============================================================================= +# LLVM codegen library +# ============================================================================= + +include_directories(${LLVM_INCLUDE_DIRS}) +add_library(llvm_codegen STATIC ${LLVM_CODEGEN_SOURCE_FILES}) +add_dependencies(llvm_codegen lexer util visitor) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp new file mode 100644 index 
0000000000..3f4e319503 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -0,0 +1,46 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "ast/all.hpp" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + +namespace nmodl { +namespace codegen { + + +// LLVM code generator objects +using namespace llvm; +static std::unique_ptr TheContext; +static std::unique_ptr TheModule; +static std::unique_ptr> Builder; +static std::map NamedValues; + + +void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) { + logger->info("CodegenLLVMVisitor : visiting statement block"); + node.visit_children(*this); + // TODO : code generation for new block scope +} + +void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { + logger->info("CodegenLLVMVisitor : visiting {} procedure", node.get_node_name()); + node.visit_children(*this); + // TODO : code generation for procedure block +} + +void CodegenLLVMVisitor::visit_program(const ast::Program& node) { + node.visit_children(*this); +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp new file mode 100644 index 0000000000..2b77160cd5 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -0,0 +1,67 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser 
General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief LLVM based code generation backend implementation for CoreNEURON + * + * \file + * \brief \copybrief nmodl::codegen::CodegenLLVMVisitor + */ + +#include +#include + +#include "utils/logger.hpp" +#include "visitors/ast_visitor.hpp" + +namespace nmodl { +namespace codegen { + +/** + * @defgroup llvm LLVM Based Code Generation Implementation + * @brief Implementations of LLVM based code generation + * + * @defgroup llvm_backends LLVM Codegen Backend + * @ingroup llvm + * @brief Code generation backends for NMODL AST to LLVM IR + * @{ + */ + +/** + * \class CodegenLLVMVisitor + * \brief %Visitor for transforming NMODL AST to LLVM IR + */ +class CodegenLLVMVisitor: public visitor::ConstAstVisitor { + // Name of mod file (without .mod suffix) + std::string mod_filename; + + // Output directory for code generation + std::string output_dir; + + public: + /** + * \brief Constructs the LLVM code generator visitor + * + * This constructor instantiates an NMODL LLVM code generator. This is + * just template to work with initial implementation. 
+ */ + CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir) + : mod_filename(mod_filename) + , output_dir(output_dir) {} + + void visit_statement_block(const ast::StatementBlock& node) override; + void visit_procedure_block(const ast::ProcedureBlock& node) override; + void visit_program(const ast::Program& node) override; +}; + +/** \} */ // end of llvm_backends + +} // namespace codegen +} // namespace nmodl diff --git a/src/main.cpp b/src/main.cpp index 1e2a43871f..c2fc1c2018 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,6 +17,9 @@ #include "codegen/codegen_cuda_visitor.hpp" #include "codegen/codegen_ispc_visitor.hpp" #include "codegen/codegen_transform_visitor.hpp" +#ifdef NMODL_LLVM_BACKEND +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#endif #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pyembed.hpp" @@ -82,6 +85,9 @@ int main(int argc, const char* argv[]) { /// true if cuda code to be generated bool cuda_backend(false); + /// true if llvm code to be generated + bool llvm_backend(false); + /// true if sympy should be used for solving ODEs analytically bool sympy_analytic(false); @@ -167,6 +173,10 @@ int main(int argc, const char* argv[]) { ->ignore_case() ->check(CLI::IsMember({"trace", "debug", "info", "warning", "error", "critical", "off"})); +#ifdef NMODL_LLVM_BACKEND + app.add_flag("--llvm", llvm_backend, "Enable LLVM based code generation")->ignore_case(); +#endif + app.add_option("file", mod_files, "One or more MOD files to process") ->ignore_case() ->required() @@ -584,6 +594,14 @@ int main(int argc, const char* argv[]) { optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } + +#ifdef NMODL_LLVM_BACKEND + if (llvm_backend) { + logger->info("Running LLVM backend code generator"); + CodegenLLVMVisitor visitor(modfile, output_dir); + visitor.visit_program(*ast); + } +#endif } } diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod new file mode 
100644 index 0000000000..3eb4817b3b --- /dev/null +++ b/test/integration/mod/procedure.mod @@ -0,0 +1,15 @@ +PROCEDURE state(x, y) { + LOCAL z + z = x + y +} + +PROCEDURE rates(v) { + LOCAL alpha, beta, sum + { + alpha = .1 * exp(-(v+40)) + beta = 4 * exp(-(v+65)/18) + } + { + sum = alpha + beta + } +} From a3f7891aadd9ea4f3d654c9407f8c3edee5e3cb5 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 29 Nov 2020 00:58:20 +0100 Subject: [PATCH 004/105] Azure CI fixes for LLVM build and README update - install llvm via brew - set LLV_DIR variable so that CMake can find llvm-config --- INSTALL.md | 13 +++++++++++-- azure-pipelines.yml | 3 ++- setup.py | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index cf42c44ac9..20f869f5e2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -31,7 +31,7 @@ Typically the versions of bison and flex provided by the system are outdated and To get recent version of all dependencies we recommend using [homebrew](https://brew.sh/): ```sh -brew install flex bison cmake python3 +brew install flex bison cmake python3 llvm ``` The necessary Python packages can then easily be added using the pip3 command. @@ -57,7 +57,7 @@ export PATH=/opt/homebrew/opt/flex/bin:/opt/homebrew/opt/bison/bin:$PATH On Ubuntu (>=18.04) flex/bison versions are recent enough and are installed along with the system toolchain: ```sh -apt-get install flex bison gcc python3 python3-pip +apt-get install flex bison gcc python3 python3-pip llvm-dev llvm-runtime llvm clang-format clang ``` The Python dependencies are installed using: @@ -79,6 +79,15 @@ cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/nmodl make -j && make install ``` +If `llvm-config` is not in PATH then set LLVM_DIR as: + +```sh +cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DLLVM_DIR=/path/to/llvm/install/lib/cmake/llvm + +# on OSX +cmake .. 
-DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DLLVM_DIR=`brew --prefix llvm`/lib/cmake/llvm +``` + And set PYTHONPATH as: ```sh diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d430513328..997b8f5aa6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -109,7 +109,7 @@ stages: mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build cmake --version - cmake .. -DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release + cmake .. -DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release -DNMODL_ENABLE_LLVM=OFF make -j 2 if [ $? -ne 0 ] then @@ -185,6 +185,7 @@ stages: submodules: True - script: | brew install flex bison cmake python@3 gcc@8 + brew install bison llvm python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' diff --git a/setup.py b/setup.py index 5b853ee569..74c2b97f03 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ def _config_exe(exe_name): ] -cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable] +cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable, "-DNMODL_ENABLE_LLVM=OFF"] if "bdist_wheel" in sys.argv: cmake_args.append("-DLINK_AGAINST_PYTHON=FALSE") From 3b173071ea271df4f40b390e670a5e7615c9f16a Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 29 Nov 2020 08:40:54 +0100 Subject: [PATCH 005/105] Print build status after cmake configure stage - print table with different build options, flags and paths used that can be helpful for debugging - fix git revision date for older git version - update INSTALL.md with correct brew paths for flex and bison --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c89515b331..8ae334584b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -281,6 +281,12 @@ message(STATUS "Python Bindings | 
${NMODL_ENABLE_PYTHON_BINDINGS}") message(STATUS "Flex | ${FLEX_EXECUTABLE}") message(STATUS "Bison | ${BISON_EXECUTABLE}") message(STATUS "Python | ${PYTHON_EXECUTABLE}") +message(STATUS "LLVM Codegen | ${NMODL_ENABLE_LLVM}") +if(NMODL_ENABLE_LLVM) + message(STATUS " VERSION | ${LLVM_PACKAGE_VERSION}") + message(STATUS " INCLUDE | ${LLVM_INCLUDE_DIRS}") + message(STATUS " CMAKE | ${LLVM_CMAKE_DIR}") +endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://github.com/BlueBrain/nmodl/") message(STATUS "--------------+--------------------------------------------------------------") From cea865dd413fa25d011723baa6c2617f8ea0b3fc Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 29 Nov 2020 16:46:29 +0100 Subject: [PATCH 006/105] Adding test template for LLVM codegen - test/unit/codegen/llvm.cpp added for unit testing LLVM code generation visitor - ./bin/testcodegen binary can be used to launch LLVM codegen specific tests - multiple llvm_map_components_to_libnames removed - update procedure.mod with simple examples for IR generation --- cmake/LLVMHelper.cmake | 8 ++-- src/CMakeLists.txt | 4 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 10 +++++ src/codegen/llvm/codegen_llvm_visitor.hpp | 8 ++++ test/integration/mod/procedure.mod | 19 +++++++-- test/unit/CMakeLists.txt | 29 ++++++++++--- test/unit/codegen/llvm.cpp | 51 +++++++++++++++++++++++ 7 files changed, 114 insertions(+), 15 deletions(-) create mode 100644 test/unit/codegen/llvm.cpp diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index de078be7b5..dbd29c92b6 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -6,9 +6,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) include(CheckCXXSourceCompiles) # test by including LLVM header and core library - llvm_map_components_to_libnames(LLVM_CORE_LIBS core) + llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core) 
set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) - set(CMAKE_REQUIRED_LIBRARIES ${LLVM_CORE_LIBS}) + set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) # simple code to test LLVM library linking set(CODE_TO_TEST @@ -39,7 +39,9 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) STATUS "Adding ${CMAKE_REQUIRED_FLAGS} to CMAKE_CXX_FLAGS, required to link with LLVM libraries") else() - message(STATUS "WARNING : -stdlib=libstdcx++ or -stdlib=libc++ didn't work to link with LLVM library") + message( + STATUS + "WARNING : -stdlib=libstdcx++ or -stdlib=libc++ didn't work to link with LLVM library") endif() endif() endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d30fa8220f..022cecf1ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,9 +37,7 @@ target_link_libraries( cpp_cc_configure_sanitizers(TARGET nmodl) if(NMODL_ENABLE_LLVM) - # LLVM core libraries to link - llvm_map_components_to_libnames(LLVM_CORE_LIBS core) - target_link_libraries(nmodl llvm_codegen ${LLVM_CORE_LIBS}) + target_link_libraries(nmodl llvm_codegen ${LLVM_LIBS_TO_LINK}) endif() # ============================================================================= diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 3f4e319503..494d5fd1f3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -7,6 +7,7 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "ast/all.hpp" +#include "visitors/visitor_utils.hpp" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" @@ -34,12 +35,21 @@ void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { logger->info("CodegenLLVMVisitor : visiting {} procedure", node.get_node_name()); + + // print position, nmodl and json form as + /* + logger->info("Location {} \n NMODL {} \n JSON : {} \n", + 
node.get_token()->position(), + to_nmodl(node), + to_json(node)); + */ node.visit_children(*this); // TODO : code generation for procedure block } void CodegenLLVMVisitor::visit_program(const ast::Program& node) { node.visit_children(*this); + result_code = "Hello World"; } } // namespace codegen diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 2b77160cd5..5b0ad3a968 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -45,6 +45,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Output directory for code generation std::string output_dir; + // result string for demo + std::string result_code; + public: /** * \brief Constructs the LLVM code generator visitor @@ -59,6 +62,11 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_statement_block(const ast::StatementBlock& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; void visit_program(const ast::Program& node) override; + + // demo method + std::string get_code() const { + return result_code; + } }; /** \} */ // end of llvm_backends diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod index 3eb4817b3b..ebbc39f15a 100644 --- a/test/integration/mod/procedure.mod +++ b/test/integration/mod/procedure.mod @@ -1,15 +1,26 @@ -PROCEDURE state(x, y) { +PROCEDURE hello_world() { + print("Hello World") +} + +PROCEDURE simple_sum(x, y) { LOCAL z z = x + y } -PROCEDURE rates(v) { +PROCEDURE complex_sum(v) { LOCAL alpha, beta, sum { alpha = .1 * exp(-(v+40)) beta = 4 * exp(-(v+65)/18) - } - { sum = alpha + beta } } + +PROCEDURE loop_function(v) { + LOCAL i + i = 0 + WHILE(i < 10) { + print("Hello World") + i = i + 1 + } +} diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a2340b0414..a642d386eb 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -71,6 +71,11 @@ add_executable( 
target_link_libraries(testmodtoken lexer util) target_link_libraries(testlexer lexer util) +target_link_libraries(testprinter printer util) +target_link_libraries(testsymtab symtab lexer util) +target_link_libraries(testunitlexer lexer util) +target_link_libraries(testunitparser lexer test_util config) + target_link_libraries( testparser visitor @@ -80,6 +85,7 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) + target_link_libraries( testvisitor visitor @@ -99,10 +105,22 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) -target_link_libraries(testprinter printer util) -target_link_libraries(testsymtab symtab lexer util) -target_link_libraries(testunitlexer lexer util) -target_link_libraries(testunitparser lexer test_util config) + +if(NMODL_ENABLE_LLVM) + add_executable(testcodegen visitor/main.cpp codegen/llvm.cpp) + target_link_libraries( + testcodegen + visitor + symtab + lexer + util + test_util + printer + llvm_codegen + ${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) + set(CODEGEN_TEST testcodegen) +endif() # ============================================================================= # Use catch_discover instead of add_test for granular test result reporting. @@ -127,7 +145,8 @@ foreach( testnewton testfast_math testunitlexer - testunitparser) + testunitparser + ${CODEGEN_TEST}) target_link_libraries(${test_name} Catch2::Catch2) cpp_cc_configure_sanitizers(TARGET ${test_name}) diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp new file mode 100644 index 0000000000..b6efe2f9ca --- /dev/null +++ b/test/unit/codegen/llvm.cpp @@ -0,0 +1,51 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include + +#include "ast/program.hpp" +#include "parser/nmodl_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/inline_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" + +using namespace nmodl; +using namespace visitor; +using nmodl::parser::NmodlDriver; + +//============================================================================= +// Sample LLVM codegen test +//============================================================================= + +std::string run_llvm_visitor(const std::string& text) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + InlineVisitor().visit_program(*ast); + + codegen::CodegenLLVMVisitor llvm_visitor("unknown", "."); + llvm_visitor.visit_program(*ast); + return llvm_visitor.get_code(); +} + +SCENARIO("Running LLVM Codegen", "[visitor][llvm]") { + GIVEN("Simple procedure with hello world message") { + std::string nmodl_text = R"( + PROCEDURE say_hello() { + print("Hello World") + } + )"; + + THEN("Hello world message is printed") { + std::string expected = "Hello World"; + auto result = run_llvm_visitor(nmodl_text); + REQUIRE(result == expected); + } + } +} \ No newline at end of file From 00e4ac055ad710e63eb756c5d38cc8609fd8de67 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 22 Dec 2020 13:54:32 +0300 Subject: [PATCH 007/105] Initial LLVM codegen vistor routines (#457) * Added LLVM code generation for `ProcedureBlock`. * Added code generation routines for double, integer and boolean variable types. * Added binary and unary operator code generation: - Supported binary operators: +, -, *, /. - Supported unary operators: -. - Assignment (=) is also supported. * Added regex matching unit tests for LLVM code generation. * Fixed Travis CI/builds. 
fixes #451, fixes #452, fixes #456 Co-authored-by: Pramod Kumbhar --- CMakeLists.txt | 1 + azure-pipelines.yml | 5 +- cmake/LLVMHelper.cmake | 14 +- setup.py | 2 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 157 +++++++++++++++--- src/codegen/llvm/codegen_llvm_visitor.hpp | 40 ++++- test/integration/mod/procedure.mod | 9 +- test/unit/CMakeLists.txt | 7 +- test/unit/codegen/llvm.cpp | 188 ++++++++++++++++++++-- 9 files changed, 364 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ae334584b..bd32ebcf0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,7 @@ cpp_cc_find_python_module(yaml 3.12 REQUIRED) # ============================================================================= if(NMODL_ENABLE_LLVM) include(LLVMHelper) + include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(-DNMODL_LLVM_BACKEND) endif() diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 997b8f5aa6..47dea2d1a7 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -184,8 +184,7 @@ stages: - checkout: self submodules: True - script: | - brew install flex bison cmake python@3 gcc@8 - brew install bison llvm + brew install flex bison cmake python@3 gcc@8 llvm python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' @@ -193,7 +192,7 @@ stages: export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF + cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=`brew --prefix llvm`/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? 
-ne 0 ] then diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index dbd29c92b6..982af48660 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -1,15 +1,17 @@ # ============================================================================= # LLVM/Clang needs to be linked with either libc++ or libstdc++ # ============================================================================= + +find_package(LLVM REQUIRED CONFIG) + +# include LLVM header and core library +llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core) +set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) +set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) - find_package(LLVM REQUIRED CONFIG) include(CheckCXXSourceCompiles) - # test by including LLVM header and core library - llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core) - set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) - set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) - # simple code to test LLVM library linking set(CODE_TO_TEST " diff --git a/setup.py b/setup.py index 74c2b97f03..2631f447f3 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ def _config_exe(exe_name): ] -cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable, "-DNMODL_ENABLE_LLVM=OFF"] +cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable, "-DNMODL_ENABLE_LLVM=OFF", "-DNMODL_ENABLE_PYTHON_BINDINGS=ON"] if "bdist_wheel" in sys.argv: cmake_args.append("-DLINK_AGAINST_PYTHON=FALSE") diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 494d5fd1f3..b8b3778e86 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -10,46 +10,153 @@ #include "visitors/visitor_utils.hpp" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" 
+#include "llvm/IR/ValueSymbolTable.h" namespace nmodl { namespace codegen { -// LLVM code generator objects -using namespace llvm; -static std::unique_ptr TheContext; -static std::unique_ptr TheModule; -static std::unique_ptr> Builder; -static std::map NamedValues; +/****************************************************************************************/ +/* Overloaded visitor routines */ +/****************************************************************************************/ -void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) { - logger->info("CodegenLLVMVisitor : visiting statement block"); - node.visit_children(*this); - // TODO : code generation for new block scope +void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& node) { + const auto& op = node.get_op().get_value(); + + // Process rhs first, since lhs is handled differently for assignment and binary + // operators. + node.get_rhs()->accept(*this); + llvm::Value* rhs = values.back(); + values.pop_back(); + if (op == ast::BinaryOp::BOP_ASSIGN) { + auto var = dynamic_cast(node.get_lhs().get()); + if (!var) { + throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); + } + llvm::Value* alloca = named_values[var->get_node_name()]; + builder.CreateStore(rhs, alloca); + return; + } + + node.get_lhs()->accept(*this); + llvm::Value* lhs = values.back(); + values.pop_back(); + llvm::Value* result; + + // \todo: Support other binary operators + switch (op) { +#define DISPATCH(binary_op, llvm_op) \ + case binary_op: \ + result = llvm_op(lhs, rhs); \ + values.push_back(result); \ + break; + + DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd); + DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv); + DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul); + DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub); + +#undef DISPATCH + } } -void CodegenLLVMVisitor::visit_procedure_block(const 
ast::ProcedureBlock& node) { - logger->info("CodegenLLVMVisitor : visiting {} procedure", node.get_node_name()); - - // print position, nmodl and json form as - /* - logger->info("Location {} \n NMODL {} \n JSON : {} \n", - node.get_token()->position(), - to_nmodl(node), - to_json(node)); - */ - node.visit_children(*this); - // TODO : code generation for procedure block +void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { + const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*context), + node.get_value()); + values.push_back(constant); +} + +void CodegenLLVMVisitor::visit_double(const ast::Double& node) { + const auto& constant = llvm::ConstantFP::get(llvm::Type::getDoubleTy(*context), + node.get_value()); + values.push_back(constant); +} + +void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { + const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), + node.get_value()); + values.push_back(constant); +} + +void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatement& node) { + for (const auto& variable: node.get_variables()) { + // LocalVar always stores a Name. + auto name = variable->get_node_name(); + llvm::Type* var_type = llvm::Type::getDoubleTy(*context); + llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); + named_values[name] = alloca; + } } void CodegenLLVMVisitor::visit_program(const ast::Program& node) { node.visit_children(*this); - result_code = "Hello World"; + // Keep this for easier development (maybe move to debug mode later). + std::cout << print_module(); +} + +void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { + const auto& name = node.get_node_name(); + const auto& parameters = node.get_parameters(); + + // The procedure parameters are doubles by default. 
+ std::vector arg_types; + for (size_t i = 0, e = parameters.size(); i < e; ++i) + arg_types.push_back(llvm::Type::getDoubleTy(*context)); + llvm::Type* return_type = llvm::Type::getVoidTy(*context); + + llvm::Function* proc = + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); + + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", proc); + builder.SetInsertPoint(body); + + // First, allocate parameters on the stack and add them to the symbol table. + unsigned i = 0; + for (auto& arg: proc->args()) { + std::string arg_name = parameters[i++].get()->get_node_name(); + llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); + arg.setName(arg_name); + builder.CreateStore(&arg, alloca); + named_values[arg_name] = alloca; + } + + const auto& statements = node.get_statement_block()->get_statements(); + for (const auto& statement: statements) { + // \todo: Support other statement types. + if (statement->is_local_list_statement() || statement->is_expression_statement()) + statement->accept(*this); + } + + values.clear(); + // \todo: Add proper support for the symbol table. + named_values.clear(); +} + +void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node) { + ast::UnaryOp op = node.get_op().get_value(); + node.get_expression()->accept(*this); + llvm::Value* value = values.back(); + values.pop_back(); + if (op == ast::UOP_NEGATION) { + llvm::Value* result = builder.CreateFNeg(value); + values.push_back(result); + } else { + // Support only `double` operators for now. 
+ throw std::runtime_error("Error: unsupported unary operator\n"); + } +} + +void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { + llvm::Value* var = builder.CreateLoad(named_values[node.get_node_name()]); + values.push_back(var); } } // namespace codegen diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 5b0ad3a968..5a288d9836 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -21,6 +21,10 @@ #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + namespace nmodl { namespace codegen { @@ -45,8 +49,18 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Output directory for code generation std::string output_dir; - // result string for demo - std::string result_code; + private: + std::unique_ptr context = std::make_unique(); + + std::unique_ptr module = std::make_unique(mod_filename, *context); + + llvm::IRBuilder<> builder; + + // Stack to hold visited values + std::vector values; + + // Mappings for named values for lookups + std::map named_values; public: /** @@ -57,15 +71,27 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir) : mod_filename(mod_filename) - , output_dir(output_dir) {} + , output_dir(output_dir) + , builder(*context) {} - void visit_statement_block(const ast::StatementBlock& node) override; + // Visitors + void visit_binary_expression(const ast::BinaryExpression& node) override; + void visit_boolean(const ast::Boolean& node) override; + void visit_double(const ast::Double& node) override; + void visit_integer(const ast::Integer& node) override; + void visit_local_list_statement(const ast::LocalListStatement& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; void 
visit_program(const ast::Program& node) override; + void visit_unary_expression(const ast::UnaryExpression& node) override; + void visit_var_name(const ast::VarName& node) override; - // demo method - std::string get_code() const { - return result_code; + // TODO: use custom printer here + std::string print_module() const { + std::string str; + llvm::raw_string_ostream os(str); + os << *module; + os.flush(); + return str; } }; diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod index ebbc39f15a..4017b6a505 100644 --- a/test/integration/mod/procedure.mod +++ b/test/integration/mod/procedure.mod @@ -1,5 +1,10 @@ +NEURON { + SUFFIX procedure_test + THREADSAFE +} + PROCEDURE hello_world() { - print("Hello World") + printf("Hello World") } PROCEDURE simple_sum(x, y) { @@ -20,7 +25,7 @@ PROCEDURE loop_function(v) { LOCAL i i = 0 WHILE(i < 10) { - print("Hello World") + printf("Hello World") i = i + 1 } } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a642d386eb..13929666d1 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -107,9 +107,10 @@ target_link_libraries( ${NMODL_WRAPPER_LIBS}) if(NMODL_ENABLE_LLVM) - add_executable(testcodegen visitor/main.cpp codegen/llvm.cpp) + include_directories(${LLVM_INCLUDE_DIRS}) + add_executable(testllvm visitor/main.cpp codegen/llvm.cpp) target_link_libraries( - testcodegen + testllvm visitor symtab lexer @@ -119,7 +120,7 @@ if(NMODL_ENABLE_LLVM) llvm_codegen ${NMODL_WRAPPER_LIBS} ${LLVM_LIBS_TO_LINK}) - set(CODEGEN_TEST testcodegen) + set(CODEGEN_TEST testllvm) endif() # ============================================================================= diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index b6efe2f9ca..270ce97ec0 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/llvm.cpp @@ -6,20 +6,21 @@ *************************************************************************/ #include +#include #include "ast/program.hpp" 
+#include "codegen/llvm/codegen_llvm_visitor.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" #include "visitors/inline_visitor.hpp" #include "visitors/symtab_visitor.hpp" -#include "codegen/llvm/codegen_llvm_visitor.hpp" using namespace nmodl; using namespace visitor; using nmodl::parser::NmodlDriver; //============================================================================= -// Sample LLVM codegen test +// Utility to get LLVM module as a string //============================================================================= std::string run_llvm_visitor(const std::string& text) { @@ -31,21 +32,184 @@ std::string run_llvm_visitor(const std::string& text) { codegen::CodegenLLVMVisitor llvm_visitor("unknown", "."); llvm_visitor.visit_program(*ast); - return llvm_visitor.get_code(); + return llvm_visitor.print_module(); } -SCENARIO("Running LLVM Codegen", "[visitor][llvm]") { - GIVEN("Simple procedure with hello world message") { +//============================================================================= +// BinaryExpression and Double +//============================================================================= + +SCENARIO("Binary expression", "[visitor][llvm]") { + GIVEN("Procedure with addition of its arguments") { + std::string nmodl_text = R"( + PROCEDURE add(a, b) { + LOCAL i + i = a + b + } + )"; + + THEN("variables are loaded and add instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check the values are loaded correctly and added + std::regex rhs(R"(%1 = load double, double\* %b)"); + std::regex lhs(R"(%2 = load double, double\* %a)"); + std::regex res(R"(%3 = fadd double %2, %1)"); + REQUIRE(std::regex_search(module_string, m, rhs)); + REQUIRE(std::regex_search(module_string, m, lhs)); + REQUIRE(std::regex_search(module_string, m, res)); + } + } + + GIVEN("Procedure with multiple binary operators") { std::string nmodl_text = R"( - PROCEDURE say_hello() 
{ - print("Hello World") + PROCEDURE multiple(a, b) { + LOCAL i + i = (a - b) / (a + b) } )"; - THEN("Hello world message is printed") { - std::string expected = "Hello World"; - auto result = run_llvm_visitor(nmodl_text); - REQUIRE(result == expected); + THEN("variables are processed from rhs first") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check rhs + std::regex rr(R"(%1 = load double, double\* %b)"); + std::regex rl(R"(%2 = load double, double\* %a)"); + std::regex x(R"(%3 = fadd double %2, %1)"); + REQUIRE(std::regex_search(module_string, m, rr)); + REQUIRE(std::regex_search(module_string, m, rl)); + REQUIRE(std::regex_search(module_string, m, x)); + + // Check lhs + std::regex lr(R"(%4 = load double, double\* %b)"); + std::regex ll(R"(%5 = load double, double\* %a)"); + std::regex y(R"(%6 = fsub double %5, %4)"); + REQUIRE(std::regex_search(module_string, m, lr)); + REQUIRE(std::regex_search(module_string, m, ll)); + REQUIRE(std::regex_search(module_string, m, y)); + + // Check result + std::regex res(R"(%7 = fdiv double %6, %3)"); + REQUIRE(std::regex_search(module_string, m, res)); } } -} \ No newline at end of file + + GIVEN("Procedure with assignment") { + std::string nmodl_text = R"( + PROCEDURE assignment() { + LOCAL i + i = 2 + } + )"; + + THEN("double constant is stored into i") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check store immediate is created + std::regex allocation(R"(%i = alloca double)"); + std::regex assignment(R"(store double 2.0*e\+00, double\* %i)"); + REQUIRE(std::regex_search(module_string, m, allocation)); + REQUIRE(std::regex_search(module_string, m, assignment)); + } + } +} + +//============================================================================= +// LocalList and LocalVar +//============================================================================= + +SCENARIO("Local variable", "[visitor][llvm]") { + GIVEN("Procedure with some 
local variables") { + std::string nmodl_text = R"( + PROCEDURE local() { + LOCAL i, j + } + )"; + + THEN("local variables are allocated on the stack") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check stack allocations for i and j + std::regex i(R"(%i = alloca double)"); + std::regex j(R"(%j = alloca double)"); + REQUIRE(std::regex_search(module_string, m, i)); + REQUIRE(std::regex_search(module_string, m, j)); + } + } +} + +//============================================================================= +// ProcedureBlock +//============================================================================= + +SCENARIO("Procedure", "[visitor][llvm]") { + GIVEN("Empty procedure with no arguments") { + std::string nmodl_text = R"( + PROCEDURE empty() {} + )"; + + THEN("empty void function is produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check procedure has empty body + std::regex procedure(R"(define void @empty\(\) \{\n\})"); + REQUIRE(std::regex_search(module_string, m, procedure)); + } + } + + GIVEN("Empty procedure with arguments") { + std::string nmodl_text = R"( + PROCEDURE with_argument(x) {} + )"; + + THEN("void function is produced with arguments allocated on stack") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check procedure signature + std::regex function_signature(R"(define void @with_argument\(double %x1\) \{)"); + REQUIRE(std::regex_search(module_string, m, function_signature)); + + // Check that procedure arguments are allocated on the local stack + std::regex alloca_instr(R"(%x = alloca double)"); + std::regex store_instr(R"(store double %x1, double\* %x)"); + REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, store_instr)); + } + } +} + +//============================================================================= +// UnaryExpression 
+//============================================================================= + +SCENARIO("Unary expression", "[visitor][llvm]") { + GIVEN("Procedure with negation") { + std::string nmodl_text = R"( + PROCEDURE negation(a) { + LOCAL i + i = -a + } + )"; + + THEN("fneg instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex allocation(R"(%1 = load double, double\* %a)"); + REQUIRE(std::regex_search(module_string, m, allocation)); + + // llvm v9 and llvm v11 implementation for negation + std::regex negation_v9(R"(%2 = fsub double -0.000000e\+00, %1)"); + std::regex negation_v11(R"(fneg double %1)"); + bool result = std::regex_search(module_string, m, negation_v9) || + std::regex_search(module_string, m, negation_v11); + REQUIRE(result == true); + } + } +} From 6623eb76667bf5eea5915b0387d337963c0f9f1a Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 25 Dec 2020 13:29:02 +0300 Subject: [PATCH 008/105] FunctionBlock code generation and terminator checks (#470) * LLVM code generation for `FunctionBlock` is now supported. * Terminators in function or procedure blocks are enforced: - Every procedure must have `ret void` instruction. - Every function returns a double, specified by `ret_`. * For local symbol table, code generation now uses LLVM's builtin `llvm::ValueSymbolTable`. 
fixes #454, fixes #469 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 123 ++++++++++++++-------- src/codegen/llvm/codegen_llvm_visitor.hpp | 11 +- test/unit/codegen/llvm.cpp | 50 ++++++++- 3 files changed, 137 insertions(+), 47 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index b8b3778e86..6e1177cbec 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -7,6 +7,7 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "ast/all.hpp" +#include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" #include "llvm/IR/BasicBlock.h" @@ -20,6 +21,80 @@ namespace nmodl { namespace codegen { +/****************************************************************************************/ +/* Helper routines */ +/****************************************************************************************/ + + +void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { + const auto& name = node.get_node_name(); + const auto& parameters = node.get_parameters(); + + // Procedure or function parameters are doubles by default. + std::vector arg_types; + for (size_t i = 0; i < parameters.size(); ++i) + arg_types.push_back(llvm::Type::getDoubleTy(*context)); + + // If visiting a function, the return type is a double by default. + llvm::Type* return_type = node.is_function_block() ? llvm::Type::getDoubleTy(*context) + : llvm::Type::getVoidTy(*context); + + llvm::Function* func = + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); + + // Create the entry basic block of the function/procedure and point the local named values table + // to the symbol table. 
+ llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func); + builder.SetInsertPoint(body); + local_named_values = func->getValueSymbolTable(); + + // When processing a function, it returns a value named in NMODL. Therefore, we + // first run RenameVisitor to rename it into ret_. This will aid in avoiding + // symbolic conflicts. Then, allocate the return variable on the local stack. + std::string return_var_name = "ret_" + name; + const auto& block = node.get_statement_block(); + if (node.is_function_block()) { + visitor::RenameVisitor v(name, return_var_name); + block->accept(v); + builder.CreateAlloca(llvm::Type::getDoubleTy(*context), + /*ArraySize=*/nullptr, + return_var_name); + } + + // Allocate parameters on the stack and add them to the symbol table. + unsigned i = 0; + for (auto& arg: func->args()) { + std::string arg_name = parameters[i++].get()->get_node_name(); + llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); + arg.setName(arg_name); + builder.CreateStore(&arg, alloca); + } + + // Process function or procedure body. + const auto& statements = block->get_statements(); + for (const auto& statement: statements) { + // \todo: Support other statement types. + if (statement->is_local_list_statement() || statement->is_expression_statement()) + statement->accept(*this); + } + + // Add the terminator. If visiting function, we need to return the value specified by + // ret_. + if (node.is_function_block()) { + llvm::Value* return_var = builder.CreateLoad(local_named_values->lookup(return_var_name)); + builder.CreateRet(return_var); + } else { + builder.CreateRetVoid(); + } + + // Clear local values stack and remove the pointer to the local symbol table. 
+ values.clear(); + local_named_values = nullptr; +} + /****************************************************************************************/ /* Overloaded visitor routines */ /****************************************************************************************/ @@ -38,7 +113,7 @@ void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& no if (!var) { throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); } - llvm::Value* alloca = named_values[var->get_node_name()]; + llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); builder.CreateStore(rhs, alloca); return; } @@ -77,6 +152,10 @@ void CodegenLLVMVisitor::visit_double(const ast::Double& node) { values.push_back(constant); } +void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { + visit_procedure_or_function(node); +} + void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), node.get_value()); @@ -89,7 +168,6 @@ void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatemen auto name = variable->get_node_name(); llvm::Type* var_type = llvm::Type::getDoubleTy(*context); llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); - named_values[name] = alloca; } } @@ -100,44 +178,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { } void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { - const auto& name = node.get_node_name(); - const auto& parameters = node.get_parameters(); - - // The procedure parameters are doubles by default. 
- std::vector arg_types; - for (size_t i = 0, e = parameters.size(); i < e; ++i) - arg_types.push_back(llvm::Type::getDoubleTy(*context)); - llvm::Type* return_type = llvm::Type::getVoidTy(*context); - - llvm::Function* proc = - llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - name, - *module); - - llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", proc); - builder.SetInsertPoint(body); - - // First, allocate parameters on the stack and add them to the symbol table. - unsigned i = 0; - for (auto& arg: proc->args()) { - std::string arg_name = parameters[i++].get()->get_node_name(); - llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); - arg.setName(arg_name); - builder.CreateStore(&arg, alloca); - named_values[arg_name] = alloca; - } - - const auto& statements = node.get_statement_block()->get_statements(); - for (const auto& statement: statements) { - // \todo: Support other statement types. - if (statement->is_local_list_statement() || statement->is_expression_statement()) - statement->accept(*this); - } - - values.clear(); - // \todo: Add proper support for the symbol table. 
- named_values.clear(); + visit_procedure_or_function(node); } void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node) { @@ -155,7 +196,7 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node } void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { - llvm::Value* var = builder.CreateLoad(named_values[node.get_node_name()]); + llvm::Value* var = builder.CreateLoad(local_named_values->lookup(node.get_node_name())); values.push_back(var); } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 5a288d9836..801922cdc1 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -59,8 +59,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Stack to hold visited values std::vector values; - // Mappings for named values for lookups - std::map named_values; + // Pointer to the local symbol table. + llvm::ValueSymbolTable* local_named_values = nullptr; public: /** @@ -74,10 +74,17 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { , output_dir(output_dir) , builder(*context) {} + /** + * Visit nmodl function or procedure + * \param node the AST node representing the function or procedure in NMODL + */ + void visit_procedure_or_function(const ast::Block& node); + // Visitors void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; void visit_double(const ast::Double& node) override; + void visit_function_block(const ast::FunctionBlock& node) override; void visit_integer(const ast::Integer& node) override; void visit_local_list_statement(const ast::LocalListStatement& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index 270ce97ec0..44ca18391b 100644 --- a/test/unit/codegen/llvm.cpp +++ 
b/test/unit/codegen/llvm.cpp @@ -117,6 +117,44 @@ SCENARIO("Binary expression", "[visitor][llvm]") { } } +//============================================================================= +// FunctionBlock +//============================================================================= + +SCENARIO("Function", "[visitor][llvm]") { + GIVEN("Simple function with arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x) { + foo = x + } + )"; + + THEN("function is produced with arguments allocated on stack and a return instruction") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check function signature. The return type should be the default double type. + std::regex function_signature(R"(define double @foo\(double %x1\) \{)"); + REQUIRE(std::regex_search(module_string, m, function_signature)); + + // Check that function arguments are allocated on the local stack. + std::regex alloca_instr(R"(%x = alloca double)"); + std::regex store_instr(R"(store double %x1, double\* %x)"); + REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, store_instr)); + + // Check the return variable has also been allocated. + std::regex ret_instr(R"(%ret_foo = alloca double)"); + + // Check that the return value has been loaded and passed to terminator. 
+ std::regex loaded(R"(%2 = load double, double\* %ret_foo)"); + std::regex terminator(R"(ret double %2)"); + REQUIRE(std::regex_search(module_string, m, loaded)); + REQUIRE(std::regex_search(module_string, m, terminator)); + } + } +} + //============================================================================= // LocalList and LocalVar //============================================================================= @@ -156,8 +194,8 @@ SCENARIO("Procedure", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check procedure has empty body - std::regex procedure(R"(define void @empty\(\) \{\n\})"); + // Check procedure has empty body with a void return. + std::regex procedure(R"(define void @empty\(\) \{\n(\s)*ret void\n\})"); REQUIRE(std::regex_search(module_string, m, procedure)); } } @@ -171,15 +209,19 @@ SCENARIO("Procedure", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check procedure signature + // Check procedure signature. std::regex function_signature(R"(define void @with_argument\(double %x1\) \{)"); REQUIRE(std::regex_search(module_string, m, function_signature)); - // Check that procedure arguments are allocated on the local stack + // Check that procedure arguments are allocated on the local stack. std::regex alloca_instr(R"(%x = alloca double)"); std::regex store_instr(R"(store double %x1, double\* %x)"); REQUIRE(std::regex_search(module_string, m, alloca_instr)); REQUIRE(std::regex_search(module_string, m, store_instr)); + + // Check terminator. 
+ std::regex terminator(R"(ret void)"); + REQUIRE(std::regex_search(module_string, m, terminator)); } } } From 13b129b536252aab1ead0612b00a43db2397c66b Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Mon, 28 Dec 2020 23:59:20 +0100 Subject: [PATCH 009/105] Add option to run LLVM optimisation passes (#471) * Add option to run LLVM optimisation passes - update CLI argument from --llvm to llvm --ir --opt - --ir runs CodegenLLVMVicitor and emits LLVM IR - if --opt is passed, we run basic LLVM optimisation passes - update simple test to check optimisation passes * Add function example in procedure.mod * Add test for LLVM optimisation passes and dead code removal --- cmake/LLVMHelper.cmake | 2 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 25 ++++++++++++++++++ src/codegen/llvm/codegen_llvm_visitor.hpp | 26 ++++++++++++++++-- src/main.cpp | 25 +++++++++++++----- test/integration/mod/procedure.mod | 8 +++++- test/unit/codegen/llvm.cpp | 32 ++++++++++++++++++++--- 6 files changed, 105 insertions(+), 13 deletions(-) diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index 982af48660..5d451697b9 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -5,7 +5,7 @@ find_package(LLVM REQUIRED CONFIG) # include LLVM header and core library -llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core) +llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core native) set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 6e1177cbec..d99e519dca 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -25,6 +25,24 @@ namespace codegen { /* Helper routines */ /****************************************************************************************/ +void CodegenLLVMVisitor::run_llvm_opt_passes() { + /// run some common optimisation passes that are commonly suggested + 
fpm.add(llvm::createInstructionCombiningPass()); + fpm.add(llvm::createReassociatePass()); + fpm.add(llvm::createGVNPass()); + fpm.add(llvm::createCFGSimplificationPass()); + + /// initialize pass manager + fpm.doInitialization(); + + /// iterate over all functions and run the optimisation passes + auto& functions = module->getFunctionList(); + for (auto& function: functions) { + llvm::verifyFunction(function); + fpm.run(function); + } +} + void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { const auto& name = node.get_node_name(); @@ -95,6 +113,7 @@ void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { local_named_values = nullptr; } + /****************************************************************************************/ /* Overloaded visitor routines */ /****************************************************************************************/ @@ -173,6 +192,12 @@ void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatemen void CodegenLLVMVisitor::visit_program(const ast::Program& node) { node.visit_children(*this); + + if (opt_passes) { + logger->info("Running LLVM optimisation passes"); + run_llvm_opt_passes(); + } + // Keep this for easier development (maybe move to debug mode later). 
std::cout << print_module(); } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 801922cdc1..6b94ecffbe 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -23,7 +23,12 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" namespace nmodl { namespace codegen { @@ -56,12 +61,25 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { llvm::IRBuilder<> builder; + llvm::legacy::FunctionPassManager fpm; + // Stack to hold visited values std::vector values; // Pointer to the local symbol table. llvm::ValueSymbolTable* local_named_values = nullptr; + // Run optimisation passes if true + bool opt_passes; + + /** + *\brief Run LLVM optimisation passes on generated IR + * + * LLVM provides number of optimisation passes that can be run on the generated IR. + * Here we run common optimisation LLVM passes that benefits code optimisation. + */ + void run_llvm_opt_passes(); + public: /** * \brief Constructs the LLVM code generator visitor @@ -69,10 +87,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { * This constructor instantiates an NMODL LLVM code generator. This is * just template to work with initial implementation. 
*/ - CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir) + CodegenLLVMVisitor(const std::string& mod_filename, + const std::string& output_dir, + bool opt_passes) : mod_filename(mod_filename) , output_dir(output_dir) - , builder(*context) {} + , opt_passes(opt_passes) + , builder(*context) + , fpm(module.get()) {} /** * Visit nmodl function or procedure diff --git a/src/main.cpp b/src/main.cpp index c2fc1c2018..622171a278 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -164,6 +164,14 @@ int main(int argc, const char* argv[]) { /// floating point data type std::string data_type("double"); +#ifdef NMODL_LLVM_BACKEND + /// generate llvm IR + bool llvm_ir(false); + + /// run llvm optimisation passes + bool llvm_opt_passes(false); +#endif + // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) app.get_formatter()->column_width(40); app.set_help_all_flag("-H,--help-all", "Print this help message including all sub-commands"); @@ -173,10 +181,6 @@ int main(int argc, const char* argv[]) { ->ignore_case() ->check(CLI::IsMember({"trace", "debug", "info", "warning", "error", "critical", "off"})); -#ifdef NMODL_LLVM_BACKEND - app.add_flag("--llvm", llvm_backend, "Enable LLVM based code generation")->ignore_case(); -#endif - app.add_option("file", mod_files, "One or more MOD files to process") ->ignore_case() ->required() @@ -286,6 +290,15 @@ int main(int argc, const char* argv[]) { optimize_ionvar_copies_codegen, fmt::format("Optimize copies of ion variables ({})", optimize_ionvar_copies_codegen))->ignore_case(); +#ifdef NMODL_LLVM_BACKEND + auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); + llvm_opt->add_flag("--ir", + llvm_ir, + "Generate LLVM IR ({})"_format(llvm_ir))->ignore_case(); + llvm_opt->add_flag("--opt", + llvm_opt_passes, + "Run LLVM optimisation passes ({})"_format(llvm_opt_passes))->ignore_case(); +#endif // clang-format on CLI11_PARSE(app, argc, argv); @@ -596,9 
+609,9 @@ int main(int argc, const char* argv[]) { } #ifdef NMODL_LLVM_BACKEND - if (llvm_backend) { + if (llvm_ir) { logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, output_dir); + CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_passes); visitor.visit_program(*ast); } #endif diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod index 4017b6a505..4a45af7d1e 100644 --- a/test/integration/mod/procedure.mod +++ b/test/integration/mod/procedure.mod @@ -21,7 +21,7 @@ PROCEDURE complex_sum(v) { } } -PROCEDURE loop_function(v) { +PROCEDURE loop_proc(v) { LOCAL i i = 0 WHILE(i < 10) { @@ -29,3 +29,9 @@ PROCEDURE loop_function(v) { i = i + 1 } } + +FUNCTION square(x) { + LOCAL res + res = x * x + square = res +} diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index 44ca18391b..9c86e8c30a 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/llvm.cpp @@ -23,14 +23,14 @@ using nmodl::parser::NmodlDriver; // Utility to get LLVM module as a string //============================================================================= -std::string run_llvm_visitor(const std::string& text) { +std::string run_llvm_visitor(const std::string& text, bool opt = false) { NmodlDriver driver; const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); InlineVisitor().visit_program(*ast); - codegen::CodegenLLVMVisitor llvm_visitor("unknown", "."); + codegen::CodegenLLVMVisitor llvm_visitor("unknown", ".", opt); llvm_visitor.visit_program(*ast); return llvm_visitor.print_module(); } @@ -52,10 +52,11 @@ SCENARIO("Binary expression", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check the values are loaded correctly and added std::regex rhs(R"(%1 = load double, double\* %b)"); std::regex lhs(R"(%2 = load double, double\* %a)"); std::regex res(R"(%3 = fadd double %2, %1)"); + + // Check the values are loaded 
correctly and added REQUIRE(std::regex_search(module_string, m, rhs)); REQUIRE(std::regex_search(module_string, m, lhs)); REQUIRE(std::regex_search(module_string, m, res)); @@ -255,3 +256,28 @@ SCENARIO("Unary expression", "[visitor][llvm]") { } } } + +//============================================================================= +// Optimization : dead code removal +//============================================================================= + +SCENARIO("Dead code removal", "[visitor][llvm][opt]") { + GIVEN("Procedure using local variables, without any side effects") { + std::string nmodl_text = R"( + PROCEDURE add(a, b) { + LOCAL i + i = a + b + } + )"; + + THEN("with optimisation enabled, all ops are eliminated") { + std::string module_string = run_llvm_visitor(nmodl_text, true); + std::smatch m; + + // Check if the values are optimised out + std::regex empty_proc( + R"(define void @add\(double %a1, double %b2\) \{\n(\s)*ret void\n\})"); + REQUIRE(std::regex_search(module_string, m, empty_proc)); + } + } +} \ No newline at end of file From 9c127f2016aba6d8fd4195657612a85c1cf5ee4b Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Wed, 30 Dec 2020 22:14:00 +0300 Subject: [PATCH 010/105] Add function call LLVM code generation (#477) This patch adds support for function call code generation, particularly: - User-defined procedures and functions can now lowered to LLVM IR. - A framework for external method calls (e.g. sin, exp, etc.) has been created, currently `exp` and `pow` are supported. - Corresponding tests added. 
fixes #472 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 103 +++++++++++++++++++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 32 ++++++- test/unit/CMakeLists.txt | 3 +- test/unit/codegen/llvm.cpp | 104 +++++++++++++++++++++- 4 files changed, 231 insertions(+), 11 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index d99e519dca..430f3d78de 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -7,8 +7,8 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "ast/all.hpp" +#include "codegen/codegen_helper_visitor.hpp" #include "visitors/rename_visitor.hpp" -#include "visitors/visitor_utils.hpp" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -44,7 +44,56 @@ void CodegenLLVMVisitor::run_llvm_opt_passes() { } -void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { +void CodegenLLVMVisitor::create_external_method_call(const std::string& name, + const ast::ExpressionVector& arguments) { + std::vector argument_values; + std::vector argument_types; + for (const auto& arg: arguments) { + arg->accept(*this); + llvm::Value* value = values.back(); + llvm::Type* type = value->getType(); + values.pop_back(); + argument_types.push_back(type); + argument_values.push_back(value); + } + +#define DISPATCH(method_name, intrinsic) \ + if (name == method_name) { \ + llvm::Value* result = builder.CreateIntrinsic(intrinsic, argument_types, argument_values); \ + values.push_back(result); \ + return; \ + } + + DISPATCH("exp", llvm::Intrinsic::exp); + DISPATCH("pow", llvm::Intrinsic::pow); +#undef DISPATCH + + throw std::runtime_error("Error: External method" + name + " is not currently supported"); +} + +void CodegenLLVMVisitor::create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments) { + // Check that function is called with the expected number of arguments. 
+ if (arguments.size() != func->arg_size()) { + throw std::runtime_error("Error: Incorrect number of arguments passed"); + } + + // Process each argument and add it to a vector to pass to the function call instruction. Note + // that type checks are not needed here as NMODL operates on doubles by default. + std::vector argument_values; + for (const auto& arg: arguments) { + arg->accept(*this); + llvm::Value* value = values.back(); + values.pop_back(); + argument_values.push_back(value); + } + + llvm::Value* call = builder.CreateCall(func, argument_values); + values.push_back(call); +} + +void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Block& node) { const auto& name = node.get_node_name(); const auto& parameters = node.get_parameters(); @@ -57,11 +106,17 @@ void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { llvm::Type* return_type = node.is_function_block() ? llvm::Type::getDoubleTy(*context) : llvm::Type::getVoidTy(*context); - llvm::Function* func = - llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - name, - *module); + // Create a function that is automatically inserted into module's symbol table. + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); +} + +void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { + const auto& name = node.get_node_name(); + const auto& parameters = node.get_parameters(); + llvm::Function* func = module->getFunction(name); // Create the entry basic block of the function/procedure and point the local named values table // to the symbol table. 
@@ -175,6 +230,22 @@ void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { visit_procedure_or_function(node); } +void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { + const auto& name = node.get_node_name(); + auto func = module->getFunction(name); + if (func) { + create_function_call(func, name, node.get_arguments()); + } else { + auto symbol = sym_tab->lookup(name); + if (symbol && symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) { + create_external_method_call(name, node.get_arguments()); + } else { + throw std::runtime_error("Error: Unknown function name: " + name + + ". (External functions references are not supported)"); + } + } +} + void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), node.get_value()); @@ -191,6 +262,24 @@ void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatemen } void CodegenLLVMVisitor::visit_program(const ast::Program& node) { + // Before generating LLVM, gather information about AST. For now, information about functions + // and procedures is used only. + CodegenHelperVisitor v; + CodegenInfo info = v.analyze(node); + + // For every function and procedure, generate its declaration. Thus, we can look up + // `llvm::Function` in the symbol table in the module. + for (const auto& func: info.functions) { + emit_procedure_or_function_declaration(*func); + } + for (const auto& proc: info.procedures) { + emit_procedure_or_function_declaration(*proc); + } + + // Set the AST symbol table. + sym_tab = node.get_symbol_table(); + + // Proceed with code generation. 
node.visit_children(*this); if (opt_passes) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 6b94ecffbe..32347bdabd 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -18,6 +18,7 @@ #include #include +#include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" @@ -69,7 +70,10 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Pointer to the local symbol table. llvm::ValueSymbolTable* local_named_values = nullptr; - // Run optimisation passes if true + // Pointer to AST symbol table. + symtab::SymbolTable* sym_tab; + + // Run optimisation passes if true. bool opt_passes; /** @@ -96,6 +100,31 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { , builder(*context) , fpm(module.get()) {} + /** + * Create a function call to an external method + * \param name external method name + * \param arguments expressions passed as arguments to the given external method + */ + void create_external_method_call(const std::string& name, + const ast::ExpressionVector& arguments); + + /** + * Create a function call to NMODL function or procedure in the same mod file + * \param func LLVM function corresponding ti this call + * \param name function name + * \param arguments expressions passed as arguments to the function call + */ + void create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments); + + /** + * Emit function or procedure declaration in LLVM given the node + * + * \param node the AST node representing the function or procedure in NMODL + */ + void emit_procedure_or_function_declaration(const ast::Block& node); + /** * Visit nmodl function or procedure * \param node the AST node representing the function or procedure in NMODL @@ -107,6 +136,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_boolean(const ast::Boolean& node) 
override; void visit_double(const ast::Double& node) override; void visit_function_block(const ast::FunctionBlock& node) override; + void visit_function_call(const ast::FunctionCall& node) override; void visit_integer(const ast::Integer& node) override; void visit_local_list_statement(const ast::LocalListStatement& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 13929666d1..a5d95719e9 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -111,13 +111,14 @@ if(NMODL_ENABLE_LLVM) add_executable(testllvm visitor/main.cpp codegen/llvm.cpp) target_link_libraries( testllvm + llvm_codegen + codegen visitor symtab lexer util test_util printer - llvm_codegen ${NMODL_WRAPPER_LIBS} ${LLVM_LIBS_TO_LINK}) set(CODEGEN_TEST testllvm) diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index 9c86e8c30a..d2c0a65e86 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/llvm.cpp @@ -12,7 +12,6 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" -#include "visitors/inline_visitor.hpp" #include "visitors/symtab_visitor.hpp" using namespace nmodl; @@ -28,7 +27,6 @@ std::string run_llvm_visitor(const std::string& text, bool opt = false) { const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); - InlineVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor("unknown", ".", opt); llvm_visitor.visit_program(*ast); @@ -156,6 +154,108 @@ SCENARIO("Function", "[visitor][llvm]") { } } +//============================================================================= +// FunctionCall +//============================================================================= + +SCENARIO("Function call", "[visitor][llvm]") { + GIVEN("A call to procedure") { + std::string nmodl_text = R"( + PROCEDURE bar() {} + FUNCTION foo() { + bar() + } + 
)"; + + THEN("a void call instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for call instruction. + std::regex call(R"(call void @bar\(\))"); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to function declared below the caller") { + std::string nmodl_text = R"( + FUNCTION foo(x) { + foo = 4 * bar() + } + FUNCTION bar() { + bar = 5 + } + )"; + + THEN("a correct call instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for call instruction. + std::regex call(R"(%[0-9]+ = call double @bar\(\))"); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to function with arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x, y) { + foo = 4 * x - y + } + FUNCTION bar(i) { + bar = foo(i, 4) + } + )"; + + THEN("arguments are processed before the call and passed to call instruction") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check correct arguments. + std::regex i(R"(%1 = load double, double\* %i)"); + std::regex call(R"(call double @foo\(double %1, double 4.000000e\+00\))"); + REQUIRE(std::regex_search(module_string, m, i)); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to external method") { + std::string nmodl_text = R"( + FUNCTION bar(i) { + bar = exp(i) + } + )"; + + THEN("LLVM intrinsic corresponding to this method is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for intrinsic declaration. + std::regex exp(R"(declare double @llvm\.exp\.f64\(double\))"); + REQUIRE(std::regex_search(module_string, m, exp)); + + // Check the correct call is made. 
+ std::regex call(R"(call double @llvm\.exp\.f64\(double %[0-9]+\))"); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to function with the wrong number of arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x, y) { + foo = 4 * x - y + } + FUNCTION bar(i) { + bar = foo(i) + } + )"; + + THEN("a runtime error is thrown") { + REQUIRE_THROWS_AS(run_llvm_visitor(nmodl_text), std::runtime_error); + } + } +} + //============================================================================= // LocalList and LocalVar //============================================================================= From 470d54aa46860952155d0139521842b041ae9ad2 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 31 Dec 2020 00:49:13 +0300 Subject: [PATCH 011/105] Support for IndexedName codegen (#478) LLVM code generation for `IndexedName`s. - Added code generation for initialising arrays in LOCAL blocks (with both integer constants and macros). - Added support for indexing arrays. 
fixes #467 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 88 +++++++++++++++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 29 ++++++ test/unit/codegen/llvm.cpp | 111 ++++++++++++++++++++++ 3 files changed, 220 insertions(+), 8 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 430f3d78de..b2a09fdd96 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -25,6 +25,44 @@ namespace codegen { /* Helper routines */ /****************************************************************************************/ +bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { + llvm::Type* array_type = + local_named_values->lookup(node.get_node_name())->getType()->getPointerElementType(); + unsigned length = array_type->getArrayNumElements(); + return 0 <= index && index < length; +} + +llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, unsigned index) { + llvm::Type* index_type = llvm::Type::getInt32Ty(*context); + std::vector indices; + indices.push_back(llvm::ConstantInt::get(index_type, 0)); + indices.push_back(llvm::ConstantInt::get(index_type, index)); + + return builder.CreateInBoundsGEP(local_named_values->lookup(name), indices); +} + +llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { + unsigned index = get_array_index_or_length(node); + + // Check if index is within array bounds. + if (!check_array_bounds(node, index)) + throw std::runtime_error("Error: Index is out of bounds"); + + return create_gep(node.get_node_name(), index); +} + +unsigned CodegenLLVMVisitor::get_array_index_or_length(const ast::IndexedName& indexed_name) { + auto integer = std::dynamic_pointer_cast(indexed_name.get_length()); + if (!integer) + throw std::runtime_error("Error: expecting integer index or length"); + + // Check if integer value is taken from a macro. 
+ if (!integer->get_macro()) + return integer->get_value(); + const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name()); + return static_cast(*macro->get_value()); +} + void CodegenLLVMVisitor::run_llvm_opt_passes() { /// run some common optimisation passes that are commonly suggested fpm.add(llvm::createInstructionCombiningPass()); @@ -43,7 +81,6 @@ void CodegenLLVMVisitor::run_llvm_opt_passes() { } } - void CodegenLLVMVisitor::create_external_method_call(const std::string& name, const ast::ExpressionVector& arguments) { std::vector argument_values; @@ -187,8 +224,17 @@ void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& no if (!var) { throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); } - llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); - builder.CreateStore(rhs, alloca); + + const auto& identifier = var->get_name(); + if (identifier->is_name()) { + llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); + builder.CreateStore(rhs, alloca); + } else if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + builder.CreateStore(rhs, codegen_indexed_name(*indexed_name)); + } else { + throw std::runtime_error("Error: Unsupported variable type"); + } return; } @@ -254,10 +300,22 @@ void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatement& node) { for (const auto& variable: node.get_variables()) { - // LocalVar always stores a Name. - auto name = variable->get_node_name(); - llvm::Type* var_type = llvm::Type::getDoubleTy(*context); - llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); + std::string name = variable->get_node_name(); + const auto& identifier = variable->get_name(); + // Local variable can be a scalar (Node AST class) or an array (IndexedName AST class). 
For + // each case, create memory allocations with the corresponding LLVM type. + llvm::Type* var_type; + if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + unsigned length = get_array_index_or_length(*indexed_name); + var_type = llvm::ArrayType::get(llvm::Type::getDoubleTy(*context), length); + } else if (identifier->is_name()) { + // This case corresponds to a scalar local variable. Its type is double by default. + var_type = llvm::Type::getDoubleTy(*context); + } else { + throw std::runtime_error("Error: Unsupported local variable type"); + } + builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); } } @@ -310,7 +368,21 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node } void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { - llvm::Value* var = builder.CreateLoad(local_named_values->lookup(node.get_node_name())); + const auto& identifier = node.get_name(); + if (!identifier->is_name() && !identifier->is_indexed_name()) + throw std::runtime_error("Error: Unsupported variable type"); + + llvm::Value* ptr; + if (identifier->is_name()) + ptr = local_named_values->lookup(node.get_node_name()); + + if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + ptr = codegen_indexed_name(*indexed_name); + } + + // Finally, load the variable from the pointer value. 
+ llvm::Value* var = builder.CreateLoad(ptr); values.push_back(var); } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 32347bdabd..be4eb04867 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -100,6 +100,35 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { , builder(*context) , fpm(module.get()) {} + /** + * Checks if array index specified by the given IndexedName is within bounds + * \param node IndexedName representing array + * \return \c true if the index is within bounds + */ + bool check_array_bounds(const ast::IndexedName& node, unsigned index); + + /** + * Generates LLVM code for the given IndexedName + * \param node IndexedName NMODL AST node + * \return LLVM code generated for this AST node + */ + llvm::Value* codegen_indexed_name(const ast::IndexedName& node); + + /** + * Returns GEP instruction to 1D array + * \param name 1D array name + * \param index element index + * \return GEP instruction value + */ + llvm::Value* create_gep(const std::string& name, unsigned index); + + /** + * Returns array index or length from given IndexedName + * \param node IndexedName representing array + * \return array index or length + */ + unsigned get_array_index_or_length(const ast::IndexedName& node); + /** * Create a function call to an external method * \param name external method name diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index d2c0a65e86..0ceadbe6f1 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/llvm.cpp @@ -116,6 +116,31 @@ SCENARIO("Binary expression", "[visitor][llvm]") { } } +//============================================================================= +// Define +//============================================================================= + +SCENARIO("Define", "[visitor][llvm]") { + GIVEN("Procedure with array variable of length specified by DEFINE") { + std::string nmodl_text = R"( + 
DEFINE N 100 + + PROCEDURE foo() { + LOCAL x[N] + } + )"; + + THEN("macro is expanded and array is allocated") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check stack allocations for i and j + std::regex array(R"(%x = alloca \[100 x double\])"); + REQUIRE(std::regex_search(module_string, m, array)); + } + } +} + //============================================================================= // FunctionBlock //============================================================================= @@ -256,6 +281,92 @@ SCENARIO("Function call", "[visitor][llvm]") { } } +//============================================================================= +// IndexedName +//============================================================================= + +SCENARIO("Indexed name", "[visitor][llvm]") { + GIVEN("Procedure with a local array variable") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2] + } + )"; + + THEN("array is allocated") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex array(R"(%x = alloca \[2 x double\])"); + REQUIRE(std::regex_search(module_string, m, array)); + } + } + + GIVEN("Procedure with a local array assignment") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2] + x[1] = 3 + } + )"; + + THEN("element is stored to the array") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check GEP is created correctly to pint at array element. + std::regex GEP( + R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)"); + REQUIRE(std::regex_search(module_string, m, GEP)); + + // Check the value is stored to the pointer. 
+ std::regex store(R"(store double 3.000000e\+00, double\* %1)"); + REQUIRE(std::regex_search(module_string, m, store)); + } + } + + GIVEN("Procedure with a assignment of array element") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2], y + x[1] = 3 + y = x[1] + } + )"; + + THEN("array element is stored to the variable") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check GEP is created correctly to pint at array element. + std::regex GEP( + R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)"); + REQUIRE(std::regex_search(module_string, m, GEP)); + + // Check the value is loaded from the pointer. + std::regex load(R"(%3 = load double, double\* %2)"); + REQUIRE(std::regex_search(module_string, m, load)); + + // Check the value is stored to the the variable. + std::regex store(R"(store double %3, double\* %y)"); + REQUIRE(std::regex_search(module_string, m, store)); + } + } + + GIVEN("Array with out of bounds access") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2] + x[5] = 3 + } + )"; + + THEN("error is thrown") { + REQUIRE_THROWS_AS(run_llvm_visitor(nmodl_text), std::runtime_error); + } + } +} + //============================================================================= // LocalList and LocalVar //============================================================================= From 1674f3b520767f58384fb6f05fd1b8c39dedb334 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Wed, 6 Jan 2021 23:51:59 +0100 Subject: [PATCH 012/105] Improvements for code generation specific transformations (#483) NMODL AST needs various transformation to generate C++ code or LLVM IR. This PR is begining of AST transformations to simplify code generation backends. * New CodegenLLVMHelperVisitor to perform various AST transformations to simplify code generation for various backends and simulators. 
* CodegenLLVMHelperVisitor is currently limited to LLVM backend to simplify initial implementation and keep C++ based backends working. * CodegenLLVMHelperVisitor now handles FUNCTIONS and PROCEDURES blocks - Replace LocalListStatement with CodegenVarStatement - Added new AST types for code generation - CodegenVar to represent variable used for code generation - CodegenVarType to represent codegen variable - CodegenVarListStatement to represent list of CodegenVar - CodegenStruct will be used in future to represent struct like NrnThread or Mechanism class See #474 --- src/codegen/llvm/CMakeLists.txt | 7 +- .../llvm/codegen_llvm_helper_visitor.cpp | 113 ++++++++++++++++++ .../llvm/codegen_llvm_helper_visitor.hpp | 50 ++++++++ src/codegen/llvm/codegen_llvm_visitor.cpp | 8 ++ src/language/code_generator.cmake | 8 ++ src/language/codegen.yaml | 109 ++++++++++++++++- src/language/nmodl.yaml | 2 +- src/language/node_info.py | 2 + src/language/nodes.py | 4 + src/language/templates/ast/ast_decl.hpp | 10 ++ .../templates/visitors/nmodl_visitor.cpp | 3 + src/main.cpp | 2 + test/integration/mod/procedure.mod | 2 +- test/unit/codegen/llvm.cpp | 2 +- 14 files changed, 315 insertions(+), 7 deletions(-) create mode 100644 src/codegen/llvm/codegen_llvm_helper_visitor.cpp create mode 100644 src/codegen/llvm/codegen_llvm_helper_visitor.hpp diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 71ecca338c..db16d4072c 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -1,8 +1,11 @@ # ============================================================================= # Codegen sources # ============================================================================= -set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp) +set(LLVM_CODEGEN_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp) # ============================================================================= # LLVM codegen library diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp new file mode 100644 index 0000000000..c52cc92a3d --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -0,0 +1,113 @@ + +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen_llvm_helper_visitor.hpp" + +#include "ast/all.hpp" +#include "utils/logger.hpp" +#include "visitors/visitor_utils.hpp" + +namespace nmodl { +namespace codegen { + +using namespace fmt::literals; + +void CodegenLLVMHelperVisitor::visit_statement_block(ast::StatementBlock& node) { + node.visit_children(*this); + + /// if local list statement exist, we have to replace it + auto local_statement = visitor::get_local_list_statement(node); + if (local_statement) { + /// create codegen variables from local variables + ast::CodegenVarVector variables; + for (const auto& var: local_statement->get_variables()) { + variables.emplace_back(new ast::CodegenVar(0, var->get_name()->clone())); + } + + /// remove local list statement now + const auto& statements = node.get_statements(); + node.erase_statement(statements.begin()); + + /// create new codegen variable statement + auto type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); + auto statement = std::make_shared(type, variables); + + /// insert codegen variable statement + node.insert_statement(statements.begin(), statement); + } +} + 
+void CodegenLLVMHelperVisitor::add_function_procedure_node(ast::Block& node) { + std::string function_name = node.get_node_name(); + + const auto& source_node_type = node.get_node_type(); + auto name = new ast::Name(new ast::String(function_name)); + auto return_var = new ast::Name(new ast::String("ret_" + function_name)); + ast::CodegenVarType* var_type = nullptr; + ast::CodegenVarType* return_type = nullptr; + + /// return type based on node type + bool is_function = source_node_type == ast::AstNodeType::FUNCTION_BLOCK; + if (is_function) { + var_type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); + } else { + var_type = new ast::CodegenVarType(ast::AstNodeType::INTEGER); + } + + /// return type is same as variable type + return_type = var_type->clone(); + + /// function body and it's statement + auto block = node.get_statement_block()->clone(); + const auto& statements = block->get_statements(); + + /// insert return variable at the start of the block + ast::CodegenVarVector codegen_vars; + codegen_vars.emplace_back(new ast::CodegenVar(0, return_var->clone())); + auto statement = std::make_shared(var_type, codegen_vars); + block->insert_statement(statements.begin(), statement); + + /// add return statement + auto return_statement = new ast::CodegenReturnStatement(return_var); + block->emplace_back_statement(return_statement); + + /// prepare arguments + ast::CodegenArgumentVector code_arguments; + const auto& arguments = node.get_parameters(); + for (const auto& arg: arguments) { + auto type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); + auto var = arg->get_name()->clone(); + code_arguments.emplace_back(new ast::CodegenArgument(type, var)); + } + + /// add new node to AST + auto function = + std::make_shared(return_type, name, code_arguments, block); + codegen_functions.push_back(function); +} + +void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { + node.visit_children(*this); + add_function_procedure_node(node); +} + 
+void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { + node.visit_children(*this); + add_function_procedure_node(node); +} + +void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { + logger->info("Running CodegenLLVMHelperVisitor"); + node.visit_children(*this); + for (auto& fun: codegen_functions) { + node.emplace_back_node(fun); + } +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp new file mode 100644 index 0000000000..b7ff57aec1 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -0,0 +1,50 @@ +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \file + * \brief \copybrief nmodl::codegen::CodegenLLVMHelperVisitor + */ + +#include + +#include "codegen/codegen_info.hpp" +#include "symtab/symbol_table.hpp" +#include "visitors/ast_visitor.hpp" + +namespace nmodl { +namespace codegen { + +/** + * @addtogroup llvm_codegen_details + * @{ + */ + +/** + * \class CodegenLLVMHelperVisitor + * \brief Helper visitor to gather AST information to help LLVM code generation + */ +class CodegenLLVMHelperVisitor: public visitor::AstVisitor { + std::vector> codegen_functions; + + void add_function_procedure_node(ast::Block& node); + + public: + CodegenLLVMHelperVisitor() = default; + + void visit_statement_block(ast::StatementBlock& node) override; + void visit_procedure_block(ast::ProcedureBlock& node) override; + void visit_function_block(ast::FunctionBlock& node) override; + void visit_program(ast::Program& node) override; +}; + +/** @} */ // end of llvm_codegen_details + +} // namespace 
codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index b2a09fdd96..0fa0864d9a 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -6,6 +6,8 @@ *************************************************************************/ #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" + #include "ast/all.hpp" #include "codegen/codegen_helper_visitor.hpp" #include "visitors/rename_visitor.hpp" @@ -347,6 +349,12 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Keep this for easier development (maybe move to debug mode later). std::cout << print_module(); + + // not used yet + { + CodegenLLVMHelperVisitor v; + v.visit_program(const_cast(node)); + } } void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index 36d10117e4..356e05283f 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -65,6 +65,14 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/block_comment.hpp ${PROJECT_BINARY_DIR}/src/ast/boolean.hpp ${PROJECT_BINARY_DIR}/src/ast/breakpoint_block.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_argument.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_for_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_function.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_return_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_struct.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_list_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_type.hpp ${PROJECT_BINARY_DIR}/src/ast/compartment.hpp ${PROJECT_BINARY_DIR}/src/ast/conductance_hint.hpp ${PROJECT_BINARY_DIR}/src/ast/conserve.hpp diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index 5511d14478..d8c0a9d1ee 
100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -29,6 +29,34 @@ children: - Number: - Identifier: + children: + - CodegenVarType: + brief: "Represent type of the variable" + members: + - type: + brief: "Type of the ast node" + type: AstNodeType + - CodegenVar: + brief: "Represent variable used for code generation" + members: + - pointer: + brief: "If variable is pointer type" + type: int + - name: + brief: "Name of the variable" + type: Identifier + node_name: true + - CodegenArgument: + brief: "Represent argument to a function" + members: + - type: + brief: "Type of the argument" + type: CodegenVarType + suffix: {value: " "} + - name: + brief: "Name of the argument" + type: Identifier + node_name: true - Block: children: - NrnStateBlock: @@ -89,7 +117,29 @@ type: StatementBlock - finalize_block: brief: "Statement block to be executed after calling linear solver" - type: StatementBlock + type: StatementBlock + - CodegenFunction: + brief: "Function generated from FUNCTION or PROCEDURE block" + members: + - return_type: + brief: "Return type of the function" + type: CodegenVarType + suffix: {value: " "} + - name: + brief: "Name of the function" + type: Name + node_name: true + - arguments: + brief: "Vector of the parameters to the function" + type: CodegenArgument + vector: true + prefix: {value: "(", force: true} + suffix: {value: ")", force: true} + separator: ", " + - statement_block: + brief: "Body of the function" + type: StatementBlock + getter: {override: true} - WrappedExpression: brief: "Wrap any other expression type" members: @@ -110,6 +160,17 @@ - node_to_solve: brief: "Block to be solved (callback node or solution node itself)" type: Expression + - CodegenStruct: + brief: "Represent a struct or class for code generation" + members: + - variable_statements: + brief: "member variables of the class/struct" + type: CodegenVarListStatement + vector: true + - functions: + brief: "member functions of the class/struct" + type: 
CodegenFunction + vector: true - Statement: brief: "Statement base class" children: @@ -120,4 +181,48 @@ brief: "Value of new timestep" type: Double prefix: {value: " = "} - brief: "Statement to indicate a change in timestep in a given block" \ No newline at end of file + brief: "Statement to indicate a change in timestep in a given block" + - CodegenForStatement: + brief: "Represent for loop used for code generation" + nmodl: "for(" + members: + - initialization: + brief: "initialization expression for the loop" + type: Expression + optional: true + - condition: + brief: "condition expression for the loop" + type: Expression + optional: true + prefix: {value: ";"} + suffix: {value: "; "} + - increment: + brief: "increment or decrement expression for the loop" + type: Expression + optional: true + suffix: {value: ") "} + - statement_block: + brief: "body of the loop" + type: StatementBlock + getter: {override: true} + - CodegenReturnStatement: + brief: "Represent return statement for code generation" + nmodl: "return " + members: + - statement: + brief: "return statement" + type: Expression + optional: true + - CodegenVarListStatement: + brief: "Represent list of variables used for code generation" + members: + - var_type: + brief: "Type of the variables" + type: CodegenVarType + suffix: {value: " "} + - variables: + brief: "List of the variables to define" + type: CodegenVar + vector: true + separator: ", " + add: true diff --git a/src/language/nmodl.yaml b/src/language/nmodl.yaml index 9f94ada917..ef8fbbe49c 100644 --- a/src/language/nmodl.yaml +++ b/src/language/nmodl.yaml @@ -1368,7 +1368,7 @@ type: Double - Statement: - brief: "TODO" + brief: "Base class to represent a statement in the NMODL" children: - UnitState: brief: "TODO" diff --git a/src/language/node_info.py b/src/language/node_info.py index 821d263287..6b9a74243f 100644 --- a/src/language/node_info.py +++ b/src/language/node_info.py @@ -28,6 +28,7 @@ "FirstLastType", "BAType", "UnitStateType", + 
"AstNodeType", } BASE_TYPES = {"std::string" } | INTEGRAL_TYPES @@ -164,6 +165,7 @@ STATEMENT_BLOCK_NODE = "StatementBlock" STRING_NODE = "String" UNIT_BLOCK = "UnitBlock" +AST_NODETYPE_NODE= "AstNodeType" # name of variable in prime node which represent order of derivative ORDER_VAR_NAME = "order" diff --git a/src/language/nodes.py b/src/language/nodes.py index 88ad1bb000..ad4ee818c6 100644 --- a/src/language/nodes.py +++ b/src/language/nodes.py @@ -136,6 +136,10 @@ def is_boolean_node(self): def is_name_node(self): return self.class_name == node_info.NAME_NODE + @property + def is_ast_nodetype_node(self): + return self.class_name == node_info.AST_NODETYPE_NODE + @property def is_enum_node(self): data_type = node_info.DATA_TYPES[self.class_name] diff --git a/src/language/templates/ast/ast_decl.hpp b/src/language/templates/ast/ast_decl.hpp index a6bdae69a1..dd03c3d282 100644 --- a/src/language/templates/ast/ast_decl.hpp +++ b/src/language/templates/ast/ast_decl.hpp @@ -12,6 +12,7 @@ #pragma once #include +#include #include /// \file @@ -50,6 +51,15 @@ enum class AstNodeType { /** @} */ // end of ast_type +static inline std::string to_string(AstNodeType type) { + {% for node in nodes %} + if(type == AstNodeType::{{ node.class_name|snake_case|upper }}) { + return "{{ node.class_name|snake_case|upper }}"; + } + {% endfor %} + throw std::runtime_error("Unhandled type in to_string(AstNodeType type)!"); +} + /** * @defgroup ast_vec_type AST Vector Type Aliases * @ingroup ast diff --git a/src/language/templates/visitors/nmodl_visitor.cpp b/src/language/templates/visitors/nmodl_visitor.cpp index 9c60bf8f87..7956fefdd1 100644 --- a/src/language/templates/visitors/nmodl_visitor.cpp +++ b/src/language/templates/visitors/nmodl_visitor.cpp @@ -116,6 +116,9 @@ void NmodlPrintVisitor::visit_{{ node.class_name|snake_case}}(const {{ node.clas {% for child in node.children %} {% call guard(child.force_prefix, child.force_suffix) -%} {% if child.is_base_type_node %} + {% if 
child.is_ast_nodetype_node %} + printer->add_element(ast::to_string(node.get_{{child.varname}}())); + {% endif %} {% else %} {% if child.optional or child.is_statement_block_node %} if(node.get_{{ child.varname }}()) { diff --git a/src/main.cpp b/src/main.cpp index 622171a278..28411afaf9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,6 +20,7 @@ #ifdef NMODL_LLVM_BACKEND #include "codegen/llvm/codegen_llvm_visitor.hpp" #endif + #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pyembed.hpp" @@ -613,6 +614,7 @@ int main(int argc, const char* argv[]) { logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_passes); visitor.visit_program(*ast); + ast_to_nmodl(*ast, filepath("llvm")); } #endif } diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod index 4a45af7d1e..daa4ad33ad 100644 --- a/test/integration/mod/procedure.mod +++ b/test/integration/mod/procedure.mod @@ -21,7 +21,7 @@ PROCEDURE complex_sum(v) { } } -PROCEDURE loop_proc(v) { +PROCEDURE loop_proc(v, t) { LOCAL i i = 0 WHILE(i < 10) { diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/llvm.cpp index 0ceadbe6f1..d644947e79 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/llvm.cpp @@ -491,4 +491,4 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { REQUIRE(std::regex_search(module_string, m, empty_proc)); } } -} \ No newline at end of file +} From f6d8b859969c755bae209aff52eae7a9d4a2433e Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 7 Jan 2021 00:50:36 +0100 Subject: [PATCH 013/105] nrn_state function generation in NMODL AST to help LLVM codegen (#484) * Added new BinaryOp for += and -= * Added string_to_binaryop function * Added Void node type to represent void return type * Added CodegenAtomicStatement for ion write statements * llvm helper started handling visit_nrn_state_block - NrnStateBlock is being converted into CodegenFunction - for loop body 
with solution blocks created - voltage and node index initialization code added - read and write ion statements are handled * Some of the functions are now moved into CodegenInfo Co-authored-by: Ioannis Magkanaris --- src/ast/ast_common.hpp | 25 +- src/codegen/codegen_c_visitor.hpp | 56 -- src/codegen/codegen_info.cpp | 74 +++ src/codegen/codegen_info.hpp | 94 ++++ .../llvm/codegen_llvm_helper_visitor.cpp | 500 ++++++++++++++++-- .../llvm/codegen_llvm_helper_visitor.hpp | 42 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 2 +- src/language/code_generator.cmake | 2 + src/language/codegen.yaml | 37 +- 9 files changed, 722 insertions(+), 110 deletions(-) diff --git a/src/ast/ast_common.hpp b/src/ast/ast_common.hpp index c2d3ffc7fa..1766dea62b 100644 --- a/src/ast/ast_common.hpp +++ b/src/ast/ast_common.hpp @@ -43,9 +43,12 @@ namespace ast { * * NMODL support different binary operators and this * type is used to store their value in the AST. + * + * \note `+=` and `-=` are not supported by NMODL but they + * are added for code generation nodes. */ typedef enum { - BOP_ADDITION, ///< \+ + BOP_ADDITION = 0, ///< \+ BOP_SUBTRACTION, ///< -- BOP_MULTIPLICATION, ///< \c * BOP_DIVISION, ///< \/ @@ -58,7 +61,9 @@ typedef enum { BOP_LESS_EQUAL, ///< <= BOP_ASSIGN, ///< = BOP_NOT_EQUAL, ///< != - BOP_EXACT_EQUAL ///< == + BOP_EXACT_EQUAL, ///< == + BOP_ADD_ASSIGN, ///< \+= + BOP_SUB_ASSIGN ///< \-= } BinaryOp; /** @@ -68,7 +73,7 @@ typedef enum { * is used to lookup the corresponding symbol for the operator. 
*/ static const std::string BinaryOpNames[] = - {"+", "-", "*", "/", "^", "&&", "||", ">", "<", ">=", "<=", "=", "!=", "=="}; + {"+", "-", "*", "/", "^", "&&", "||", ">", "<", ">=", "<=", "=", "!=", "==", "+=", "-="}; /// enum type for unary operators typedef enum { UOP_NOT, UOP_NEGATION } UnaryOp; @@ -100,6 +105,20 @@ typedef enum { LTMINUSGT, LTLT, MINUSGT } ReactionOp; /// string representation of ast::ReactionOp static const std::string ReactionOpNames[] = {"<->", "<<", "->"}; +/** + * Get corresponding ast::BinaryOp for given string + * @param op Binary operator in string format + * @return ast::BinaryOp for given string + */ +static inline BinaryOp string_to_binaryop(const std::string& op) { + /// check if binary operator supported otherwise error + auto it = std::find(std::begin(BinaryOpNames), std::end(BinaryOpNames), op); + if (it == std::end(BinaryOpNames)) { + throw std::runtime_error("Error in string_to_binaryop, can't find " + op); + } + int pos = std::distance(std::begin(BinaryOpNames), it); + return static_cast(pos); +} /** @} */ // end of ast_prop } // namespace ast diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 3a4fc39c13..0a80e52d01 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -46,46 +46,6 @@ namespace codegen { * @{ */ -/** - * \enum BlockType - * \brief Helper to represent various block types - * - * Note: do not assign integers to these enums - * - */ -enum BlockType { - /// initial block - Initial, - - /// constructor block - Constructor, - - /// destructor block - Destructor, - - /// breakpoint block - Equation, - - /// ode_* routines block (not used) - Ode, - - /// derivative block - State, - - /// watch block - Watch, - - /// net_receive block - NetReceive, - - /// before / after block - BeforeAfter, - - /// fake ending block type for loops on the enums. 
Keep it at the end - BlockTypeEnd -}; - - /** * \enum MemberType * \brief Helper to represent various variables types @@ -140,22 +100,6 @@ struct IndexVariableInfo { , is_integer(is_integer) {} }; - -/** - * \class ShadowUseStatement - * \brief Represents ion write statement during code generation - * - * Ion update statement needs use of shadow vectors for certain backends - * as atomics operations are not supported on cpu backend. - * - * \todo If shadow_lhs is empty then we assume shadow statement not required - */ -struct ShadowUseStatement { - std::string lhs; - std::string op; - std::string rhs; -}; - /** @} */ // end of codegen_details diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index fb96b26e59..f0a173c0a2 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -8,6 +8,7 @@ #include "codegen/codegen_info.hpp" #include "ast/all.hpp" +#include "utils/logger.hpp" #include "visitors/var_usage_visitor.hpp" #include "visitors/visitor_utils.hpp" @@ -15,6 +16,8 @@ namespace nmodl { namespace codegen { +using namespace fmt::literals; +using symtab::syminfo::NmodlType; using visitor::VarUsageVisitor; /// if any ion has write variable @@ -106,5 +109,76 @@ bool CodegenInfo::is_voltage_used_by_watch_statements() const { }); } +bool CodegenInfo::state_variable(const std::string& name) const { + // clang-format off + auto result = std::find_if(state_vars.begin(), + state_vars.end(), + [&name](const SymbolType& sym) { + return name == sym->get_name(); + } + ); + // clang-format on + return result != state_vars.end(); +} + +std::pair CodegenInfo::read_ion_variable_name( + const std::string& name) const { + return {name, "ion_" + name}; +} + + +std::pair CodegenInfo::write_ion_variable_name( + const std::string& name) const { + return {"ion_" + name, name}; +} + + +/** + * \details Current variable used in breakpoint block could be local variable. 
+ * In this case, neuron has already renamed the variable name by prepending + * "_l". In our implementation, the variable could have been renamed by + * one of the pass. And hence, we search all local variables and check if + * the variable is renamed. Note that we have to look into the symbol table + * of statement block and not breakpoint. + */ +std::string CodegenInfo::breakpoint_current(std::string current) const { + auto& breakpoint = breakpoint_node; + if (breakpoint == nullptr) { + return current; + } + const auto& symtab = breakpoint->get_statement_block()->get_symbol_table(); + const auto& variables = symtab->get_variables_with_properties(NmodlType::local_var); + for (const auto& var: variables) { + std::string renamed_name = var->get_name(); + std::string original_name = var->get_original_name(); + if (current == original_name) { + current = renamed_name; + break; + } + } + return current; +} + + +bool CodegenInfo::is_an_instance_variable(const std::string& varname) const { + /// check if symbol of given name exist + auto check_symbol = [](const std::string& name, const std::vector& symbols) { + for (auto& symbol: symbols) { + if (symbol->get_name() == name) { + return true; + } + } + return false; + }; + + /// check if variable exist into all possible types + if (check_symbol(varname, assigned_vars) || check_symbol(varname, state_vars) || + check_symbol(varname, range_parameter_vars) || check_symbol(varname, range_assigned_vars) || + check_symbol(varname, range_state_vars)) { + return true; + } + return false; +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index 9ca2409dbe..3ece7ae902 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -16,6 +16,7 @@ #include #include "ast/ast.hpp" +#include "codegen/codegen_naming.hpp" #include "symtab/symbol_table.hpp" namespace nmodl { @@ -134,6 +135,59 @@ struct IndexSemantics { , size(size) {} }; +/** + * \enum 
BlockType + * \brief Helper to represent various block types + * + * Note: do not assign integers to these enums + * + */ +enum BlockType { + /// initial block + Initial, + + /// destructor block + Destructor, + + /// breakpoint block + Equation, + + /// ode_* routines block (not used) + Ode, + + /// derivative block + State, + + /// watch block + Watch, + + /// net_receive block + NetReceive, + + /// fake ending block type for loops on the enums. Keep it at the end + BlockTypeEnd +}; + +/** + * \class ShadowUseStatement + * \brief Represents ion write statement during code generation + * + * Ion update statement needs use of shadow vectors for certain backends + * as atomics operations are not supported on cpu backend. + * + * \todo Currently `nrn_wrote_conc` is also added to shadow update statements + * list as it's corresponding to ion update statement in INITIAL block. This + * needs to be factored out. + * \todo This can be represented as AST node (like ast::CodegenAtomicStatement) + * but currently C backend use this same implementation. So we are using this + * same structure and then converting to ast::CodegenAtomicStatement for LLVM + * visitor. 
+ */ +struct ShadowUseStatement { + std::string lhs; + std::string op; + std::string rhs; +}; /** * \class CodegenInfo @@ -422,6 +476,46 @@ struct CodegenInfo { /// true if WatchStatement uses voltage v variable bool is_voltage_used_by_watch_statements() const; + /** + * Checks if the given variable name belongs to a state variable + * \param name The variable name + * \return \c true if the variable is a state variable + */ + bool state_variable(const std::string& name) const; + + /** + * Return ion variable name and corresponding ion read variable name + * \param name The ion variable name + * \return The ion read variable name + */ + std::pair read_ion_variable_name(const std::string& name) const; + + /** + * Return ion variable name and corresponding ion write variable name + * \param name The ion variable name + * \return The ion write variable name + */ + std::pair write_ion_variable_name(const std::string& name) const; + + /** + * Determine the variable name for the "current" used in breakpoint block taking into account + * intermediate code transformations. + * \param current The variable name for the current used in the model + * \return The name for the current to be printed in C + */ + std::string breakpoint_current(std::string current) const; + + /** + * Check if variable with given name is an instance variable + * + * Instance varaibles are local to each mechanism instance and + * needs to be accessed with an array index. Such variables are + * assigned, range, parameter+range etc. 
+ * @param varname Name of the variable + * @return True if variable is per mechanism instance + */ + bool is_an_instance_variable(const std::string& varname) const; + /// if we need a call back to wrote_conc in neuron/coreneuron bool require_wrote_conc = false; }; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index c52cc92a3d..341ab03fb6 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -9,6 +9,7 @@ #include "codegen_llvm_helper_visitor.hpp" #include "ast/all.hpp" +#include "codegen/codegen_helper_visitor.hpp" #include "utils/logger.hpp" #include "visitors/visitor_utils.hpp" @@ -17,91 +18,496 @@ namespace codegen { using namespace fmt::literals; -void CodegenLLVMHelperVisitor::visit_statement_block(ast::StatementBlock& node) { - node.visit_children(*this); - - /// if local list statement exist, we have to replace it - auto local_statement = visitor::get_local_list_statement(node); - if (local_statement) { - /// create codegen variables from local variables - ast::CodegenVarVector variables; - for (const auto& var: local_statement->get_variables()) { - variables.emplace_back(new ast::CodegenVar(0, var->get_name()->clone())); - } - - /// remove local list statement now - const auto& statements = node.get_statements(); - node.erase_statement(statements.begin()); +/** + * \brief Create variable definition statement + * + * `LOCAL` variables in NMODL don't have type. These variables need + * to be defined with float type. Same for index, loop iteration and + * local variables. This helper function function is used to create + * type specific local variable. 
+ * + * @param names Name of the variables to be defined + * @param type Type of the variables + * @return Statement defining variables + */ +static std::shared_ptr create_local_variable_statement( + const std::vector& names, + ast::AstNodeType type) { + /// create variables for the given name + ast::CodegenVarVector variables; + for (const auto& name: names) { + auto varname = new ast::Name(new ast::String(name)); + variables.emplace_back(new ast::CodegenVar(0, varname)); + } + auto var_type = new ast::CodegenVarType(type); + /// construct statement and return it + return std::make_shared(var_type, variables); +} - /// create new codegen variable statement - auto type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); - auto statement = std::make_shared(type, variables); +/** + * \brief Create expression for a given NMODL code statement + * @param code NMODL code statement + * @return Expression representing given NMODL code + */ +static std::shared_ptr create_statement_as_expression(const std::string& code) { + const auto& statement = visitor::create_statement(code); + auto expr_statement = std::dynamic_pointer_cast(statement); + auto expr = expr_statement->get_expression()->clone(); + return std::make_shared(expr); +} - /// insert codegen variable statement - node.insert_statement(statements.begin(), statement); - } +/** + * \brief Create expression for given NMODL code expression + * @param code NMODL code expression + * @return Expression representing NMODL code + */ +std::shared_ptr create_expression(const std::string& code) { + /// as provided code is only expression and not a full statement, create + /// a temporary assignment statement + const auto& wrapped_expr = create_statement_as_expression("some_var = " + code); + /// now extract RHS (representing original code) and return it as expression + auto expr = std::dynamic_pointer_cast(wrapped_expr)->get_expression(); + auto rhs = std::dynamic_pointer_cast(expr)->get_rhs(); + return 
std::make_shared(rhs->clone()); } -void CodegenLLVMHelperVisitor::add_function_procedure_node(ast::Block& node) { +/** + * \brief Add code generation function for FUNCTION or PROCEDURE block + * @param node AST node representing FUNCTION or PROCEDURE + * + * When we have a PROCEDURE or FUNCTION like + * + * \code{.mod} + * FUNCTION sum(x,y) { + * LOCAL res + * res = x + y + * sum = res + * } + * \endcode + * + * this gets typically converted to C/C++ code as: + * + * \code{.cpp} + * double sum(double x, double y) { + * double res; + * double ret_sum; + * res = x + y; + * ret_sum = res; + * return ret_sum; + * \endcode + * + * We perform following transformations so that code generation backends + * will have minimum logic: + * - Add return type + * - Add type for the function arguments + * - Define variables and return variable + * - Add return type (int for PROCEDURE and double for FUNCTION) + */ +void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { + /// name of the function from the node std::string function_name = node.get_node_name(); - - const auto& source_node_type = node.get_node_type(); auto name = new ast::Name(new ast::String(function_name)); + + /// return variable name has "ret_" prefix auto return_var = new ast::Name(new ast::String("ret_" + function_name)); - ast::CodegenVarType* var_type = nullptr; - ast::CodegenVarType* return_type = nullptr; /// return type based on node type - bool is_function = source_node_type == ast::AstNodeType::FUNCTION_BLOCK; - if (is_function) { - var_type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); + ast::CodegenVarType* ret_var_type = nullptr; + if (node.get_node_type() == ast::AstNodeType::FUNCTION_BLOCK) { + ret_var_type = new ast::CodegenVarType(FLOAT_TYPE); } else { - var_type = new ast::CodegenVarType(ast::AstNodeType::INTEGER); + ret_var_type = new ast::CodegenVarType(INTEGER_TYPE); } - /// return type is same as variable type - return_type = var_type->clone(); - - /// function body 
and it's statement + /// function body and it's statement, copy original block auto block = node.get_statement_block()->clone(); const auto& statements = block->get_statements(); /// insert return variable at the start of the block ast::CodegenVarVector codegen_vars; codegen_vars.emplace_back(new ast::CodegenVar(0, return_var->clone())); - auto statement = std::make_shared(var_type, codegen_vars); + auto statement = std::make_shared(ret_var_type, codegen_vars); block->insert_statement(statements.begin(), statement); /// add return statement auto return_statement = new ast::CodegenReturnStatement(return_var); block->emplace_back_statement(return_statement); - /// prepare arguments - ast::CodegenArgumentVector code_arguments; - const auto& arguments = node.get_parameters(); - for (const auto& arg: arguments) { - auto type = new ast::CodegenVarType(ast::AstNodeType::DOUBLE); - auto var = arg->get_name()->clone(); - code_arguments.emplace_back(new ast::CodegenArgument(type, var)); + /// prepare function arguments based original node arguments + ast::CodegenArgumentVector arguments; + for (const auto& param: node.get_parameters()) { + /// create new type and name for creating new ast node + auto type = new ast::CodegenVarType(FLOAT_TYPE); + auto var = param->get_name()->clone(); + arguments.emplace_back(new ast::CodegenArgument(type, var)); } - /// add new node to AST - auto function = - std::make_shared(return_type, name, code_arguments, block); + /// return type of the function is same as return variable type + ast::CodegenVarType* fun_ret_type = ret_var_type->clone(); + + /// we have all information for code generation function, create a new node + /// which will be inserted later into AST + auto function = std::make_shared(fun_ret_type, name, arguments, block); codegen_functions.push_back(function); } +static void append_statements_from_block(ast::StatementVector& statements, + const std::shared_ptr& block) { + const auto& block_statements = block->get_statements(); 
+ statements.insert(statements.end(), block_statements.begin(), block_statements.end()); +} + +static std::shared_ptr create_atomic_statement(std::string& lhs_str, + std::string& op_str, + std::string& rhs_str) { + auto lhs = std::make_shared(new ast::String(lhs_str)); + auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); + auto rhs = create_expression(rhs_str); + return std::make_shared(lhs, op, rhs); +} + +/** + * For a given block type, add read ion statements + * + * Depending upon the block type, we have to update read ion variables + * during code generation. Depending on block/procedure being printed, + * this method adds necessary read ion variable statements and also + * corresponding index calculation statements. Note that index statements + * are added separately at the beginning for just readability purpose. + * + * @param type The type of code block being generated + * @param int_variables Index variables to be created + * @param double_variables Floating point variables to be created + * @param index_statements Statements for loading indexes (typically for ions) + * @param body_statements main compute/update statements + * + * \todo After looking into mod2c and neuron implementation, it seems like + * Ode block type is not used. Need to look into implementation details. + * + * \todo Ion copy optimization is not implemented yet. This is currently + * implemented in C backend using `ion_read_statements_optimized()`. 
+ */ +void CodegenLLVMHelperVisitor::ion_read_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + /// create read ion and corresponding index statements + auto create_read_statements = [&](std::pair variable_names) { + // variable in current mechanism instance + std::string& varname = variable_names.first; + // ion variable to be read + std::string& ion_varname = variable_names.second; + // index for reading ion variable + std::string index_varname = "{}_id"_format(varname); + // first load the index + std::string index_statement = "{} = {}_index[id]"_format(index_varname, ion_varname); + // now assign the value + std::string read_statement = "{} = {}[{}]"_format(varname, ion_varname, index_varname); + // push index definition, index statement and actual read statement + int_variables.push_back(index_varname); + index_statements.push_back(visitor::create_statement(index_statement)); + body_statements.push_back(visitor::create_statement(read_statement)); + }; + + /// iterate over all ions and create statements for given block type + for (const auto& ion: info.ions) { + const std::string& name = ion.name; + for (const auto& var: ion.reads) { + if (type == BlockType::Ode && ion.is_ionic_conc(var) && info.state_variable(var)) { + continue; + } + auto variable_names = info.read_ion_variable_name(var); + create_read_statements(variable_names); + } + for (const auto& var: ion.writes) { + if (type == BlockType::Ode && ion.is_ionic_conc(var) && info.state_variable(var)) { + continue; + } + if (ion.is_ionic_conc(var)) { + auto variable_names = info.read_ion_variable_name(var); + create_read_statements(variable_names); + } + } + } +} + +/** + * For a given block type, add write ion statements + * + * Depending upon the block type, we have to update write ion variables + * during code generation. 
Depending on block/procedure being printed, + * this method adds necessary write ion variable statements and also + * corresponding index calculation statements. Note that index statements + * are added separately at the beginning for just readability purpose. + * + * @param type The type of code block being generated + * @param int_variables Index variables to be created + * @param double_variables Floating point variables to be created + * @param index_statements Statements for loading indexes (typically for ions) + * @param body_statements main compute/update statements + * + * \todo If intra or extra cellular ionic concentration is written + * then it requires call to `nrn_wrote_conc`. In C backend this is + * implemented in `ion_write_statements()` itself but this is not + * handled yet. + */ +void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + /// create write ion and corresponding index statements + auto create_write_statements = [&](std::string ion_varname, std::string op, std::string rhs) { + // index for writing ion variable + std::string index_varname = "{}_id"_format(ion_varname); + // load index + std::string index_statement = "{} = {}_index[id]"_format(index_varname, ion_varname); + // ion variable to write (with index) + std::string ion_to_write = "{}[{}]"_format(ion_varname, index_varname); + // push index definition, index statement and actual write statement + int_variables.push_back(index_varname); + index_statements.push_back(visitor::create_statement(index_statement)); + body_statements.push_back(create_atomic_statement(ion_to_write, op, rhs)); + }; + + /// iterate over all ions and create write ion statements for given block type + for (const auto& ion: info.ions) { + std::string concentration; + std::string name = ion.name; + for (const auto& var: ion.writes) { + auto 
variable_names = info.write_ion_variable_name(var); + /// ionic currents are accumulated + if (ion.is_ionic_current(var)) { + if (type == BlockType::Equation) { + std::string current = info.breakpoint_current(var); + std::string lhs = variable_names.first; + std::string op = "+="; + std::string rhs = current; + // for synapse type + if (info.point_process) { + auto area = codegen::naming::NODE_AREA_VARIABLE; + rhs += "*(1.e2/{})"_format(area); + } + create_write_statements(lhs, op, rhs); + } + } else { + if (!ion.is_rev_potential(var)) { + concentration = var; + } + std::string lhs = variable_names.first; + std::string op = "="; + std::string rhs = variable_names.second; + create_write_statements(lhs, op, rhs); + } + } + + /// still need to handle, need to define easy to use API + if (type == BlockType::Initial && !concentration.empty()) { + int index = 0; + if (ion.is_intra_cell_conc(concentration)) { + index = 1; + } else if (ion.is_extra_cell_conc(concentration)) { + index = 2; + } else { + /// \todo Unhandled case also in neuron implementation + throw std::logic_error("codegen error for {} ion"_format(ion.name)); + } + std::string ion_type_name = "{}_type"_format(ion.name); + std::string lhs = "int {}"_format(ion_type_name); + std::string op = "="; + std::string rhs = ion_type_name; + create_write_statements(lhs, op, rhs); + logger->error("conc_write_statement() call is required but it's not supported"); + } + } +} + +/** + * Convert variables in given node to instance variables + * + * For code generation, variables of type range, assigned, state or parameter+range + * needs to be converted to instance variable i.e. they need to be accessed with + * loop index variable. For example, `h` variables needs to be converted to `h[id]`. 
+ * + * @param node Ast node under which variables to be converted to instance type + */ +void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, + std::string& index_var) { + /// collect all variables in the node of type ast::VarName + auto variables = collect_nodes(node, {ast::AstNodeType::VAR_NAME}); + for (auto& v: variables) { + auto variable = std::dynamic_pointer_cast(v); + /// if variable is of type instance then convert it to index + if (info.is_an_instance_variable(variable->get_node_name())) { + auto name = variable->get_name()->clone(); + auto index = new ast::Name(new ast::String(index_var)); + auto indexed_name = std::make_shared(name, index); + variable->set_name(indexed_name); + } + } +} + +/** + * \brief Visit StatementBlock and convert Local statement for code generation + * @param node AST node representing Statement block + * + * Statement blocks can have LOCAL statement and if it exist it's typically + * first statement in the vector. We have to remove LOCAL statement and convert + * it to CodegenVarListStatement that will represent all variables as double. 
+ */ +void CodegenLLVMHelperVisitor::visit_statement_block(ast::StatementBlock& node) { + /// first process all children blocks if any + node.visit_children(*this); + + /// check if block contains LOCAL statement + const auto& local_statement = visitor::get_local_list_statement(node); + if (local_statement) { + /// create codegen variables from local variables + /// clone variable to make new independent statement + ast::CodegenVarVector variables; + for (const auto& var: local_statement->get_variables()) { + variables.emplace_back(new ast::CodegenVar(0, var->get_name()->clone())); + } + + /// remove local list statement now + const auto& statements = node.get_statements(); + node.erase_statement(statements.begin()); + + /// create new codegen variable statement and insert at the beginning of the block + auto type = new ast::CodegenVarType(FLOAT_TYPE); + auto statement = std::make_shared(type, variables); + node.insert_statement(statements.begin(), statement); + } +} + void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { node.visit_children(*this); - add_function_procedure_node(node); + create_function_for_node(node); } void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { node.visit_children(*this); - add_function_procedure_node(node); + create_function_for_node(node); +} + +/** + * \brief Convert ast::NrnStateBlock to corresponding code generation function nrn_state + * @param node AST node representing ast::NrnStateBlock + * + * Solver passes converts DERIVATIVE block from MOD into ast::NrnStateBlock node + * that represent `nrn_state` function in the generated CPP code. To help this + * code generation, we perform various transformation on ast::NrnStateBlock and + * create new code generation function. 
+ */ +void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { + /// statements for new function to be generated + ast::StatementVector function_statements; + + /// create variable definition for loop index and insert at the beginning + std::string loop_index_var = "id"; + std::vector int_variables{"id"}; + function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + + /// create now main compute part : for loop over channel instances + + /// loop constructs : initialization, condition and increment + const auto& initialization = create_statement_as_expression("id = 0"); + const auto& condition = create_expression("id < node_count"); + const auto& increment = create_statement_as_expression("id = id + 1"); + + /// loop body : initialization + solve blocks + ast::StatementVector loop_def_statements; + ast::StatementVector loop_index_statements; + ast::StatementVector loop_body_statements; + { + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + + /// access node index and corresponding voltage + loop_index_statements.push_back(visitor::create_statement("node_id = node_index[id]")); + loop_body_statements.push_back(visitor::create_statement("v = voltage[node_id]")); + + /// read ion variables + ion_read_statements(BlockType::State, + int_variables, + double_variables, + loop_index_statements, + loop_body_statements); + + /// main compute node : extract solution expressions from the derivative block + const auto& solutions = collect_nodes(node, {ast::AstNodeType::SOLUTION_EXPRESSION}); + for (const auto& statement: solutions) { + const auto& solution = std::dynamic_pointer_cast(statement); + const auto& block = std::dynamic_pointer_cast( + solution->get_node_to_solve()); + append_statements_from_block(loop_body_statements, block); + } + + /// add breakpoint block if no current + if (info.currents.empty() && info.breakpoint_node != nullptr) { + auto block = 
info.breakpoint_node->get_statement_block(); + append_statements_from_block(loop_body_statements, block); + } + + /// write ion statements + ion_write_statements(BlockType::State, + int_variables, + double_variables, + loop_index_statements, + loop_body_statements); + + loop_def_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + loop_def_statements.push_back( + create_local_variable_statement(double_variables, FLOAT_TYPE)); + + // \todo handle process_shadow_update_statement and wrote_conc_call yet + } + + ast::StatementVector loop_body; + loop_body.insert(loop_body.end(), loop_def_statements.begin(), loop_def_statements.end()); + loop_body.insert(loop_body.end(), loop_index_statements.begin(), loop_index_statements.end()); + loop_body.insert(loop_body.end(), loop_body_statements.begin(), loop_body_statements.end()); + + /// now construct a new code block which will become the body of the loop + auto loop_block = std::make_shared(loop_body); + + /// convert all variables inside loop body to instance variables + convert_to_instance_variable(*loop_block, loop_index_var); + + /// create for loop node + auto for_loop_statement = std::make_shared(initialization, + condition, + increment, + loop_block); + + /// loop itself becomes one of the statement in the function + function_statements.push_back(for_loop_statement); + + /// new block for the function + auto function_block = new ast::StatementBlock(function_statements); + + /// name of the function and it's return type + std::string function_name = "nrn_state_" + stringutils::tolower(info.mod_suffix); + auto name = new ast::Name(new ast::String(function_name)); + auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); + + /// \todo : currently there are no arguments + ast::CodegenArgumentVector code_arguments; + + /// finally, create new function + auto function = + std::make_shared(return_type, name, code_arguments, function_block); + 
codegen_functions.push_back(function); + + std::cout << nmodl::to_nmodl(function); } void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { + /// run codegen helper visitor to collect information + CodegenHelperVisitor v; + info = v.analyze(node); + logger->info("Running CodegenLLVMHelperVisitor"); node.visit_children(*this); for (auto& fun: codegen_functions) { diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index b7ff57aec1..6b1684e7d1 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -28,19 +28,57 @@ namespace codegen { /** * \class CodegenLLVMHelperVisitor - * \brief Helper visitor to gather AST information to help LLVM code generation + * \brief Helper visitor for AST information to help code generation backends + * + * Code generation backends convert NMODL AST to C++ code. But during this + * C++ code generation, various transformations happens and final code generated + * is quite different / large than actual kernel represented in MOD file ro + * NMODL AST. + * + * Currently, these transformations are embedded into code generation backends + * like ast::CodegenCVisitor. If we have to generate code for new simulator, there + * will be duplication of these transformations. Also, for completely new + * backends like NEURON simulator or SIMD library, we will have code duplication. + * + * In order to avoid this, we perform maximum transformations in this visitor. + * Currently we focus on transformations that will help LLVM backend but later + * these will be common across all backends. 
*/ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { + /// newly generated code generation specific functions std::vector> codegen_functions; - void add_function_procedure_node(ast::Block& node); + /// ast information for code generation + codegen::CodegenInfo info; + + /// default integer and float node type + const ast::AstNodeType INTEGER_TYPE = ast::AstNodeType::INTEGER; + const ast::AstNodeType FLOAT_TYPE = ast::AstNodeType::DOUBLE; + + /// create new function for FUNCTION or PROCEDURE block + void create_function_for_node(ast::Block& node); public: CodegenLLVMHelperVisitor() = default; + void ion_read_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + + void ion_write_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + + void convert_to_instance_variable(ast::Node& node, std::string& index_var); + void visit_statement_block(ast::StatementBlock& node) override; void visit_procedure_block(ast::ProcedureBlock& node) override; void visit_function_block(ast::FunctionBlock& node) override; + void visit_nrn_state_block(ast::NrnStateBlock& node) override; void visit_program(ast::Program& node) override; }; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 0fa0864d9a..6228b39d04 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -350,7 +350,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Keep this for easier development (maybe move to debug mode later). 
std::cout << print_module(); - // not used yet + // not used yet : this will be used at the beginning of this function { CodegenLLVMHelperVisitor v; v.visit_program(const_cast(node)); diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index 356e05283f..acc7dec8b6 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -66,6 +66,7 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/boolean.hpp ${PROJECT_BINARY_DIR}/src/ast/breakpoint_block.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_argument.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_atomic_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_for_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_function.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_return_statement.hpp @@ -192,6 +193,7 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/valence.hpp ${PROJECT_BINARY_DIR}/src/ast/var_name.hpp ${PROJECT_BINARY_DIR}/src/ast/verbatim.hpp + ${PROJECT_BINARY_DIR}/src/ast/void.hpp ${PROJECT_BINARY_DIR}/src/ast/watch.hpp ${PROJECT_BINARY_DIR}/src/ast/watch_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/while_statement.hpp diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index d8c0a9d1ee..81421ecff9 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -28,6 +28,9 @@ - Expression: children: - Number: + - Void: + nmodl: "VOID" + brief: "Represent void type in code generation" - Identifier: children: - CodegenVarType: @@ -194,7 +197,7 @@ brief: "condition expression for the loop" type: Expression optional: true - prefix: {value: ";"} + prefix: {value: "; "} suffix: {value: "; "} - increment: brief: "increment or decrement expression for the loop" @@ -226,3 +229,35 @@ vector: true separator: ", " add: true + - CodegenAtomicStatement: + brief: "Represent atomic operation" + description: | + During code generation certain operations like ion updates, vec_rhs or + vec_d updates (for synapse) needs to be atomic operations 
if executed by + multiple threads. In case of SIMD, there are conflicts for `vec_d` and + `vec_rhs` for synapse types. Here are some statements from C++ backend: + + \code{.cpp} + vec_d[node_id] += g + vec_rhs[node_id] -= rhs + ion_ina[indexes[some_index]] += ina[id] + ion_cai[indexes[some_index]] = cai[id] // cai here is state variable + \endcode + + These operations will be represented by atomic statement node type: + * `vec_d[node_id]` : lhs + * `+=` : atomic_op + * `g` : rhs + + members: + - lhs: + brief: "Variable to be updated atomically" + type: Identifier + - atomic_op: + brief: "Operator" + type: BinaryOperator + prefix: {value: " "} + suffix: {value: " "} + - rhs: + brief: "Expression for atomic operation" + type: Expression From 5e0fee033f9dda926df007075b6d2b6f5714e2a6 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 8 Jan 2021 10:58:58 +0300 Subject: [PATCH 014/105] Running functions from MOD files via LLVM JIT (#482) This commit introduces a functionality to execute functions from MOD file via LLVM jit. For that, there is now: - `JITDriver` class that, given a LLVM IR module, set ups the JIT compiler and is able to look up a function and execute it. - `Runner` class that wraps around JIT driver. It helps to initialise JIT with LLVM IR module only once, and then run multiple functions from it. To execute functions, `nmodl_llvm_runner` executable is used. It takes a single mod file and a specified entry-point function, and runs it via LLVM code generation pipeline and JIT driver. Only functions with double result types are supported at the moment. For example, for MOD file `foo.mod`: ``` FUNCTION one() { one = 1 } FUNCTION bar() { bar = one() + exp(1) } ``` running `nmodl_llvm_runner -f foo.mod -e bar` gives ``` Result: 3.718282 ``` Tests for execution of generated IR have been added as well. 
fixes #482 Co-authored-by: Pramod Kumbhar --- cmake/LLVMHelper.cmake | 2 +- src/codegen/llvm/CMakeLists.txt | 38 ++++- src/codegen/llvm/codegen_llvm_visitor.hpp | 10 +- src/codegen/llvm/jit_driver.cpp | 81 ++++++++++ src/codegen/llvm/jit_driver.hpp | 82 ++++++++++ src/codegen/llvm/main.cpp | 74 +++++++++ test/unit/CMakeLists.txt | 13 ++ test/unit/codegen/codegen_llvm_execution.cpp | 162 +++++++++++++++++++ 8 files changed, 457 insertions(+), 5 deletions(-) create mode 100644 src/codegen/llvm/jit_driver.cpp create mode 100644 src/codegen/llvm/jit_driver.hpp create mode 100644 src/codegen/llvm/main.cpp create mode 100644 test/unit/codegen/codegen_llvm_execution.cpp diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index 5d451697b9..a731fa0151 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -5,7 +5,7 @@ find_package(LLVM REQUIRED CONFIG) # include LLVM header and core library -llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core native) +llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core orcjit native) set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index db16d4072c..bd54f4143d 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -5,12 +5,44 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp) # ============================================================================= -# LLVM codegen library +# LLVM codegen library and executable # ============================================================================= 
include_directories(${LLVM_INCLUDE_DIRS}) -add_library(llvm_codegen STATIC ${LLVM_CODEGEN_SOURCE_FILES}) +add_library(runner_obj OBJECT ${LLVM_CODEGEN_SOURCE_FILES}) +add_dependencies(runner_obj lexer_obj) +set_property(TARGET runner_obj PROPERTY POSITION_INDEPENDENT_CODE ON) + +add_library(llvm_codegen STATIC $) + add_dependencies(llvm_codegen lexer util visitor) + +if(NOT NMODL_AS_SUBPROJECT) + add_executable(nmodl_llvm_runner main.cpp) + + target_link_libraries( + nmodl_llvm_runner + llvm_codegen + codegen + visitor + symtab + lexer + util + test_util + printer + ${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) +endif() + +# ============================================================================= +# Install executable +# ============================================================================= + +if(NOT NMODL_AS_SUBPROJECT) + install(TARGETS nmodl_llvm_runner DESTINATION ${NMODL_INSTALL_DIR_SUFFIX}bin) +endif() diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index be4eb04867..599cfc7b58 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -154,6 +154,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ void emit_procedure_or_function_declaration(const ast::Block& node); + /** + * Return module pointer + * \return LLVM IR module pointer + */ + std::unique_ptr get_module() { + return std::move(module); + } + /** * Visit nmodl function or procedure * \param node the AST node representing the function or procedure in NMODL @@ -173,7 +181,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_unary_expression(const ast::UnaryExpression& node) override; void visit_var_name(const ast::VarName& node) override; - // TODO: use custom printer here + // \todo: move this to debug mode (e.g. 
-v option or --dump-ir) std::string print_module() const { std::string str; llvm::raw_string_ostream os(str); diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp new file mode 100644 index 0000000000..a7673bb2ff --- /dev/null +++ b/src/codegen/llvm/jit_driver.cpp @@ -0,0 +1,81 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "jit_driver.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" + +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/ObjectCache.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +namespace nmodl { +namespace runner { + +void JITDriver::init() { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + + set_target_triple(module.get()); + auto data_layout = module->getDataLayout(); + + // Create IR compile function callback. + auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) + -> llvm::Expected> { + auto tm = tm_builder.createTargetMachine(); + if (!tm) + return tm.takeError(); + return std::make_unique(std::move(*tm)); + }; + + auto jit_instance = cantFail( + llvm::orc::LLJITBuilder().setCompileFunctionCreator(compile_function_creator).create()); + + // Add a ThreadSafeModule to the driver. 
+ llvm::orc::ThreadSafeModule tsm(std::move(module), std::make_unique()); + cantFail(jit_instance->addIRModule(std::move(tsm))); + jit = std::move(jit_instance); + + // Resolve symbols. + llvm::orc::JITDylib& sym_tab = jit->getMainJITDylib(); + sym_tab.addGenerator(cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( + data_layout.getGlobalPrefix()))); +} + +void JITDriver::set_target_triple(llvm::Module* module) { + auto target_triple = llvm::sys::getDefaultTargetTriple(); + std::string error; + auto target = llvm::TargetRegistry::lookupTarget(target_triple, error); + if (!target) + throw std::runtime_error("Error: " + error + "\n"); + + std::string cpu(llvm::sys::getHostCPUName()); + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f: host_features) + features.AddFeature(f.first(), f.second); + } + + std::unique_ptr machine( + target->createTargetMachine(target_triple, cpu, features.getString(), {}, {})); + if (!machine) + throw std::runtime_error("Error: failed to create a target machine\n"); + + module->setDataLayout(machine->createDataLayout()); + module->setTargetTriple(target_triple); +} + +} // namespace runner +} // namespace nmodl diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp new file mode 100644 index 0000000000..d1e9a9412f --- /dev/null +++ b/src/codegen/llvm/jit_driver.hpp @@ -0,0 +1,82 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief Implementation of LLVM's JIT-based execution engine to run functions from MOD files + * + * \file + * \brief \copybrief nmodl::runner::JITDriver + */ + +#include "llvm/ExecutionEngine/Orc/LLJIT.h" + +namespace nmodl { +namespace runner { + +/** + * \class JITDriver + * \brief Driver to execute MOD file function via LLVM IR backend + */ +class JITDriver { + private: + std::unique_ptr context = std::make_unique(); + + std::unique_ptr jit; + + std::unique_ptr module; + + public: + JITDriver(std::unique_ptr m) + : module(std::move(m)) {} + + /// Initialize the JIT. + void init(); + + /// Lookup the entry-point in the JIT and execute it, returning the result. + template + T execute(const std::string& entry_point) { + auto expected_symbol = jit->lookup(entry_point); + if (!expected_symbol) + throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); + + auto (*res)() = (T(*)())(intptr_t) expected_symbol->getAddress(); + T result = res(); + return result; + } + + /// Set the target triple on the module. + static void set_target_triple(llvm::Module* module); +}; + +/** + * \class Runner + * \brief A wrapper around JITDriver to execute an entry point in the LLVM IR module. + */ +class Runner { + private: + std::unique_ptr module; + + std::unique_ptr driver = std::make_unique(std::move(module)); + + public: + Runner(std::unique_ptr m) + : module(std::move(m)) { + driver->init(); + } + + /// Run the entry-point function. 
+ template + double run(const std::string& entry_point) { + return driver->execute(entry_point); + } +}; + +} // namespace runner +} // namespace nmodl diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp new file mode 100644 index 0000000000..11ea178cb4 --- /dev/null +++ b/src/codegen/llvm/main.cpp @@ -0,0 +1,74 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include + +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "jit_driver.hpp" +#include "parser/nmodl_driver.hpp" +#include "utils/logger.hpp" +#include "visitors/symtab_visitor.hpp" + +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +using namespace nmodl; +using namespace runner; + +int main(int argc, const char* argv[]) { + CLI::App app{ + "NMODL LLVM Runner : Executes functions from a MOD file via LLVM IR code generation"}; + + // Currently, only a single MOD file is supported, as well as an entry point with a double + // return type. While returning a double value is a general case in NMODL, it will be nice to + // have a more generic functionality. \todo: Add support for different return types (int, void). 
+ + std::string filename; + std::string entry_point_name = "main"; + + app.add_option("-f,--file,file", filename, "A single MOD file source") + ->required() + ->check(CLI::ExistingFile); + app.add_option("-e,--entry-point,entry-point", + entry_point_name, + "An entry point function from the MOD file"); + + CLI11_PARSE(app, argc, argv); + + logger->info("Parsing MOD file to AST"); + parser::NmodlDriver driver; + const auto& ast = driver.parse_file(filename); + + logger->info("Running Symtab Visitor"); + visitor::SymtabVisitor().visit_program(*ast); + + logger->info("Running LLVM Visitor"); + codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", /*opt_passes=*/false); + llvm_visitor.visit_program(*ast); + std::unique_ptr module = llvm_visitor.get_module(); + + // Check if the entry-point is valid for JIT driver to execute. + auto func = module->getFunction(entry_point_name); + if (!func) + throw std::runtime_error("Error: entry-point is not found\n"); + + if (func->getNumOperands() != 0) + throw std::runtime_error("Error: entry-point functions with arguments are not supported\n"); + + if (!func->getReturnType()->isDoubleTy()) + throw std::runtime_error( + "Error: entry-point functions with non-double return type are not supported\n"); + + Runner runner(std::move(module)); + + // Since only double type is supported, provide explicit double type to the running function. 
+ auto r = runner.run(entry_point_name); + fprintf(stderr, "Result: %f\n", r); + + return 0; +} diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a5d95719e9..e67c23c8fb 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -109,6 +109,7 @@ target_link_libraries( if(NMODL_ENABLE_LLVM) include_directories(${LLVM_INCLUDE_DIRS}) add_executable(testllvm visitor/main.cpp codegen/llvm.cpp) + add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_llvm_execution.cpp) target_link_libraries( testllvm llvm_codegen @@ -121,6 +122,18 @@ if(NMODL_ENABLE_LLVM) printer ${NMODL_WRAPPER_LIBS} ${LLVM_LIBS_TO_LINK}) + target_link_libraries( + test_llvm_runner + llvm_codegen + codegen + visitor + symtab + lexer + util + test_util + printer + ${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) set(CODEGEN_TEST testllvm) endif() diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp new file mode 100644 index 0000000000..6f1bf7b8ca --- /dev/null +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -0,0 +1,162 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include +#include + +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/jit_driver.hpp" +#include "parser/nmodl_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/symtab_visitor.hpp" + +using namespace nmodl; +using namespace runner; +using namespace visitor; +using nmodl::parser::NmodlDriver; + +static double EPSILON = 1e-15; + +//============================================================================= +// No optimisations +//============================================================================= + +SCENARIO("Arithmetic expression", "[llvm][runner]") { + GIVEN("Functions with some arithmetic expressions") { + std::string nmodl_text = R"( + FUNCTION exponential() { + LOCAL i + i = 1 + exponential = exp(i) + } + + FUNCTION constant() { + constant = 10 + } + + FUNCTION arithmetic() { + LOCAL x, y + x = 3 + y = 7 + arithmetic = x * y / (x + y) + } + + FUNCTION bar() { + LOCAL i, j + i = 2 + j = i + 2 + bar = 2 * 3 + j + } + + FUNCTION function_call() { + foo() + function_call = bar() / constant() + } + + PROCEDURE foo() {} + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + SymtabVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/false); + llvm_visitor.visit_program(*ast); + + std::unique_ptr m = llvm_visitor.get_module(); + Runner runner(std::move(m)); + + THEN("functions are evaluated correctly") { + auto exp_result = runner.run("exponential"); + REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); + + auto constant_result = runner.run("constant"); + REQUIRE(fabs(constant_result - 10.0) < EPSILON); + + auto arithmetic_result = runner.run("arithmetic"); + REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); + + auto function_call_result = runner.run("function_call"); + 
REQUIRE(fabs(function_call_result - 1.0) < EPSILON); + } + } +} + +//============================================================================= +// With optimisations +//============================================================================= + +SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { + GIVEN("Functions with some arithmetic expressions") { + std::string nmodl_text = R"( + FUNCTION exponential() { + LOCAL i + i = 1 + exponential = exp(i) + } + + FUNCTION constant() { + constant = 10 * 2 - 100 / 50 * 5 + } + + FUNCTION arithmetic() { + LOCAL x, y + x = 3 + y = 7 + arithmetic = x * y / (x + y) + } + + FUNCTION bar() { + LOCAL i, j + i = 2 + j = i + 2 + bar = 2 * 3 + j + } + + FUNCTION function_call() { + foo() + function_call = bar() / constant() + } + + PROCEDURE foo() {} + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + SymtabVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/true); + llvm_visitor.visit_program(*ast); + + std::unique_ptr m = llvm_visitor.get_module(); + Runner runner(std::move(m)); + + THEN("optimizations preserve function results") { + // Check exponential is turned into a constant. + auto exp_result = runner.run("exponential"); + REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); + + // Check constant folding. + auto constant_result = runner.run("constant"); + REQUIRE(fabs(constant_result - 10.0) < EPSILON); + + // Check constant folding. 
+ auto arithmetic_result = runner.run("arithmetic"); + REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); + + auto function_call_result = runner.run("function_call"); + REQUIRE(fabs(function_call_result - 1.0) < EPSILON); + } + } +} From 34bbaab025544954598529deefec52ed0c597198 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 12 Jan 2021 12:50:22 +0300 Subject: [PATCH 015/105] Extended support for binary ops and refactoring (#489) * Added more bin ops and refactored code - Now, there are code generation functions for all comparison and logical operators. - Code generation functions are now split based on the expression "type" (assignment, arithmetic, comparison, logical). Moreover, the lhs and rhs expression results can be both double and integer. This is important for control flow code generation and for the new AST node CodegenVarType. * Added support for NOT op * Added default type flag to switch between float and double * Added tests for single precision * Renames LLVM test file to codegen_llvm_ir.cpp to follow convention. * NOTE : Tests for new operators will be added when the first control flow node (most likely FOR node) will land. 
fixes #453 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 158 ++++++++++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 47 +++++- src/main.cpp | 8 +- test/unit/CMakeLists.txt | 2 +- .../codegen/{llvm.cpp => codegen_llvm_ir.cpp} | 20 ++- 5 files changed, 188 insertions(+), 47 deletions(-) rename test/unit/codegen/{llvm.cpp => codegen_llvm_ir.cpp} (95%) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 6228b39d04..6f134149e3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -65,6 +65,12 @@ unsigned CodegenLLVMVisitor::get_array_index_or_length(const ast::IndexedName& i return static_cast(*macro->get_value()); } +llvm::Type* CodegenLLVMVisitor::get_default_fp_type() { + if (use_single_precision) + return llvm::Type::getFloatTy(*context); + return llvm::Type::getDoubleTy(*context); +} + void CodegenLLVMVisitor::run_llvm_opt_passes() { /// run some common optimisation passes that are commonly suggested fpm.add(llvm::createInstructionCombiningPass()); @@ -139,10 +145,10 @@ void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Block // Procedure or function parameters are doubles by default. std::vector arg_types; for (size_t i = 0; i < parameters.size(); ++i) - arg_types.push_back(llvm::Type::getDoubleTy(*context)); + arg_types.push_back(get_default_fp_type()); // If visiting a function, the return type is a double by default. - llvm::Type* return_type = node.is_function_block() ? llvm::Type::getDoubleTy(*context) + llvm::Type* return_type = node.is_function_block() ? get_default_fp_type() : llvm::Type::getVoidTy(*context); // Create a function that is automatically inserted into module's symbol table. 
@@ -152,6 +158,90 @@ void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Block *module); } +llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, + llvm::Value* rhs, + unsigned op) { + const auto& bin_op = static_cast(op); + llvm::Type* lhs_type = lhs->getType(); + llvm::Value* result; + + switch (bin_op) { +#define DISPATCH(binary_op, llvm_fp_op, llvm_int_op) \ + case binary_op: \ + if (lhs_type->isDoubleTy() || lhs_type->isFloatTy()) \ + result = llvm_fp_op(lhs, rhs); \ + else \ + result = llvm_int_op(lhs, rhs); \ + return result; + + DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd, builder.CreateAdd); + DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv, builder.CreateSDiv); + DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul, builder.CreateMul); + DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub, builder.CreateSub); + +#undef DISPATCH + + default: + return nullptr; + } +} + +void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm::Value* rhs) { + auto var = dynamic_cast(node.get_lhs().get()); + if (!var) { + throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); + } + + const auto& identifier = var->get_name(); + if (identifier->is_name()) { + llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); + builder.CreateStore(rhs, alloca); + } else if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + builder.CreateStore(rhs, codegen_indexed_name(*indexed_name)); + } else { + throw std::runtime_error("Error: Unsupported variable type"); + } +} + +llvm::Value* CodegenLLVMVisitor::visit_logical_bin_op(llvm::Value* lhs, + llvm::Value* rhs, + unsigned op) { + const auto& bin_op = static_cast(op); + return bin_op == ast::BinaryOp::BOP_AND ? 
builder.CreateAnd(lhs, rhs) + : builder.CreateOr(lhs, rhs); +} + +llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, + llvm::Value* rhs, + unsigned op) { + const auto& bin_op = static_cast(op); + llvm::Type* lhs_type = lhs->getType(); + llvm::Value* result; + + switch (bin_op) { +#define DISPATCH(binary_op, f_llvm_op, i_llvm_op) \ + case binary_op: \ + if (lhs_type->isDoubleTy() || lhs_type->isFloatTy()) \ + result = f_llvm_op(lhs, rhs); \ + else \ + result = i_llvm_op(lhs, rhs); \ + return result; + + DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, builder.CreateICmpEQ, builder.CreateFCmpOEQ); + DISPATCH(ast::BinaryOp::BOP_GREATER, builder.CreateICmpSGT, builder.CreateFCmpOGT); + DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, builder.CreateICmpSGE, builder.CreateFCmpOGE); + DISPATCH(ast::BinaryOp::BOP_LESS, builder.CreateICmpSLT, builder.CreateFCmpOLT); + DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, builder.CreateICmpSLE, builder.CreateFCmpOLE); + DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, builder.CreateICmpNE, builder.CreateFCmpONE); + +#undef DISPATCH + + default: + return nullptr; + } +} + void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { const auto& name = node.get_node_name(); const auto& parameters = node.get_parameters(); @@ -222,44 +312,39 @@ void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& no llvm::Value* rhs = values.back(); values.pop_back(); if (op == ast::BinaryOp::BOP_ASSIGN) { - auto var = dynamic_cast(node.get_lhs().get()); - if (!var) { - throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); - } - - const auto& identifier = var->get_name(); - if (identifier->is_name()) { - llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); - builder.CreateStore(rhs, alloca); - } else if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - builder.CreateStore(rhs, codegen_indexed_name(*indexed_name)); - } 
else { - throw std::runtime_error("Error: Unsupported variable type"); - } + visit_assign_op(node, rhs); return; } node.get_lhs()->accept(*this); llvm::Value* lhs = values.back(); values.pop_back(); - llvm::Value* result; - // \todo: Support other binary operators + llvm::Value* result; switch (op) { -#define DISPATCH(binary_op, llvm_op) \ - case binary_op: \ - result = llvm_op(lhs, rhs); \ - values.push_back(result); \ + case ast::BOP_ADDITION: + case ast::BOP_DIVISION: + case ast::BOP_MULTIPLICATION: + case ast::BOP_SUBTRACTION: + result = visit_arithmetic_bin_op(lhs, rhs, op); break; - - DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd); - DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv); - DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul); - DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub); - -#undef DISPATCH + case ast::BOP_AND: + case ast::BOP_OR: + result = visit_logical_bin_op(lhs, rhs, op); + break; + case ast::BOP_EXACT_EQUAL: + case ast::BOP_GREATER: + case ast::BOP_GREATER_EQUAL: + case ast::BOP_LESS: + case ast::BOP_LESS_EQUAL: + case ast::BOP_NOT_EQUAL: + result = visit_comparison_bin_op(lhs, rhs, op); + break; + default: + throw std::runtime_error("Error: binary operator is not supported\n"); } + + values.push_back(result); } void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { @@ -269,8 +354,7 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { } void CodegenLLVMVisitor::visit_double(const ast::Double& node) { - const auto& constant = llvm::ConstantFP::get(llvm::Type::getDoubleTy(*context), - node.get_value()); + const auto& constant = llvm::ConstantFP::get(get_default_fp_type(), node.get_value()); values.push_back(constant); } @@ -310,10 +394,10 @@ void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatemen if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); unsigned length = 
get_array_index_or_length(*indexed_name); - var_type = llvm::ArrayType::get(llvm::Type::getDoubleTy(*context), length); + var_type = llvm::ArrayType::get(get_default_fp_type(), length); } else if (identifier->is_name()) { // This case corresponds to a scalar local variable. Its type is double by default. - var_type = llvm::Type::getDoubleTy(*context); + var_type = get_default_fp_type(); } else { throw std::runtime_error("Error: Unsupported local variable type"); } @@ -367,10 +451,10 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node llvm::Value* value = values.back(); values.pop_back(); if (op == ast::UOP_NEGATION) { - llvm::Value* result = builder.CreateFNeg(value); - values.push_back(result); + values.push_back(builder.CreateFNeg(value)); + } else if (op == ast::UOP_NOT) { + values.push_back(builder.CreateNot(value)); } else { - // Support only `double` operators for now. throw std::runtime_error("Error: unsupported unary operator\n"); } } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 599cfc7b58..066bdf35e3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -76,6 +76,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Run optimisation passes if true. bool opt_passes; + // Use 32-bit floating-point type if true. Otherwise, use deafult 64-bit. 
+ bool use_single_precision; + /** *\brief Run LLVM optimisation passes on generated IR * @@ -93,10 +96,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, - bool opt_passes) + bool opt_passes, + bool use_single_precision = false) : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) + , use_single_precision(use_single_precision) , builder(*context) , fpm(module.get()) {} @@ -129,6 +134,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ unsigned get_array_index_or_length(const ast::IndexedName& node); + /** + * Returns 64-bit or 32-bit LLVM floating type + * \return \c LLVM floating point type according to `use_single_precision` flag + */ + llvm::Type* get_default_fp_type(); + /** * Create a function call to an external method * \param name external method name @@ -162,6 +173,40 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { return std::move(module); } + /** + * Visit nmodl arithmetic binary operator + * \param lhs LLVM value of evaluated lhs expression + * \param rhs LLVM value of evaluated rhs expression + * \param op the AST binary operator (ADD, DIV, MUL, SUB) + * \return LLVM IR value result + */ + llvm::Value* visit_arithmetic_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); + + /** + * Visit nmodl assignment operator (ASSIGN) + * \param node the AST node representing the binary expression in NMODL + * \param rhs LLVM value of evaluated rhs expression + */ + void visit_assign_op(const ast::BinaryExpression& node, llvm::Value* rhs); + + /** + * Visit nmodl logical binary operator + * \param lhs LLVM value of evaluated lhs expression + * \param rhs LLVM value of evaluated rhs expression + * \param op the AST binary operator (AND, OR) + * \return LLVM IR value result + */ + llvm::Value* visit_logical_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); + + /** + * Visit nmodl comparison binary 
operator + * \param lhs LLVM value of evaluated lhs expression + * \param rhs LLVM value of evaluated rhs expression + * \param op the AST binary operator (EXACT_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, + * NOT_EQUAL) \return LLVM IR value result + */ + llvm::Value* visit_comparison_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); + /** * Visit nmodl function or procedure * \param node the AST node representing the function or procedure in NMODL diff --git a/src/main.cpp b/src/main.cpp index 28411afaf9..38f8cbdcb2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -169,6 +169,9 @@ int main(int argc, const char* argv[]) { /// generate llvm IR bool llvm_ir(false); + /// use single precision floating-point types + bool llvm_float_type(false); + /// run llvm optimisation passes bool llvm_opt_passes(false); #endif @@ -299,6 +302,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_flag("--opt", llvm_opt_passes, "Run LLVM optimisation passes ({})"_format(llvm_opt_passes))->ignore_case(); + llvm_opt->add_flag("--single-precision", + llvm_float_type, + "Use single precision floating-point types ({})"_format(llvm_float_type))->ignore_case(); #endif // clang-format on @@ -612,7 +618,7 @@ int main(int argc, const char* argv[]) { #ifdef NMODL_LLVM_BACKEND if (llvm_ir) { logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_passes); + CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_passes, llvm_float_type); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm")); } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index e67c23c8fb..88ecd3b75d 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -108,7 +108,7 @@ target_link_libraries( if(NMODL_ENABLE_LLVM) include_directories(${LLVM_INCLUDE_DIRS}) - add_executable(testllvm visitor/main.cpp codegen/llvm.cpp) + add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp) 
add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_llvm_execution.cpp) target_link_libraries( testllvm diff --git a/test/unit/codegen/llvm.cpp b/test/unit/codegen/codegen_llvm_ir.cpp similarity index 95% rename from test/unit/codegen/llvm.cpp rename to test/unit/codegen/codegen_llvm_ir.cpp index d644947e79..e44b2b15cd 100644 --- a/test/unit/codegen/llvm.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -22,13 +22,18 @@ using nmodl::parser::NmodlDriver; // Utility to get LLVM module as a string //============================================================================= -std::string run_llvm_visitor(const std::string& text, bool opt = false) { +std::string run_llvm_visitor(const std::string& text, + bool opt = false, + bool use_single_precision = false) { NmodlDriver driver; const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); - codegen::CodegenLLVMVisitor llvm_visitor("unknown", ".", opt); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + opt, + use_single_precision); llvm_visitor.visit_program(*ast); return llvm_visitor.print_module(); } @@ -47,14 +52,15 @@ SCENARIO("Binary expression", "[visitor][llvm]") { )"; THEN("variables are loaded and add instruction is created") { - std::string module_string = run_llvm_visitor(nmodl_text); + std::string module_string = + run_llvm_visitor(nmodl_text, /*opt=*/false, /*use_single_precision=*/true); std::smatch m; - std::regex rhs(R"(%1 = load double, double\* %b)"); - std::regex lhs(R"(%2 = load double, double\* %a)"); - std::regex res(R"(%3 = fadd double %2, %1)"); + std::regex rhs(R"(%1 = load float, float\* %b)"); + std::regex lhs(R"(%2 = load float, float\* %a)"); + std::regex res(R"(%3 = fadd float %2, %1)"); - // Check the values are loaded correctly and added + // Check the float values are loaded correctly and added REQUIRE(std::regex_search(module_string, m, rhs)); REQUIRE(std::regex_search(module_string, m, lhs)); 
REQUIRE(std::regex_search(module_string, m, res)); From 1f4c8dcf32d9d39a2a30749a1a828223521bb8fa Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Tue, 12 Jan 2021 10:55:23 +0100 Subject: [PATCH 016/105] Avoid converting LOCAL statement in all StatementBlocks (#492) * visit_statement_block of all FUNCTION and PROCEDURE blocks was called resulting in changing LOCAL statement to DOUBLE statement * As statement block doesn't need to be visited for this purpose, rename function to convert_local_statement * Call convert_local_statement only when required i.e. only when codegen function creation time. fixes #491 --- src/codegen/llvm/codegen_llvm_helper_visitor.cpp | 8 +++++++- src/codegen/llvm/codegen_llvm_helper_visitor.hpp | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 341ab03fb6..4dec93c52e 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -123,6 +123,9 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { auto block = node.get_statement_block()->clone(); const auto& statements = block->get_statements(); + /// convert local statement to codegenvar statement + convert_local_statement(*block); + /// insert return variable at the start of the block ast::CodegenVarVector codegen_vars; codegen_vars.emplace_back(new ast::CodegenVar(0, return_var->clone())); @@ -356,7 +359,7 @@ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, * first statement in the vector. We have to remove LOCAL statement and convert * it to CodegenVarListStatement that will represent all variables as double. 
*/ -void CodegenLLVMHelperVisitor::visit_statement_block(ast::StatementBlock& node) { +void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node) { /// first process all children blocks if any node.visit_children(*this); @@ -475,6 +478,9 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// convert all variables inside loop body to instance variables convert_to_instance_variable(*loop_block, loop_index_var); + /// convert local statement to codegenvar statement + convert_local_statement(*loop_block); + /// create for loop node auto for_loop_statement = std::make_shared(initialization, condition, diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 6b1684e7d1..1db659c1b4 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -75,7 +75,8 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { void convert_to_instance_variable(ast::Node& node, std::string& index_var); - void visit_statement_block(ast::StatementBlock& node) override; + void convert_local_statement(ast::StatementBlock& node); + void visit_procedure_block(ast::ProcedureBlock& node) override; void visit_function_block(ast::FunctionBlock& node) override; void visit_nrn_state_block(ast::NrnStateBlock& node) override; From aa639de9d04e145a355426ca688f0a2970be8931 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Wed, 13 Jan 2021 11:55:57 +0100 Subject: [PATCH 017/105] Handle CodegenVarType type in JSON printer (#494) * Handle CodegenVarType type in JSON printer - As AstNodeType is enum type and node itself, we need to print that explicitly * Indent json visitor jinja template - initially template was not indented as code generated was not looking good - now all generated code is automatically clang-formatted so it's less of a concern. Readability is important. 
fixes #493 --- src/language/node_info.py | 1 + src/language/nodes.py | 4 ++ .../templates/visitors/json_visitor.cpp | 47 +++++++++++-------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/language/node_info.py b/src/language/node_info.py index 6b9a74243f..b08041f0a4 100644 --- a/src/language/node_info.py +++ b/src/language/node_info.py @@ -166,6 +166,7 @@ STRING_NODE = "String" UNIT_BLOCK = "UnitBlock" AST_NODETYPE_NODE= "AstNodeType" +CODEGEN_VAR_TYPE_NODE = "CodegenVarType" # name of variable in prime node which represent order of derivative ORDER_VAR_NAME = "order" diff --git a/src/language/nodes.py b/src/language/nodes.py index ad4ee818c6..d6a804a315 100644 --- a/src/language/nodes.py +++ b/src/language/nodes.py @@ -140,6 +140,10 @@ def is_name_node(self): def is_ast_nodetype_node(self): return self.class_name == node_info.AST_NODETYPE_NODE + @property + def is_codegen_var_type_node(self): + return self.class_name == node_info.CODEGEN_VAR_TYPE_NODE + @property def is_enum_node(self): data_type = node_info.DATA_TYPES[self.class_name] diff --git a/src/language/templates/visitors/json_visitor.cpp b/src/language/templates/visitors/json_visitor.cpp index 11bd9cf556..4633ce338e 100644 --- a/src/language/templates/visitors/json_visitor.cpp +++ b/src/language/templates/visitors/json_visitor.cpp @@ -22,33 +22,40 @@ using namespace ast; {% for node in nodes %} void JSONVisitor::visit_{{ node.class_name|snake_case }}(const {{ node.class_name }}& node) { {% if node.has_children() %} - printer->push_block(node.get_node_type_name()); - if (embed_nmodl) { - printer->add_block_property("nmodl", to_nmodl(node)); - } - node.visit_children(*this); - {% if node.is_data_type_node %} + printer->push_block(node.get_node_type_name()); + if (embed_nmodl) { + printer->add_block_property("nmodl", to_nmodl(node)); + } + node.visit_children(*this); + {% if node.is_data_type_node %} {% if node.is_integer_node %} - if(!node.get_macro()) { - std::stringstream ss; - ss << 
node.eval(); - printer->add_node(ss.str()); - } + if(!node.get_macro()) { + std::stringstream ss; + ss << node.eval(); + printer->add_node(ss.str()); + } {% else %} - std::stringstream ss; - ss << node.eval(); - printer->add_node(ss.str()); + std::stringstream ss; + ss << node.eval(); + printer->add_node(ss.str()); {% endif %} {% endif %} - printer->pop_block(); + + {% if node.is_codegen_var_type_node %} + printer->add_node(ast::to_string(node.get_type())); + {% endif %} + + printer->pop_block(); + {% if node.is_program_node %} - if (node.get_parent() == nullptr) { - flush(); - } + if (node.get_parent() == nullptr) { + flush(); + } {% endif %} + {% else %} - (void)node; - printer->add_node("{{ node.class_name }}"); + (void)node; + printer->add_node("{{ node.class_name }}"); {% endif %} } From a541c69741d0eda1343572cf4b83385e758e4dd1 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 25 Jan 2021 15:59:04 +0300 Subject: [PATCH 018/105] Integrating LLVM helper into LLVM visitor (#497) * LLVM Helper visitor now can return a vector of `CodegenFunction`s. * LLVM Helper visitor has been integrated into LLVM visitor: - The type of variables is still double by default, but can also be inferred from `CodegenVarType` node. - Procedure's return type changed to int (so that error codes can be returned in the future). - New visitor functions added: for `CodegenReturn`, `CodegenFunction`, `CodegenVarList` and `CodegenVarType`. 
--- .../llvm/codegen_llvm_helper_visitor.cpp | 6 +- .../llvm/codegen_llvm_helper_visitor.hpp | 8 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 234 ++++++++++-------- src/codegen/llvm/codegen_llvm_visitor.hpp | 17 +- test/unit/codegen/codegen_llvm_ir.cpp | 39 ++- 5 files changed, 177 insertions(+), 127 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 4dec93c52e..751fecfc81 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -71,6 +71,11 @@ std::shared_ptr create_expression(const std::string& code) { return std::make_shared(rhs->clone()); } +CodegenFunctionVector CodegenLLVMHelperVisitor::get_codegen_functions(const ast::Program& node) { + const_cast(node).accept(*this); + return codegen_functions; +} + /** * \brief Add code generation function for FUNCTION or PROCEDURE block * @param node AST node representing FUNCTION or PROCEDURE @@ -98,7 +103,6 @@ std::shared_ptr create_expression(const std::string& code) { * * We perform following transformations so that code generation backends * will have minimum logic: - * - Add return type * - Add type for the function arguments * - Define variables and return variable * - Add return type (int for PROCEDURE and double for FUNCTION) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 1db659c1b4..0ec3792b9d 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -21,6 +21,9 @@ namespace nmodl { namespace codegen { + +typedef std::vector> CodegenFunctionVector; + /** * @addtogroup llvm_codegen_details * @{ @@ -46,7 +49,7 @@ namespace codegen { */ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { /// newly generated code generation specific functions - std::vector> codegen_functions; + CodegenFunctionVector codegen_functions; /// ast 
information for code generation codegen::CodegenInfo info; @@ -61,6 +64,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { public: CodegenLLVMHelperVisitor() = default; + /// run visitor and return code generation functions + CodegenFunctionVector get_codegen_functions(const ast::Program& node); + void ion_read_statements(BlockType type, std::vector& int_variables, std::vector& double_variables, diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 6f134149e3..2d762c0e92 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -9,7 +9,6 @@ #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "ast/all.hpp" -#include "codegen/codegen_helper_visitor.hpp" #include "visitors/rename_visitor.hpp" #include "llvm/IR/BasicBlock.h" @@ -28,8 +27,10 @@ namespace codegen { /****************************************************************************************/ bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { - llvm::Type* array_type = - local_named_values->lookup(node.get_node_name())->getType()->getPointerElementType(); + llvm::Type* array_type = current_func->getValueSymbolTable() + ->lookup(node.get_node_name()) + ->getType() + ->getPointerElementType(); unsigned length = array_type->getArrayNumElements(); return 0 <= index && index < length; } @@ -40,7 +41,7 @@ llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, unsigned in indices.push_back(llvm::ConstantInt::get(index_type, 0)); indices.push_back(llvm::ConstantInt::get(index_type, index)); - return builder.CreateInBoundsGEP(local_named_values->lookup(name), indices); + return builder.CreateInBoundsGEP(current_func->getValueSymbolTable()->lookup(name), indices); } llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { @@ -65,6 +66,21 @@ unsigned CodegenLLVMVisitor::get_array_index_or_length(const 
ast::IndexedName& i return static_cast(*macro->get_value()); } +llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& node) { + switch (node.get_type()) { + case ast::AstNodeType::BOOLEAN: + return llvm::Type::getInt1Ty(*context); + case ast::AstNodeType::DOUBLE: + return get_default_fp_type(); + case ast::AstNodeType::INTEGER: + return llvm::Type::getInt32Ty(*context); + case ast::AstNodeType::VOID: + return llvm::Type::getVoidTy(*context); + default: + throw std::runtime_error("Error: expecting a type in CodegenVarType node\n"); + } +} + llvm::Type* CodegenLLVMVisitor::get_default_fp_type() { if (use_single_precision) return llvm::Type::getFloatTy(*context); @@ -138,18 +154,16 @@ void CodegenLLVMVisitor::create_function_call(llvm::Function* func, values.push_back(call); } -void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Block& node) { +void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::CodegenFunction& node) { const auto& name = node.get_node_name(); - const auto& parameters = node.get_parameters(); + const auto& arguments = node.get_arguments(); // Procedure or function parameters are doubles by default. std::vector arg_types; - for (size_t i = 0; i < parameters.size(); ++i) - arg_types.push_back(get_default_fp_type()); + for (size_t i = 0; i < arguments.size(); ++i) + arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); - // If visiting a function, the return type is a double by default. - llvm::Type* return_type = node.is_function_block() ? get_default_fp_type() - : llvm::Type::getVoidTy(*context); + llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); // Create a function that is automatically inserted into module's symbol table. 
llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), @@ -194,7 +208,7 @@ void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm const auto& identifier = var->get_name(); if (identifier->is_name()) { - llvm::Value* alloca = local_named_values->lookup(var->get_node_name()); + llvm::Value* alloca = current_func->getValueSymbolTable()->lookup(var->get_node_name()); builder.CreateStore(rhs, alloca); } else if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); @@ -242,62 +256,6 @@ llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, } } -void CodegenLLVMVisitor::visit_procedure_or_function(const ast::Block& node) { - const auto& name = node.get_node_name(); - const auto& parameters = node.get_parameters(); - llvm::Function* func = module->getFunction(name); - - // Create the entry basic block of the function/procedure and point the local named values table - // to the symbol table. - llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func); - builder.SetInsertPoint(body); - local_named_values = func->getValueSymbolTable(); - - // When processing a function, it returns a value named in NMODL. Therefore, we - // first run RenameVisitor to rename it into ret_. This will aid in avoiding - // symbolic conflicts. Then, allocate the return variable on the local stack. - std::string return_var_name = "ret_" + name; - const auto& block = node.get_statement_block(); - if (node.is_function_block()) { - visitor::RenameVisitor v(name, return_var_name); - block->accept(v); - builder.CreateAlloca(llvm::Type::getDoubleTy(*context), - /*ArraySize=*/nullptr, - return_var_name); - } - - // Allocate parameters on the stack and add them to the symbol table. 
- unsigned i = 0; - for (auto& arg: func->args()) { - std::string arg_name = parameters[i++].get()->get_node_name(); - llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); - arg.setName(arg_name); - builder.CreateStore(&arg, alloca); - } - - // Process function or procedure body. - const auto& statements = block->get_statements(); - for (const auto& statement: statements) { - // \todo: Support other statement types. - if (statement->is_local_list_statement() || statement->is_expression_statement()) - statement->accept(*this); - } - - // Add the terminator. If visiting function, we need to return the value specified by - // ret_. - if (node.is_function_block()) { - llvm::Value* return_var = builder.CreateLoad(local_named_values->lookup(return_var_name)); - builder.CreateRet(return_var); - } else { - builder.CreateRetVoid(); - } - - // Clear local values stack and remove the pointer to the local symbol table. - values.clear(); - local_named_values = nullptr; -} - - /****************************************************************************************/ /* Overloaded visitor routines */ /****************************************************************************************/ @@ -353,13 +311,101 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { values.push_back(constant); } +void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node) { + const auto& name = node.get_node_name(); + const auto& arguments = node.get_arguments(); + llvm::Function* func = module->getFunction(name); + current_func = func; + + // Create the entry basic block of the function/procedure and point the local named values table + // to the symbol table. + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func); + builder.SetInsertPoint(body); + + // When processing a function, it returns a value named in NMODL. Therefore, we + // first run RenameVisitor to rename it into ret_. 
This will aid in avoiding + // symbolic conflicts. + std::string return_var_name = "ret_" + name; + const auto& block = node.get_statement_block(); + visitor::RenameVisitor v(name, return_var_name); + block->accept(v); + + + // Allocate parameters on the stack and add them to the symbol table. + unsigned i = 0; + for (auto& arg: func->args()) { + std::string arg_name = arguments[i++].get()->get_node_name(); + llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); + arg.setName(arg_name); + builder.CreateStore(&arg, alloca); + } + + // Process function or procedure body. The return statement is handled in a separate visitor. + const auto& statements = block->get_statements(); + for (const auto& statement: statements) { + // \todo: Support other statement types. + if (statement->is_codegen_var_list_statement() || statement->is_expression_statement() || + statement->is_codegen_return_statement()) + statement->accept(*this); + } + + // If function has a void return type, add a terminator not handled by CodegenReturnVar. + if (node.is_void()) + builder.CreateRetVoid(); + + // Clear local values stack and remove the pointer to the local symbol table. 
+ values.clear(); + current_func = nullptr; +} + +void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturnStatement& node) { + if (!node.get_statement()->is_name()) + throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); + + std::string ret = "ret_" + current_func->getName().str(); + llvm::Value* ret_value = builder.CreateLoad(current_func->getValueSymbolTable()->lookup(ret)); + builder.CreateRet(ret_value); +} + +void CodegenLLVMVisitor::visit_codegen_var_list_statement( + const ast::CodegenVarListStatement& node) { + llvm::Type* scalar_var_type = get_codegen_var_type(*node.get_var_type()); + for (const auto& variable: node.get_variables()) { + std::string name = variable->get_node_name(); + const auto& identifier = variable->get_name(); + // Local variable can be a scalar (Node AST class) or an array (IndexedName AST class). For + // each case, create memory allocations with the corresponding LLVM type. + llvm::Type* var_type; + if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + unsigned length = get_array_index_or_length(*indexed_name); + var_type = llvm::ArrayType::get(scalar_var_type, length); + } else if (identifier->is_name()) { + // This case corresponds to a scalar local variable. Its type is double by default. + var_type = scalar_var_type; + } else { + throw std::runtime_error("Error: Unsupported local variable type"); + } + llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); + + // Check if the variable we process is a procedure return variable (i.e. it has a name + // "ret_" and the function return type is integer). If so, initialise + // it to 0. 
+ std::string ret_val_name = "ret_" + current_func->getName().str(); + if (name == ret_val_name && current_func->getReturnType()->isIntegerTy()) { + llvm::Value* zero = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 0); + builder.CreateStore(zero, alloca); + } + } +} + void CodegenLLVMVisitor::visit_double(const ast::Double& node) { const auto& constant = llvm::ConstantFP::get(get_default_fp_type(), node.get_value()); values.push_back(constant); } void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { - visit_procedure_or_function(node); + // do nothing. \todo: remove old function blocks from ast. } void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { @@ -384,41 +430,19 @@ void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { values.push_back(constant); } -void CodegenLLVMVisitor::visit_local_list_statement(const ast::LocalListStatement& node) { - for (const auto& variable: node.get_variables()) { - std::string name = variable->get_node_name(); - const auto& identifier = variable->get_name(); - // Local variable can be a scalar (Node AST class) or an array (IndexedName AST class). For - // each case, create memory allocations with the corresponding LLVM type. - llvm::Type* var_type; - if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - unsigned length = get_array_index_or_length(*indexed_name); - var_type = llvm::ArrayType::get(get_default_fp_type(), length); - } else if (identifier->is_name()) { - // This case corresponds to a scalar local variable. Its type is double by default. - var_type = get_default_fp_type(); - } else { - throw std::runtime_error("Error: Unsupported local variable type"); - } - builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); - } -} - void CodegenLLVMVisitor::visit_program(const ast::Program& node) { - // Before generating LLVM, gather information about AST. 
For now, information about functions - // and procedures is used only. - CodegenHelperVisitor v; - CodegenInfo info = v.analyze(node); - - // For every function and procedure, generate its declaration. Thus, we can look up + // Before generating LLVM: + // - convert function and procedure blocks into CodegenFunctions + // - gather information about AST. For now, information about functions + // and procedures is used only. + CodegenLLVMHelperVisitor v; + const auto& functions = v.get_codegen_functions(node); + + // For every function, generate its declaration. Thus, we can look up // `llvm::Function` in the symbol table in the module. - for (const auto& func: info.functions) { + for (const auto& func: functions) { emit_procedure_or_function_declaration(*func); } - for (const auto& proc: info.procedures) { - emit_procedure_or_function_declaration(*proc); - } // Set the AST symbol table. sym_tab = node.get_symbol_table(); @@ -433,16 +457,10 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Keep this for easier development (maybe move to debug mode later). std::cout << print_module(); - - // not used yet : this will be used at the beginning of this function - { - CodegenLLVMHelperVisitor v; - v.visit_program(const_cast(node)); - } } void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { - visit_procedure_or_function(node); + // do nothing. \todo: remove old procedures from ast. 
} void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node) { @@ -466,7 +484,7 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { llvm::Value* ptr; if (identifier->is_name()) - ptr = local_named_values->lookup(node.get_node_name()); + ptr = current_func->getValueSymbolTable()->lookup(node.get_node_name()); if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 066bdf35e3..c6123a040d 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -67,8 +67,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Stack to hold visited values std::vector values; - // Pointer to the local symbol table. - llvm::ValueSymbolTable* local_named_values = nullptr; + // Pointer to the current function. + llvm::Function* current_func = nullptr; // Pointer to AST symbol table. 
symtab::SymbolTable* sym_tab; @@ -134,6 +134,13 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ unsigned get_array_index_or_length(const ast::IndexedName& node); + /** + * Returns LLVM type for the given CodegenVarType node + * \param node CodegenVarType + * \return LLVM type + */ + llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); + /** * Returns 64-bit or 32-bit LLVM floating type * \return \c LLVM floating point type according to `use_single_precision` flag @@ -163,7 +170,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { * * \param node the AST node representing the function or procedure in NMODL */ - void emit_procedure_or_function_declaration(const ast::Block& node); + void emit_procedure_or_function_declaration(const ast::CodegenFunction& node); /** * Return module pointer @@ -216,11 +223,13 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Visitors void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; + void visit_codegen_function(const ast::CodegenFunction& node) override; + void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; + void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; void visit_double(const ast::Double& node) override; void visit_function_block(const ast::FunctionBlock& node) override; void visit_function_call(const ast::FunctionCall& node) override; void visit_integer(const ast::Integer& node) override; - void visit_local_list_statement(const ast::LocalListStatement& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; void visit_program(const ast::Program& node) override; void visit_unary_expression(const ast::UnaryExpression& node) override; diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index e44b2b15cd..c328113f93 100644 --- 
a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -198,12 +198,12 @@ SCENARIO("Function call", "[visitor][llvm]") { } )"; - THEN("a void call instruction is created") { + THEN("an int call instruction is created") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; // Check for call instruction. - std::regex call(R"(call void @bar\(\))"); + std::regex call(R"(call i32 @bar\(\))"); REQUIRE(std::regex_search(module_string, m, call)); } } @@ -408,13 +408,20 @@ SCENARIO("Procedure", "[visitor][llvm]") { PROCEDURE empty() {} )"; - THEN("empty void function is produced") { + THEN("a function returning 0 integer is produced") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check procedure has empty body with a void return. - std::regex procedure(R"(define void @empty\(\) \{\n(\s)*ret void\n\})"); - REQUIRE(std::regex_search(module_string, m, procedure)); + // Check procedure has empty body with a dummy 0 allocation. + std::regex signature(R"(define i32 @empty)"); + std::regex alloc(R"(%ret_empty = alloca i32)"); + std::regex store(R"(store i32 0, i32\* %ret_empty)"); + std::regex load(R"(%1 = load i32, i32\* %ret_empty)"); + std::regex ret(R"(ret i32 %1)"); + REQUIRE(std::regex_search(module_string, m, signature)); + REQUIRE(std::regex_search(module_string, m, alloc)); + REQUIRE(std::regex_search(module_string, m, store)); + REQUIRE(std::regex_search(module_string, m, ret)); } } @@ -423,23 +430,29 @@ SCENARIO("Procedure", "[visitor][llvm]") { PROCEDURE with_argument(x) {} )"; - THEN("void function is produced with arguments allocated on stack") { + THEN("int function is produced with arguments allocated on stack") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; // Check procedure signature. 
- std::regex function_signature(R"(define void @with_argument\(double %x1\) \{)"); + std::regex function_signature(R"(define i32 @with_argument\(double %x1\) \{)"); REQUIRE(std::regex_search(module_string, m, function_signature)); + // Check dummy return. + std::regex dummy_alloca(R"(%ret_with_argument = alloca i32)"); + std::regex dummy_store(R"(store i32 0, i32\* %ret_with_argument)"); + std::regex dummy_load(R"(%1 = load i32, i32\* %ret_with_argument)"); + std::regex ret(R"(ret i32 %1)"); + REQUIRE(std::regex_search(module_string, m, dummy_alloca)); + REQUIRE(std::regex_search(module_string, m, dummy_store)); + REQUIRE(std::regex_search(module_string, m, dummy_load)); + REQUIRE(std::regex_search(module_string, m, ret)); + // Check that procedure arguments are allocated on the local stack. std::regex alloca_instr(R"(%x = alloca double)"); std::regex store_instr(R"(store double %x1, double\* %x)"); REQUIRE(std::regex_search(module_string, m, alloca_instr)); REQUIRE(std::regex_search(module_string, m, store_instr)); - - // Check terminator. - std::regex terminator(R"(ret void)"); - REQUIRE(std::regex_search(module_string, m, terminator)); } } } @@ -493,7 +506,7 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { // Check if the values are optimised out std::regex empty_proc( - R"(define void @add\(double %a1, double %b2\) \{\n(\s)*ret void\n\})"); + R"(define i32 @add\(double %a1, double %b2\) \{\n(\s)*ret i32 0\n\})"); REQUIRE(std::regex_search(module_string, m, empty_proc)); } } From 87baf0f8f4f677f7cf8a928fd072ac4c920ae354 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 25 Jan 2021 17:06:22 +0300 Subject: [PATCH 019/105] LLVM code generation for if/else statements (#499) * Added a new code generation function for conditional statements (`if`, `else if`, `else` and their nested variations). * Added tests for the new code generation: - IR unit tests. - Execution tests. * Fixed FP and integer comparison ordering in macros. 
fixes #468 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 86 +++++++- src/codegen/llvm/codegen_llvm_visitor.hpp | 1 + test/unit/codegen/codegen_llvm_execution.cpp | 28 +++ test/unit/codegen/codegen_llvm_ir.cpp | 203 +++++++++++++++++++ 4 files changed, 314 insertions(+), 4 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 2d762c0e92..bde36f3dd4 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -26,6 +26,11 @@ namespace codegen { /* Helper routines */ /****************************************************************************************/ +static bool is_supported_statement(const ast::Statement& statement) { + return statement.is_codegen_var_list_statement() || statement.is_expression_statement() || + statement.is_codegen_return_statement() || statement.is_if_statement(); +} + bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { llvm::Type* array_type = current_func->getValueSymbolTable() ->lookup(node.get_node_name()) @@ -234,7 +239,7 @@ llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, llvm::Value* result; switch (bin_op) { -#define DISPATCH(binary_op, f_llvm_op, i_llvm_op) \ +#define DISPATCH(binary_op, i_llvm_op, f_llvm_op) \ case binary_op: \ if (lhs_type->isDoubleTy() || lhs_type->isFloatTy()) \ result = f_llvm_op(lhs, rhs); \ @@ -343,9 +348,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Process function or procedure body. The return statement is handled in a separate visitor. const auto& statements = block->get_statements(); for (const auto& statement: statements) { - // \todo: Support other statement types. 
- if (statement->is_codegen_var_list_statement() || statement->is_expression_statement() || - statement->is_codegen_return_statement()) + if (is_supported_statement(*statement)) statement->accept(*this); } @@ -424,6 +427,81 @@ void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { } } +void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { + // Get the current and the next blocks within the function. + llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Add a true block and a merge block where the control flow merges. + llvm::BasicBlock* true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + + // Add condition to the current block. + node.get_condition()->accept(*this); + llvm::Value* cond = values.back(); + values.pop_back(); + + // Process the true block. + builder.SetInsertPoint(true_block); + for (const auto& statement: node.get_statement_block()->get_statements()) { + if (is_supported_statement(*statement)) + statement->accept(*this); + } + builder.CreateBr(merge_block); + + // Save the merge block and proceed with codegen for `else if` statements. + llvm::BasicBlock* exit = merge_block; + for (const auto& else_if: node.get_elseifs()) { + // Link the current block to the true and else blocks. + llvm::BasicBlock* else_block = + llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + builder.SetInsertPoint(curr_block); + builder.CreateCondBr(cond, true_block, else_block); + + // Process else block. + builder.SetInsertPoint(else_block); + else_if->get_condition()->accept(*this); + cond = values.back(); + values.pop_back(); + + // Reassign true and merge blocks respectively. Note that the new merge block has to be + // connected to the old merge block (tmp). 
+ true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + llvm::BasicBlock* tmp = merge_block; + merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + builder.SetInsertPoint(merge_block); + builder.CreateBr(tmp); + + // Process true block. + builder.SetInsertPoint(true_block); + for (const auto& statement: else_if->get_statement_block()->get_statements()) { + if (is_supported_statement(*statement)) + statement->accept(*this); + } + builder.CreateBr(merge_block); + curr_block = else_block; + } + + // Finally, generate code for `else` statement if it exists. + const auto& elses = node.get_elses(); + llvm::BasicBlock* else_block; + if (elses) { + else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + builder.SetInsertPoint(else_block); + for (const auto& statement: elses->get_statement_block()->get_statements()) { + if (is_supported_statement(*statement)) + statement->accept(*this); + } + builder.CreateBr(merge_block); + } else { + else_block = merge_block; + } + builder.SetInsertPoint(curr_block); + builder.CreateCondBr(cond, true_block, else_block); + builder.SetInsertPoint(exit); +} + void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), node.get_value()); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index c6123a040d..28129b2fb8 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -229,6 +229,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_double(const ast::Double& node) override; void visit_function_block(const ast::FunctionBlock& node) override; void visit_function_call(const ast::FunctionCall& node) override; + void visit_if_statement(const ast::IfStatement& node) override; void visit_integer(const ast::Integer& node) override; void 
visit_procedure_block(const ast::ProcedureBlock& node) override; void visit_program(const ast::Program& node) override; diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 6f1bf7b8ca..34311bf2c3 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -114,6 +114,30 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { arithmetic = x * y / (x + y) } + FUNCTION conditionals() { + LOCAL x, y, z + x = 100 + y = -100 + z = 0 + if (x == 200) { + conditionals = 1 + } else if (x == 400) { + conditionals = 2 + } else if (x == 100) { + if (y == -100 && z != 0) { + conditionals = 3 + } else { + if (y < -99 && z == 0) { + conditionals = 4 + } else { + conditionals = 5 + } + } + } else { + conditionals = 6 + } + } + FUNCTION bar() { LOCAL i, j i = 2 @@ -151,6 +175,10 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { auto constant_result = runner.run("constant"); REQUIRE(fabs(constant_result - 10.0) < EPSILON); + // Check nested conditionals + auto conditionals_result = runner.run("conditionals"); + REQUIRE(fabs(conditionals_result - 4.0) < EPSILON); + // Check constant folding. 
auto arithmetic_result = runner.run("arithmetic"); REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index c328113f93..292256193c 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -147,6 +147,209 @@ SCENARIO("Define", "[visitor][llvm]") { } } +//============================================================================= +// If/Else statements and comparison operators +//============================================================================= + +SCENARIO("Comparison", "[visitor][llvm]") { + GIVEN("Procedure with comparison operators") { + std::string nmodl_text = R"( + PROCEDURE foo(x) { + if (x < 10) { + + } else if (x >= 10 && x <= 100) { + + } else if (x == 120) { + + } else if (!(x != 200)) { + + } + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check less than. + std::regex lt(R"(fcmp olt double %(.+), 1\.000000e\+01)"); + REQUIRE(std::regex_search(module_string, m, lt)); + + // Check greater or equal than and logical and. + std::regex ge(R"(fcmp ole double %(.+), 1\.000000e\+02)"); + std::regex logical_and(R"(and i1 %(.+), %(.+))"); + REQUIRE(std::regex_search(module_string, m, ge)); + REQUIRE(std::regex_search(module_string, m, logical_and)); + + // Check equals. + std::regex eq(R"(fcmp oeq double %(.+), 1\.200000e\+02)"); + REQUIRE(std::regex_search(module_string, m, eq)); + + // Check not equals. 
+ std::regex ne(R"(fcmp one double %(.+), 2\.000000e\+02)"); + REQUIRE(std::regex_search(module_string, m, ne)); + } + } +} + +SCENARIO("If/Else", "[visitor][llvm]") { + GIVEN("Function with only if statement") { + std::string nmodl_text = R"( + FUNCTION foo(y) { + LOCAL x + x = 100 + if (y == 20) { + x = 20 + } + foo = x + y + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex cond_br( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " store double 2\\.000000e\\+01, double\\* %x.*\n" + " br label %4\n" + "\n" + "4:"); + REQUIRE(std::regex_search(module_string, m, cond_br)); + } + } + + GIVEN("Function with both if and else statements") { + std::string nmodl_text = R"( + FUNCTION sign(x) { + LOCAL s + if (x < 0) { + s = -1 + } else { + s = 1 + } + sign = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_br( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " store double -1\\.000000e\\+00, double\\* %s.*\n" + " br label %5\n" + "\n" + "4:.*\n" + " store double 1\\.000000e\\+00, double\\* %s.*\n" + " br label %5\n" + "\n" + "5:"); + REQUIRE(std::regex_search(module_string, m, if_else_br)); + } + } + + GIVEN("Function with both if and else if statements") { + std::string nmodl_text = R"( + FUNCTION bar(x) { + LOCAL s + s = -1 + if (x <= 0) { + s = 0 + } else if (0 < x && x <= 1) { + s = 1 + } + bar = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_if( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " .*\n" + " br label %12\n" + "\n" + "4:.*\n" + " .*\n" + " .*\n" + " .*\n" + " .*\n" + " %.+ = and i1 %.+, %.+\n" + " br i1 %.+, label %10, label %11\n" + "\n" + "10:.*\n" + " .*\n" + " br label %11\n" + "\n" + "11:.*\n" + " 
br label %12\n" + "\n" + "12:"); + REQUIRE(std::regex_search(module_string, m, if_else_if)); + } + } + + GIVEN("Function with if, else if anf else statements") { + std::string nmodl_text = R"( + FUNCTION bar(x) { + LOCAL s + if (x <= 0) { + s = 0 + } else if (0 < x && x <= 1) { + s = 1 + } else { + s = 100 + } + bar = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_if_else( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " .*\n" + " br label %13\n" + "\n" + "4:.*\n" + " .*\n" + " .*\n" + " .*\n" + " .*\n" + " %9 = and i1 %.+, %.+\n" + " br i1 %9, label %10, label %11\n" + "\n" + "10:.*\n" + " .*\n" + " br label %12\n" + "\n" + "11:.*\n" + " .*\n" + " br label %12\n" + "\n" + "12:.*\n" + " br label %13\n" + "\n" + "13:"); + REQUIRE(std::regex_search(module_string, m, if_else_if_else)); + } + } +} + //============================================================================= // FunctionBlock //============================================================================= From 05b721f1f37a58553af6ec0bd637ad941c72bd6a Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 26 Jan 2021 09:27:31 +0300 Subject: [PATCH 020/105] Added error handling for values not in scope (#502) Added error handling when a non-scope value is looked up. Before, such a lookup would yield a nullptr, therefore leading to a segmentation fault. This PR adds a lookup function that wraps around value symbol lookup, and throws an error with a message if nullptr is returned. 
--- src/codegen/llvm/codegen_llvm_visitor.cpp | 18 +++++++++++------- src/codegen/llvm/codegen_llvm_visitor.hpp | 6 ++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index bde36f3dd4..86619b899e 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -32,10 +32,7 @@ static bool is_supported_statement(const ast::Statement& statement) { } bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { - llvm::Type* array_type = current_func->getValueSymbolTable() - ->lookup(node.get_node_name()) - ->getType() - ->getPointerElementType(); + llvm::Type* array_type = lookup(node.get_node_name())->getType()->getPointerElementType(); unsigned length = array_type->getArrayNumElements(); return 0 <= index && index < length; } @@ -46,7 +43,7 @@ llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, unsigned in indices.push_back(llvm::ConstantInt::get(index_type, 0)); indices.push_back(llvm::ConstantInt::get(index_type, index)); - return builder.CreateInBoundsGEP(current_func->getValueSymbolTable()->lookup(name), indices); + return builder.CreateInBoundsGEP(lookup(name), indices); } llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { @@ -177,6 +174,13 @@ void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Codeg *module); } +llvm::Value* CodegenLLVMVisitor::lookup(const std::string& name) { + auto val = current_func->getValueSymbolTable()->lookup(name); + if (!val) + throw std::runtime_error("Error: variable " + name + " is not in scope\n"); + return val; +} + llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op) { @@ -213,7 +217,7 @@ void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm const auto& identifier = var->get_name(); if 
(identifier->is_name()) { - llvm::Value* alloca = current_func->getValueSymbolTable()->lookup(var->get_node_name()); + llvm::Value* alloca = lookup(var->get_node_name()); builder.CreateStore(rhs, alloca); } else if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); @@ -562,7 +566,7 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { llvm::Value* ptr; if (identifier->is_name()) - ptr = current_func->getValueSymbolTable()->lookup(node.get_node_name()); + ptr = lookup(node.get_node_name()); if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 28129b2fb8..82c0c038ca 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -180,6 +180,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { return std::move(module); } + /** + * Lookup the given name in the current function's symbol table + * \return LLVM value + */ + llvm::Value* lookup(const std::string& name); + /** * Visit nmodl arithmetic binary operator * \param lhs LLVM value of evaluated lhs expression From 5077c68c9386a94eab21e053e453cf2662bfd5e9 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 26 Jan 2021 11:19:06 +0300 Subject: [PATCH 021/105] Added support for WHILE statement (#501) Added support for WHILE statement code generation. Corresponding tests for IR generation and execution were also added. Additional visitor for StatementBlock was added to reduce code duplication. 
fixes #500 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 59 ++++++++++++++------ src/codegen/llvm/codegen_llvm_visitor.hpp | 2 + test/unit/codegen/codegen_llvm_execution.cpp | 21 +++++++ test/unit/codegen/codegen_llvm_ir.cpp | 44 +++++++++++++++ 4 files changed, 108 insertions(+), 18 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 86619b899e..831c43317a 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -28,7 +28,8 @@ namespace codegen { static bool is_supported_statement(const ast::Statement& statement) { return statement.is_codegen_var_list_statement() || statement.is_expression_statement() || - statement.is_codegen_return_statement() || statement.is_if_statement(); + statement.is_codegen_return_statement() || statement.is_if_statement() || + statement.is_while_statement(); } bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { @@ -314,6 +315,14 @@ void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& no values.push_back(result); } +void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) { + const auto& statements = node.get_statements(); + for (const auto& statement: statements) { + if (is_supported_statement(*statement)) + statement->accept(*this); + } +} + void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*context), node.get_value()); @@ -350,11 +359,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node } // Process function or procedure body. The return statement is handled in a separate visitor. 
- const auto& statements = block->get_statements(); - for (const auto& statement: statements) { - if (is_supported_statement(*statement)) - statement->accept(*this); - } + block->accept(*this); // If function has a void return type, add a terminator not handled by CodegenReturnVar. if (node.is_void()) @@ -448,10 +453,7 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Process the true block. builder.SetInsertPoint(true_block); - for (const auto& statement: node.get_statement_block()->get_statements()) { - if (is_supported_statement(*statement)) - statement->accept(*this); - } + node.get_statement_block()->accept(*this); builder.CreateBr(merge_block); // Save the merge block and proceed with codegen for `else if` statements. @@ -479,10 +481,7 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Process true block. builder.SetInsertPoint(true_block); - for (const auto& statement: else_if->get_statement_block()->get_statements()) { - if (is_supported_statement(*statement)) - statement->accept(*this); - } + else_if->get_statement_block()->accept(*this); builder.CreateBr(merge_block); curr_block = else_block; } @@ -493,10 +492,7 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { if (elses) { else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); builder.SetInsertPoint(else_block); - for (const auto& statement: elses->get_statement_block()->get_statements()) { - if (is_supported_statement(*statement)) - statement->accept(*this); - } + elses->get_statement_block()->accept(*this); builder.CreateBr(merge_block); } else { else_block = merge_block; @@ -578,5 +574,32 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { values.push_back(var); } +void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { + // Get the current and the next blocks within the function. 
+ llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Add a header and the body blocks. + llvm::BasicBlock* header = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + + builder.CreateBr(header); + builder.SetInsertPoint(header); + + // Generate code for condition and create branch to the body block. + node.get_condition()->accept(*this); + llvm::Value* condition = values.back(); + values.pop_back(); + builder.CreateCondBr(condition, body, exit); + + builder.SetInsertPoint(body); + node.get_statement_block()->accept(*this); + builder.CreateBr(header); + + builder.SetInsertPoint(exit); +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 82c0c038ca..3003a119b5 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -229,6 +229,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Visitors void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; + void visit_statement_block(const ast::StatementBlock& node) override; void visit_codegen_function(const ast::CodegenFunction& node) override; void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; @@ -241,6 +242,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_program(const ast::Program& node) override; void visit_unary_expression(const ast::UnaryExpression& node) override; void visit_var_name(const ast::VarName& node) override; + void 
visit_while_statement(const ast::WhileStatement& node) override; // \todo: move this to debug mode (e.g. -v option or --dump-ir) std::string print_module() const { diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 34311bf2c3..90e8fb3cc2 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -59,6 +59,23 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { } PROCEDURE foo() {} + + FUNCTION loop() { + LOCAL i, j, sum, result + result = 0 + j = 0 + WHILE (j < 2) { + i = 0 + sum = 0 + WHILE (i < 10) { + sum = sum + i + i = i + 1 + } + j = j + 1 + result = result + sum + } + loop = result + } )"; @@ -86,6 +103,9 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { auto function_call_result = runner.run("function_call"); REQUIRE(fabs(function_call_result - 1.0) < EPSILON); + + auto loop_result = runner.run("loop"); + REQUIRE(fabs(loop_result - 90.0) < EPSILON); } } } @@ -151,6 +171,7 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { } PROCEDURE foo() {} + )"; diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 292256193c..d16b02b2f5 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -690,6 +690,50 @@ SCENARIO("Unary expression", "[visitor][llvm]") { } } +//============================================================================= +// WhileStatement +//============================================================================= + +SCENARIO("While", "[visitor][llvm]") { + GIVEN("Procedure with a simple while loop") { + std::string nmodl_text = R"( + FUNCTION loop() { + LOCAL i + i = 0 + WHILE (i < 10) { + i = i + 1 + } + loop = 0 + } + )"; + + THEN("correct loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex loop( + " br label %1\n" + "\n" + "1:.*\n" + " %2 = load double, 
double\\* %i.*\n" + " %3 = fcmp olt double %2, 1\\.000000e\\+01\n" + " br i1 %3, label %4, label %7\n" + "\n" + "4:.*\n" + " %5 = load double, double\\* %i.*\n" + " %6 = fadd double %5, 1\\.000000e\\+00\n" + " store double %6, double\\* %i.*\n" + " br label %1\n" + "\n" + "7:.*\n" + " store double 0\\.000000e\\+00, double\\* %ret_loop.*\n"); + // Check that 3 blocks are created: header, body and exit blocks. Also, there must be + // a backedge from the body to the header. + REQUIRE(std::regex_search(module_string, m, loop)); + } + } +} + //============================================================================= // Optimization : dead code removal //============================================================================= From 2223d00336fcb3fd8fbc44ed7bbf00ef6e13e517 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 1 Feb 2021 22:01:33 +0100 Subject: [PATCH 022/105] Create mechanism instance struct in LLVM IR (#507) * Moved info related function to codegen_info - Moved get_float_variables, codegen_int_variables, codegen_global_variables, codegen_shadow_variables into CodegenHelper - Move small utility functions from CodegenCVisitor to codeged_utils * Add proper variables to the mech_Instance * Adding LLVMStructBlock * Added test and visitor * Fix llvm codegen tests with x[0-9].* * Fixes after rebasing llvm branch on master (15.9.2022) --- CMakeLists.txt | 2 +- src/codegen/codegen_c_visitor.cpp | 298 ++---------------- src/codegen/codegen_c_visitor.hpp | 123 -------- src/codegen/codegen_helper_visitor.cpp | 5 + src/codegen/codegen_helper_visitor.hpp | 10 + src/codegen/codegen_info.cpp | 206 ++++++++++++ src/codegen/codegen_info.hpp | 141 +++++++++ src/codegen/codegen_ispc_visitor.cpp | 18 +- src/codegen/llvm/CMakeLists.txt | 9 +- .../llvm/codegen_llvm_helper_visitor.cpp | 41 ++- .../llvm/codegen_llvm_helper_visitor.hpp | 3 + src/codegen/llvm/codegen_llvm_visitor.cpp | 17 + src/codegen/llvm/codegen_llvm_visitor.hpp | 10 + 
src/language/code_generator.cmake | 1 + src/language/nmodl.yaml | 12 + src/main.cpp | 6 +- test/unit/CMakeLists.txt | 2 + test/unit/codegen/codegen_llvm_execution.cpp | 2 +- test/unit/codegen/codegen_llvm_ir.cpp | 48 ++- 19 files changed, 528 insertions(+), 426 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd32ebcf0b..a98eee7cfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,7 +180,7 @@ cpp_cc_find_python_module(yaml 3.12 REQUIRED) # Find LLVM dependencies # ============================================================================= if(NMODL_ENABLE_LLVM) - include(LLVMHelper) + include(cmake/LLVMHelper.cmake) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(-DNMODL_LLVM_BACKEND) endif() diff --git a/src/codegen/codegen_c_visitor.cpp b/src/codegen/codegen_c_visitor.cpp index 4e800013a4..af464f3957 100644 --- a/src/codegen/codegen_c_visitor.cpp +++ b/src/codegen/codegen_c_visitor.cpp @@ -353,49 +353,6 @@ bool CodegenCVisitor::statement_to_skip(const Statement& node) { } -bool CodegenCVisitor::net_send_buffer_required() const noexcept { - if (net_receive_required() && !info.artificial_cell) { - if (info.net_event_used || info.net_send_used || info.is_watch_used()) { - return true; - } - } - return false; -} - - -bool CodegenCVisitor::net_receive_buffering_required() const noexcept { - return info.point_process && !info.artificial_cell && info.net_receive_node != nullptr; -} - - -bool CodegenCVisitor::nrn_state_required() const noexcept { - if (info.artificial_cell) { - return false; - } - return info.nrn_state_block != nullptr || breakpoint_exist(); -} - - -bool CodegenCVisitor::nrn_cur_required() const noexcept { - return info.breakpoint_node != nullptr && !info.currents.empty(); -} - - -bool CodegenCVisitor::net_receive_exist() const noexcept { - return info.net_receive_node != nullptr; -} - - -bool CodegenCVisitor::breakpoint_exist() const noexcept { - return info.breakpoint_node != nullptr; -} - - -bool 
CodegenCVisitor::net_receive_required() const noexcept { - return net_receive_exist(); -} - - /** * \details When floating point data type is not default (i.e. double) then we * have to copy old array to new type (for range variables). @@ -420,7 +377,7 @@ bool CodegenCVisitor::state_variable(const std::string& name) const { int CodegenCVisitor::position_of_float_var(const std::string& name) const { int index = 0; - for (const auto& var: codegen_float_variables) { + for (const auto& var: info.codegen_float_variables) { if (var->get_name() == name) { return index; } @@ -432,7 +389,7 @@ int CodegenCVisitor::position_of_float_var(const std::string& name) const { int CodegenCVisitor::position_of_int_var(const std::string& name) const { int index = 0; - for (const auto& var: codegen_int_variables) { + for (const auto& var: info.codegen_int_variables) { if (var.symbol->get_name() == name) { return index; } @@ -557,11 +514,11 @@ int CodegenCVisitor::float_variables_size() const { float_size++; } /// for g_unused variable - if (breakpoint_exist()) { + if (info.breakpoint_exist()) { float_size++; } /// for tsave variable - if (net_receive_exist()) { + if (info.net_receive_exist()) { float_size++; } return float_size; @@ -826,198 +783,6 @@ void CodegenCVisitor::update_index_semantics() { } -std::vector CodegenCVisitor::get_float_variables() { - // sort with definition order - auto comparator = [](const SymbolType& first, const SymbolType& second) -> bool { - return first->get_definition_order() < second->get_definition_order(); - }; - - auto assigned = info.assigned_vars; - auto states = info.state_vars; - - // each state variable has corresponding Dstate variable - for (auto& state: states) { - auto name = "D" + state->get_name(); - auto symbol = make_symbol(name); - if (state->is_array()) { - symbol->set_as_array(state->get_length()); - } - symbol->set_definition_order(state->get_definition_order()); - assigned.push_back(symbol); - } - std::sort(assigned.begin(), 
assigned.end(), comparator); - - auto variables = info.range_parameter_vars; - variables.insert(variables.end(), - info.range_assigned_vars.begin(), - info.range_assigned_vars.end()); - variables.insert(variables.end(), info.range_state_vars.begin(), info.range_state_vars.end()); - variables.insert(variables.end(), assigned.begin(), assigned.end()); - - if (info.vectorize) { - variables.push_back(make_symbol(naming::VOLTAGE_UNUSED_VARIABLE)); - } - - if (breakpoint_exist()) { - std::string name = info.vectorize ? naming::CONDUCTANCE_UNUSED_VARIABLE - : naming::CONDUCTANCE_VARIABLE; - - // make sure conductance variable like `g` is not already defined - if (auto r = std::find_if(variables.cbegin(), - variables.cend(), - [&](const auto& s) { return name == s->get_name(); }); - r == variables.cend()) { - variables.push_back(make_symbol(name)); - } - } - - if (net_receive_exist()) { - variables.push_back(make_symbol(naming::T_SAVE_VARIABLE)); - } - return variables; -} - - -/** - * IndexVariableInfo has following constructor arguments: - * - symbol - * - is_vdata (false) - * - is_index (false - * - is_integer (false) - * - * Which variables are constant qualified? 
- * - * - node area is read only - * - read ion variables are read only - * - style_ionname is index / offset - */ -// NOLINTNEXTLINE(readability-function-cognitive-complexity) -std::vector CodegenCVisitor::get_int_variables() { - std::vector variables; - if (info.point_process) { - variables.emplace_back(make_symbol(naming::NODE_AREA_VARIABLE)); - variables.back().is_constant = true; - /// note that this variable is not printed in neuron implementation - if (info.artificial_cell) { - variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), true); - } else { - variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), false, false, true); - variables.back().is_constant = true; - } - } - - for (const auto& ion: info.ions) { - bool need_style = false; - std::unordered_map ion_vars; // used to keep track of the variables to - // not have doubles between read/write. Same - // name variables are allowed - for (const auto& var: ion.reads) { - const std::string name = naming::ION_VARNAME_PREFIX + var; - variables.emplace_back(make_symbol(name)); - variables.back().is_constant = true; - ion_vars[name] = static_cast(variables.size() - 1); - } - - /// symbol for di_ion_dv var - std::shared_ptr ion_di_dv_var = nullptr; - - for (const auto& var: ion.writes) { - const std::string name = naming::ION_VARNAME_PREFIX + var; - - const auto ion_vars_it = ion_vars.find(name); - if (ion_vars_it != ion_vars.end()) { - variables[ion_vars_it->second].is_constant = false; - } else { - variables.emplace_back(make_symbol(naming::ION_VARNAME_PREFIX + var)); - } - if (ion.is_ionic_current(var)) { - ion_di_dv_var = make_symbol(std::string(naming::ION_VARNAME_PREFIX) + "di" + - ion.name + "dv"); - } - if (ion.is_intra_cell_conc(var) || ion.is_extra_cell_conc(var)) { - need_style = true; - } - } - - /// insert after read/write variables but before style ion variable - if (ion_di_dv_var != nullptr) { - variables.emplace_back(ion_di_dv_var); - } - - if (need_style) { - 
variables.emplace_back(make_symbol("style_" + ion.name), false, true); - variables.back().is_constant = true; - } - } - - for (const auto& var: info.pointer_variables) { - auto name = var->get_name(); - if (var->has_any_property(NmodlType::pointer_var)) { - variables.emplace_back(make_symbol(name)); - } else { - variables.emplace_back(make_symbol(name), true); - } - } - - if (info.diam_used) { - variables.emplace_back(make_symbol(naming::DIAM_VARIABLE)); - } - - if (info.area_used) { - variables.emplace_back(make_symbol(naming::AREA_VARIABLE)); - } - - // for non-artificial cell, when net_receive buffering is enabled - // then tqitem is an offset - if (info.net_send_used) { - if (info.artificial_cell) { - variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), true); - } else { - variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), false, false, true); - variables.back().is_constant = true; - } - info.tqitem_index = static_cast(variables.size() - 1); - } - - /** - * \note Variables for watch statements : there is one extra variable - * used in coreneuron compared to actual watch statements for compatibility - * with neuron (which uses one extra Datum variable) - */ - if (!info.watch_statements.empty()) { - for (int i = 0; i < info.watch_statements.size() + 1; i++) { - variables.emplace_back(make_symbol(fmt::format("watch{}", i)), false, false, true); - } - } - return variables; -} - - -/** - * \details When we enable fine level parallelism at channel level, we have do updates - * to ion variables in atomic way. As cpus don't have atomic instructions in - * simd loop, we have to use shadow vectors for every ion variables. Here - * we return list of all such variables. 
- * - * \todo If conductances are specified, we don't need all below variables - */ -std::vector CodegenCVisitor::get_shadow_variables() { - std::vector variables; - for (const auto& ion: info.ions) { - for (const auto& var: ion.writes) { - variables.push_back({make_symbol(shadow_varname(naming::ION_VARNAME_PREFIX + var))}); - if (ion.is_ionic_current(var)) { - variables.push_back({make_symbol(shadow_varname( - std::string(naming::ION_VARNAME_PREFIX) + "di" + ion.name + "dv"))}); - } - } - } - variables.push_back({make_symbol("ml_rhs")}); - variables.push_back({make_symbol("ml_d")}); - return variables; -} - - /****************************************************************************************/ /* Routines must be overloaded in backend */ /****************************************************************************************/ @@ -2080,8 +1845,8 @@ std::string CodegenCVisitor::process_verbatim_text(std::string const& text) { std::string CodegenCVisitor::register_mechanism_arguments() const { - auto nrn_cur = nrn_cur_required() ? method_name(naming::NRN_CUR_METHOD) : "nullptr"; - auto nrn_state = nrn_state_required() ? method_name(naming::NRN_STATE_METHOD) : "nullptr"; + auto nrn_cur = info.nrn_cur_required() ? method_name(naming::NRN_CUR_METHOD) : "nullptr"; + auto nrn_state = info.nrn_state_required() ? 
method_name(naming::NRN_STATE_METHOD) : "nullptr"; auto nrn_alloc = method_name(naming::NRN_ALLOC_METHOD); auto nrn_init = method_name(naming::NRN_INIT_METHOD); auto const nrn_private_constructor = method_name(naming::NRN_PRIVATE_CONSTRUCTOR_METHOD); @@ -2203,7 +1968,7 @@ void CodegenCVisitor::print_num_variable_getter() { void CodegenCVisitor::print_net_receive_arg_size_getter() { - if (!net_receive_exist()) { + if (!info.net_receive_exist()) { return; } printer->add_newline(2); @@ -2398,17 +2163,18 @@ std::string CodegenCVisitor::get_variable_name(const std::string& name, bool use // clang-format on // float variable - auto f = std::find_if(codegen_float_variables.begin(), - codegen_float_variables.end(), + auto f = std::find_if(info.codegen_float_variables.begin(), + info.codegen_float_variables.end(), symbol_comparator); - if (f != codegen_float_variables.end()) { + if (f != info.codegen_float_variables.end()) { return float_variable_name(*f, use_instance); } // integer variable - auto i = - std::find_if(codegen_int_variables.begin(), codegen_int_variables.end(), index_comparator); - if (i != codegen_int_variables.end()) { + auto i = std::find_if(info.codegen_int_variables.begin(), + info.codegen_int_variables.end(), + index_comparator); + if (i != info.codegen_int_variables.end()) { return int_variable_name(*i, varname, use_instance); } @@ -2421,10 +2187,10 @@ std::string CodegenCVisitor::get_variable_name(const std::string& name, bool use } // shadow variable - auto s = std::find_if(codegen_shadow_variables.begin(), - codegen_shadow_variables.end(), + auto s = std::find_if(info.codegen_shadow_variables.begin(), + info.codegen_shadow_variables.end(), symbol_comparator); - if (s != codegen_shadow_variables.end()) { + if (s != info.codegen_shadow_variables.end()) { return ion_shadow_variable_name(*s); } @@ -2989,7 +2755,7 @@ void CodegenCVisitor::print_mechanism_register() { if (info.artificial_cell) { printer->fmt_line("add_nrn_artcell(mech_type, {});", 
info.tqitem_index); } - if (net_receive_buffering_required()) { + if (info.net_receive_buffering_required()) { printer->fmt_line("hoc_register_net_receive_buffering({}, mech_type);", method_name("net_buf_receive")); } @@ -3109,14 +2875,14 @@ void CodegenCVisitor::print_mechanism_range_var_structure(bool print_initialiser print_initialisers ? fmt::format("{{&coreneuron::{}}}", name) : std::string{}); } - for (auto& var: codegen_float_variables) { + for (auto& var: info.codegen_float_variables) { auto name = var->get_name(); auto type = get_range_var_float_type(var); auto qualifier = is_constant_variable(name) ? "const " : ""; printer->fmt_line( "{}{}* {}{}{};", qualifier, type, ptr_type_qualifier(), name, value_initialise); } - for (auto& var: codegen_int_variables) { + for (auto& var: info.codegen_int_variables) { auto name = var.symbol->get_name(); if (var.is_index || var.is_integer) { auto qualifier = var.is_constant ? "const " : ""; @@ -3296,9 +3062,9 @@ void CodegenCVisitor::print_instance_variable_setup() { for (auto const& [var, type]: info.neuron_global_variables) { ptr_members.push_back(var->get_name()); } - ptr_members.reserve(ptr_members.size() + codegen_float_variables.size() + - codegen_int_variables.size()); - for (auto& var: codegen_float_variables) { + ptr_members.reserve(ptr_members.size() + info.codegen_float_variables.size() + + info.codegen_int_variables.size()); + for (auto& var: info.codegen_float_variables) { auto name = var->get_name(); auto range_var_type = get_range_var_float_type(var); if (float_type == range_var_type) { @@ -3315,7 +3081,7 @@ void CodegenCVisitor::print_instance_variable_setup() { id += var->get_length(); } - for (auto& var: codegen_int_variables) { + for (auto& var: info.codegen_int_variables) { auto name = var.symbol->get_name(); auto const variable = [&var]() { if (var.is_index || var.is_integer) { @@ -3963,7 +3729,7 @@ void CodegenCVisitor::print_net_receive_loop_end() { void 
CodegenCVisitor::print_net_receive_buffering(bool need_mech_inst) { - if (!net_receive_required() || info.artificial_cell) { + if (!info.net_receive_required() || info.artificial_cell) { return; } printer->add_newline(2); @@ -4013,7 +3779,7 @@ void CodegenCVisitor::print_net_send_buffering_grow() { } void CodegenCVisitor::print_net_send_buffering() { - if (!net_send_buffer_required()) { + if (!info.net_send_buffer_required()) { return; } @@ -4077,7 +3843,7 @@ void CodegenCVisitor::visit_for_netcon(const ast::ForNetcon& node) { } void CodegenCVisitor::print_net_receive_kernel() { - if (!net_receive_required()) { + if (!info.net_receive_required()) { return; } codegen = true; @@ -4140,7 +3906,7 @@ void CodegenCVisitor::print_net_receive_kernel() { void CodegenCVisitor::print_net_receive() { - if (!net_receive_required()) { + if (!info.net_receive_required()) { return; } codegen = true; @@ -4298,7 +4064,7 @@ void CodegenCVisitor::visit_solution_expression(const SolutionExpression& node) void CodegenCVisitor::print_nrn_state() { - if (!nrn_state_required()) { + if (!info.nrn_state_required()) { return; } codegen = true; @@ -4515,7 +4281,7 @@ void CodegenCVisitor::print_fast_imem_calculation() { } void CodegenCVisitor::print_nrn_cur() { - if (!nrn_cur_required()) { + if (!info.nrn_cur_required()) { return; } @@ -4681,10 +4447,6 @@ void CodegenCVisitor::setup(const Program& node) { logger->warn("CodegenCVisitor : MOD file uses non-thread safe constructs of NMODL"); } - codegen_float_variables = get_float_variables(); - codegen_int_variables = get_int_variables(); - codegen_shadow_variables = get_shadow_variables(); - update_index_semantics(); rename_function_arguments(); } diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 0a80e52d01..0d08da907f 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -65,41 +65,6 @@ enum class MemberType { thread }; - -/** - * \class IndexVariableInfo - * \brief 
Helper to represent information about index/int variables - * - */ -struct IndexVariableInfo { - /// symbol for the variable - const std::shared_ptr symbol; - - /// if variable reside in vdata field of NrnThread - /// typically true for bbcore pointer - bool is_vdata = false; - - /// if this is pure index (e.g. style_ion) variables is directly - /// index and shouldn't be printed with data/vdata - bool is_index = false; - - /// if this is an integer (e.g. tqitem, point_process) variable which - /// is printed as array accesses - bool is_integer = false; - - /// if the variable is qualified as constant (this is property of IndexVariable) - bool is_constant = false; - - IndexVariableInfo(std::shared_ptr symbol, - bool is_vdata = false, - bool is_index = false, - bool is_integer = false) - : symbol(std::move(symbol)) - , is_vdata(is_vdata) - , is_index(is_index) - , is_integer(is_integer) {} -}; - /** @} */ // end of codegen_details @@ -163,11 +128,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { */ symtab::SymbolTable* program_symtab = nullptr; - /** - * All float variables for the model - */ - std::vector codegen_float_variables; - /** * All int variables for the model */ @@ -364,26 +324,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { } - /** - * Constructs a shadow variable name - * \param name The name of the variable - * \return The name of the variable prefixed with \c shadow_ - */ - std::string shadow_varname(const std::string& name) const { - return "shadow_" + name; - } - - - /** - * Creates a temporary symbol - * \param name The name of the symbol - * \return A symbol based on the given name - */ - SymbolType make_symbol(const std::string& name) const { - return std::make_shared(name, ModToken()); - } - - /** * Checks if the given variable name belongs to a state variable * \param name The variable name @@ -392,36 +332,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { bool state_variable(const std::string& name) const; 
- /** - * Check if net receive/send buffering kernels required - */ - bool net_receive_buffering_required() const noexcept; - - - /** - * Check if nrn_state function is required - */ - bool nrn_state_required() const noexcept; - - - /** - * Check if nrn_cur function is required - */ - bool nrn_cur_required() const noexcept; - - - /** - * Check if net_receive function is required - */ - bool net_receive_required() const noexcept; - - - /** - * Check if net_send_buffer is required - */ - bool net_send_buffer_required() const noexcept; - - /** * Check if setup_range_variable function is required * \return @@ -429,18 +339,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { bool range_variable_setup_required() const noexcept; - /** - * Check if net_receive node exist - */ - bool net_receive_exist() const noexcept; - - - /** - * Check if breakpoint node exist - */ - bool breakpoint_exist() const noexcept; - - /** * Check if given method is defined in this model * \param name The name of the method to check @@ -608,27 +506,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { void update_index_semantics(); - /** - * Determine all \c float variables required during code generation - * \return A \c vector of \c float variables - */ - std::vector get_float_variables(); - - - /** - * Determine all \c int variables required during code generation - * \return A \c vector of \c int variables - */ - std::vector get_int_variables(); - - - /** - * Determine all ion write variables that require shadow vectors during code generation - * \return A \c vector of ion variables - */ - std::vector get_shadow_variables(); - - /** * Print the items in a vector as a list * diff --git a/src/codegen/codegen_helper_visitor.cpp b/src/codegen/codegen_helper_visitor.cpp index 45a0c5c53f..4e81e1f0a9 100644 --- a/src/codegen/codegen_helper_visitor.cpp +++ b/src/codegen/codegen_helper_visitor.cpp @@ -24,6 +24,7 @@ using namespace ast; using symtab::syminfo::NmodlType; using 
symtab::syminfo::Status; + /** * How symbols are stored in NEURON? See notes written in markdown file. * @@ -285,6 +286,7 @@ void CodegenHelperVisitor::find_non_range_variables() { // clang-format on } + /** * Find range variables i.e. ones that are belong to per instance allocation * @@ -711,6 +713,9 @@ void CodegenHelperVisitor::visit_program(const ast::Program& node) { find_non_range_variables(); find_table_variables(); find_neuron_global_variables(); + info.get_int_variables(); + info.get_shadow_variables(); + info.get_float_variables(); } diff --git a/src/codegen/codegen_helper_visitor.hpp b/src/codegen/codegen_helper_visitor.hpp index 614f93732e..611eccb788 100644 --- a/src/codegen/codegen_helper_visitor.hpp +++ b/src/codegen/codegen_helper_visitor.hpp @@ -76,6 +76,16 @@ class CodegenHelperVisitor: public visitor::ConstAstVisitor { void find_neuron_global_variables(); static void sort_with_mod2c_symbol_order(std::vector& symbols); + /** + * Check if breakpoint node exist + */ + bool breakpoint_exist() const noexcept; + + /** + * Check if net_receive node exist + */ + bool net_receive_exist() const noexcept; + public: CodegenHelperVisitor() = default; diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index f0a173c0a2..5538519565 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -20,6 +20,16 @@ using namespace fmt::literals; using symtab::syminfo::NmodlType; using visitor::VarUsageVisitor; +SymbolType make_symbol(const std::string& name) { + return std::make_shared(name, ModToken()); +} + + +std::string shadow_varname(const std::string& name) { + return "shadow_" + name; +} + + /// if any ion has write variable bool CodegenInfo::ion_has_write_variable() const { return std::any_of(ions.begin(), ions.end(), [](auto const& ion) { @@ -180,5 +190,201 @@ bool CodegenInfo::is_an_instance_variable(const std::string& varname) const { return false; } + +/** + * IndexVariableInfo has following constructor arguments: + * 
- symbol + * - is_vdata (false) + * - is_index (false + * - is_integer (false) + * + * Which variables are constant qualified? + * + * - node area is read only + * - read ion variables are read only + * - style_ionname is index / offset + */ +void CodegenInfo::get_int_variables() { + if (point_process) { + codegen_int_variables.emplace_back(make_symbol(naming::NODE_AREA_VARIABLE)); + codegen_int_variables.back().is_constant = true; + /// note that this variable is not printed in neuron implementation + if (artificial_cell) { + codegen_int_variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), true); + } else { + codegen_int_variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), + false, + false, + true); + codegen_int_variables.back().is_constant = true; + } + } + + for (const auto& ion: ions) { + bool need_style = false; + std::unordered_map ion_vars; // used to keep track of the variables to + // not have doubles between read/write. Same + // name variables are allowed + for (const auto& var: ion.reads) { + const std::string name = "ion_" + var; + codegen_int_variables.emplace_back(make_symbol(name)); + codegen_int_variables.back().is_constant = true; + ion_vars[name] = codegen_int_variables.size() - 1; + } + + /// symbol for di_ion_dv var + std::shared_ptr ion_di_dv_var = nullptr; + + for (const auto& var: ion.writes) { + const std::string name = "ion_" + var; + + const auto ion_vars_it = ion_vars.find(name); + if (ion_vars_it != ion_vars.end()) { + codegen_int_variables[ion_vars_it->second].is_constant = false; + } else { + codegen_int_variables.emplace_back(make_symbol("ion_" + var)); + } + if (ion.is_ionic_current(var)) { + ion_di_dv_var = make_symbol("ion_di" + ion.name + "dv"); + } + if (ion.is_intra_cell_conc(var) || ion.is_extra_cell_conc(var)) { + need_style = true; + } + } + + /// insert after read/write variables but before style ion variable + if (ion_di_dv_var != nullptr) { + 
codegen_int_variables.emplace_back(ion_di_dv_var); + } + + if (need_style) { + codegen_int_variables.emplace_back(make_symbol("style_" + ion.name), false, true); + codegen_int_variables.back().is_constant = true; + } + } + + for (const auto& var: pointer_variables) { + auto name = var->get_name(); + if (var->has_any_property(NmodlType::pointer_var)) { + codegen_int_variables.emplace_back(make_symbol(name)); + } else { + codegen_int_variables.emplace_back(make_symbol(name), true); + } + } + + if (diam_used) { + codegen_int_variables.emplace_back(make_symbol(naming::DIAM_VARIABLE)); + } + + if (area_used) { + codegen_int_variables.emplace_back(make_symbol(naming::AREA_VARIABLE)); + } + + // for non-artificial cell, when net_receive buffering is enabled + // then tqitem is an offset + if (net_send_used) { + if (artificial_cell) { + codegen_int_variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), true); + } else { + codegen_int_variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), + false, + false, + true); + codegen_int_variables.back().is_constant = true; + } + tqitem_index = codegen_int_variables.size() - 1; + } + + /** + * \note Variables for watch statements : there is one extra variable + * used in coreneuron compared to actual watch statements for compatibility + * with neuron (which uses one extra Datum variable) + */ + if (!watch_statements.empty()) { + for (int i = 0; i < watch_statements.size() + 1; i++) { + codegen_int_variables.emplace_back(make_symbol(fmt::format("watch{}",i)), + false, + false, + true); + } + } +} + + +/** + * \details When we enable fine level parallelism at channel level, we have do updates + * to ion variables in atomic way. As cpus don't have atomic instructions in + * simd loop, we have to use shadow vectors for every ion variables. Here + * we return list of all such variables. 
+ * + * \todo If conductances are specified, we don't need all below variables + */ +void CodegenInfo::get_shadow_variables() { + for (const auto& ion: ions) { + for (const auto& var: ion.writes) { + codegen_shadow_variables.push_back({make_symbol(shadow_varname("ion_" + var))}); + if (ion.is_ionic_current(var)) { + codegen_shadow_variables.push_back( + {make_symbol(shadow_varname("ion_di" + ion.name + "dv"))}); + } + } + } + codegen_shadow_variables.push_back({make_symbol("ml_rhs")}); + codegen_shadow_variables.push_back({make_symbol("ml_d")}); +} + + +void CodegenInfo::get_float_variables() { + // sort with definition order + auto comparator = [](const SymbolType& first, const SymbolType& second) -> bool { + return first->get_definition_order() < second->get_definition_order(); + }; + + auto assigned = assigned_vars; + auto states = state_vars; + + // each state variable has corresponding Dstate variable + for (auto& state: states) { + auto name = "D" + state->get_name(); + auto symbol = make_symbol(name); + if (state->is_array()) { + symbol->set_as_array(state->get_length()); + } + symbol->set_definition_order(state->get_definition_order()); + assigned.push_back(symbol); + } + std::sort(assigned.begin(), assigned.end(), comparator); + + codegen_float_variables = range_parameter_vars; + codegen_float_variables.insert(codegen_float_variables.end(), + range_assigned_vars.begin(), + range_assigned_vars.end()); + codegen_float_variables.insert(codegen_float_variables.end(), + range_state_vars.begin(), + range_state_vars.end()); + codegen_float_variables.insert(codegen_float_variables.end(), assigned.begin(), assigned.end()); + + if (vectorize) { + codegen_float_variables.push_back(make_symbol(naming::VOLTAGE_UNUSED_VARIABLE)); + } + + if (breakpoint_exist()) { + std::string name = vectorize ? 
naming::CONDUCTANCE_UNUSED_VARIABLE + : naming::CONDUCTANCE_VARIABLE; + + // make sure conductance variable like `g` is not already defined + if (auto r = std::find_if(codegen_float_variables.cbegin(), + codegen_float_variables.cend(), + [&](const auto& s) { return name == s->get_name(); }); + r == codegen_float_variables.cend()) { + codegen_float_variables.push_back(make_symbol(name)); + } + } + + if (net_receive_exist()) { + codegen_float_variables.push_back(make_symbol(naming::T_SAVE_VARIABLE)); + } +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index 3ece7ae902..d1278fd4dd 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -22,6 +22,56 @@ namespace nmodl { namespace codegen { +using SymbolType = std::shared_ptr; + +/** + * Creates a temporary symbol + * \param name The name of the symbol + * \return A symbol based on the given name + */ +SymbolType make_symbol(const std::string& name); + +/** + * Constructs a shadow variable name + * \param name The name of the variable + * \return The name of the variable prefixed with \c shadow_ + */ +std::string shadow_varname(const std::string& name); + +/** + * \class IndexVariableInfo + * \brief Helper to represent information about index/int variables + * + */ +struct IndexVariableInfo { + /// symbol for the variable + const std::shared_ptr symbol; + + /// if variable reside in vdata field of NrnThread + /// typically true for bbcore pointer + bool is_vdata = false; + + /// if this is pure index (e.g. style_ion) variables is directly + /// index and shouldn't be printed with data/vdata + bool is_index = false; + + /// if this is an integer (e.g. 
tqitem, point_process) variable which + /// is printed as array accesses + bool is_integer = false; + + /// if the variable is qualified as constant (this is property of IndexVariable) + bool is_constant = false; + + IndexVariableInfo(std::shared_ptr symbol, + bool is_vdata = false, + bool is_index = false, + bool is_integer = false) + : symbol(std::move(symbol)) + , is_vdata(is_vdata) + , is_index(is_index) + , is_integer(is_integer) {} +}; + /** * @addtogroup codegen_details * @{ @@ -146,6 +196,9 @@ enum BlockType { /// initial block Initial, + /// constructor block + Constructor, + /// destructor block Destructor, @@ -164,6 +217,9 @@ enum BlockType { /// net_receive block NetReceive, + /// before / after block + BeforeAfter, + /// fake ending block type for loops on the enums. Keep it at the end BlockTypeEnd }; @@ -389,6 +445,15 @@ struct CodegenInfo { /// new one used in print_ion_types std::vector use_ion_variables; + /// all int variables for the model + std::vector codegen_int_variables; + + /// all ion variables that could be possibly written + std::vector codegen_shadow_variables; + + /// all float variables for the model + std::vector codegen_float_variables; + /// this is the order in which they appear in derivative block /// this is required while printing them in initlist function std::vector prime_variables_by_order; @@ -476,6 +541,64 @@ struct CodegenInfo { /// true if WatchStatement uses voltage v variable bool is_voltage_used_by_watch_statements() const; + /** + * Check if net_send_buffer is required + */ + bool net_send_buffer_required() const noexcept { + if (net_receive_required() && !artificial_cell) { + if (net_event_used || net_send_used || is_watch_used()) { + return true; + } + } + return false; + } + + /** + * Check if net receive/send buffering kernels required + */ + bool net_receive_buffering_required() const noexcept { + return point_process && !artificial_cell && net_receive_node != nullptr; + } + + /** + * Check if nrn_state function 
is required + */ + bool nrn_state_required() const noexcept { + if (artificial_cell) { + return false; + } + return nrn_state_block != nullptr || currents.empty(); + } + + /** + * Check if nrn_cur function is required + */ + bool nrn_cur_required() const noexcept { + return breakpoint_node != nullptr && !currents.empty(); + } + + /** + * Check if net_receive node exist + */ + bool net_receive_exist() const noexcept { + return net_receive_node != nullptr; + } + + /** + * Check if breakpoint node exist + */ + bool breakpoint_exist() const noexcept { + return breakpoint_node != nullptr; + } + + + /** + * Check if net_receive function is required + */ + bool net_receive_required() const noexcept { + return net_receive_exist(); + } + /** * Checks if the given variable name belongs to a state variable * \param name The variable name @@ -518,6 +641,24 @@ struct CodegenInfo { /// if we need a call back to wrote_conc in neuron/coreneuron bool require_wrote_conc = false; + + /** + * Determine all \c int variables required during code generation + * \return A \c vector of \c int variables + */ + void get_int_variables(); + + /** + * Determine all ion write variables that require shadow vectors during code generation + * \return A \c vector of ion variables + */ + void get_shadow_variables(); + + /** + * Determine all \c float variables required during code generation + * \return A \c vector of \c float variables + */ + void get_float_variables(); }; /** @} */ // end of codegen_backends diff --git a/src/codegen/codegen_ispc_visitor.cpp b/src/codegen/codegen_ispc_visitor.cpp index fa1621178c..ce7628691c 100644 --- a/src/codegen/codegen_ispc_visitor.cpp +++ b/src/codegen/codegen_ispc_visitor.cpp @@ -435,7 +435,7 @@ void CodegenIspcVisitor::print_ion_variable() { /****************************************************************************************/ void CodegenIspcVisitor::print_net_receive_buffering_wrapper() { - if (!net_receive_required() || info.artificial_cell) { + if 
(!info.net_receive_required() || info.artificial_cell) { return; } printer->add_newline(2); @@ -509,17 +509,17 @@ void CodegenIspcVisitor::print_backend_compute_routine_decl() { printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (nrn_cur_required() && !emit_fallback[BlockType::Equation]) { + if (info.nrn_cur_required() && !emit_fallback[BlockType::Equation]) { compute_function = compute_method_name(BlockType::Equation); printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (nrn_state_required() && !emit_fallback[BlockType::State]) { + if (info.nrn_state_required() && !emit_fallback[BlockType::State]) { compute_function = compute_method_name(BlockType::State); printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (net_receive_required()) { + if (info.net_receive_required()) { auto net_recv_params = ParamVector(); net_recv_params.emplace_back("", fmt::format("{}*", instance_struct()), "", "inst"); net_recv_params.emplace_back("", "NrnThread*", "", "nt"); @@ -540,7 +540,7 @@ bool CodegenIspcVisitor::check_incompatibilities() { }; // instance vars - if (check_incompatible_var_name(codegen_float_variables, + if (check_incompatible_var_name(info.codegen_float_variables, get_name_from_symbol_type_vector)) { return true; } @@ -607,11 +607,11 @@ bool CodegenIspcVisitor::check_incompatibilities() { visitor::calls_function(*info.net_receive_node, "net_send"))); emit_fallback[BlockType::Equation] = emit_fallback[BlockType::Equation] || - (nrn_cur_required() && info.breakpoint_node && + (info.nrn_cur_required() && info.breakpoint_node && has_incompatible_nodes(*info.breakpoint_node)); emit_fallback[BlockType::State] = emit_fallback[BlockType::State] || - (nrn_state_required() && info.nrn_state_block && + (info.nrn_state_required() && info.nrn_state_block && has_incompatible_nodes(*info.nrn_state_block)); @@ -668,7 +668,7 @@ void 
CodegenIspcVisitor::print_block_wrappers_initial_equation_state() { print_wrapper_routine(naming::NRN_INIT_METHOD, BlockType::Initial); } - if (nrn_cur_required()) { + if (info.nrn_cur_required()) { if (emit_fallback[BlockType::Equation]) { logger->warn("Falling back to C backend for emitting breakpoint block"); fallback_codegen.print_nrn_cur(); @@ -677,7 +677,7 @@ void CodegenIspcVisitor::print_block_wrappers_initial_equation_state() { } } - if (nrn_state_required()) { + if (info.nrn_state_required()) { if (emit_fallback[BlockType::State]) { logger->warn("Falling back to C backend for emitting state block"); fallback_codegen.print_nrn_state(); diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index bd54f4143d..8b2f78a6cb 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -14,19 +14,18 @@ set(LLVM_CODEGEN_SOURCE_FILES # ============================================================================= include_directories(${LLVM_INCLUDE_DIRS}) -add_library(runner_obj OBJECT ${LLVM_CODEGEN_SOURCE_FILES}) -add_dependencies(runner_obj lexer_obj) -set_property(TARGET runner_obj PROPERTY POSITION_INDEPENDENT_CODE ON) - -add_library(llvm_codegen STATIC $) +add_library(llvm_codegen OBJECT ${LLVM_CODEGEN_SOURCE_FILES}) +set_property(TARGET llvm_codegen PROPERTY POSITION_INDEPENDENT_CODE ON) add_dependencies(llvm_codegen lexer util visitor) +target_link_libraries(llvm_codegen PRIVATE util) if(NOT NMODL_AS_SUBPROJECT) add_executable(nmodl_llvm_runner main.cpp) target_link_libraries( nmodl_llvm_runner + CLI11::CLI11 llvm_codegen codegen visitor diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 751fecfc81..769fcf9f01 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -158,6 +158,23 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { 
codegen_functions.push_back(function); } +std::shared_ptr CodegenLLVMHelperVisitor::create_instance_struct() { + ast::CodegenVarVector codegen_vars; + /// float variables are standard pointers to float vectors + for (auto& float_var: info.codegen_float_variables) { + auto name = new ast::Name(new ast::String(float_var->get_name())); + auto codegen_var = new ast::CodegenVar(1, name); + codegen_vars.emplace_back(codegen_var); + } + /// int variables are pointers to indexes for other vectors + for (auto& int_var: info.codegen_int_variables) { + auto name = new ast::Name(new ast::String(int_var.symbol->get_name())); + auto codegen_var = new ast::CodegenVar(1, name); + codegen_vars.emplace_back(codegen_var); + } + return std::make_shared(codegen_vars); +} + static void append_statements_from_block(ast::StatementVector& statements, const std::shared_ptr& block) { const auto& block_statements = block->get_statements(); @@ -206,11 +223,11 @@ void CodegenLLVMHelperVisitor::ion_read_statements(BlockType type, // ion variable to be read std::string& ion_varname = variable_names.second; // index for reading ion variable - std::string index_varname = "{}_id"_format(varname); + std::string index_varname = fmt::format("{}_id", varname); // first load the index - std::string index_statement = "{} = {}_index[id]"_format(index_varname, ion_varname); + std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); // now assign the value - std::string read_statement = "{} = {}[{}]"_format(varname, ion_varname, index_varname); + std::string read_statement = fmt::format("{} = {}[{}]", varname, ion_varname, index_varname); // push index definition, index statement and actual read statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); @@ -267,11 +284,11 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, /// create write ion and corresponding index statements auto 
create_write_statements = [&](std::string ion_varname, std::string op, std::string rhs) { // index for writing ion variable - std::string index_varname = "{}_id"_format(ion_varname); + std::string index_varname = fmt::format("{}_id", ion_varname); // load index - std::string index_statement = "{} = {}_index[id]"_format(index_varname, ion_varname); + std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); // ion variable to write (with index) - std::string ion_to_write = "{}[{}]"_format(ion_varname, index_varname); + std::string ion_to_write = fmt::format("{}[{}]", ion_varname, index_varname); // push index definition, index statement and actual write statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); @@ -294,7 +311,7 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, // for synapse type if (info.point_process) { auto area = codegen::naming::NODE_AREA_VARIABLE; - rhs += "*(1.e2/{})"_format(area); + rhs += fmt::format("*(1.e2/{})", area); } create_write_statements(lhs, op, rhs); } @@ -318,10 +335,10 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, index = 2; } else { /// \todo Unhandled case also in neuron implementation - throw std::logic_error("codegen error for {} ion"_format(ion.name)); + throw std::logic_error(fmt::format("codegen error for {} ion", ion.name)); } - std::string ion_type_name = "{}_type"_format(ion.name); - std::string lhs = "int {}"_format(ion_type_name); + std::string ion_type_name = fmt::format("{}_type", ion.name); + std::string lhs = fmt::format("int {}", ion_type_name); std::string op = "="; std::string rhs = ion_type_name; create_write_statements(lhs, op, rhs); @@ -523,7 +540,11 @@ void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { for (auto& fun: codegen_functions) { node.emplace_back_node(fun); } + + auto llvm_instance_struct = create_instance_struct(); + 
node.emplace_back_node(llvm_instance_struct); } + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 0ec3792b9d..5634d39bd8 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -61,6 +61,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { /// create new function for FUNCTION or PROCEDURE block void create_function_for_node(ast::Block& node); + /// create new InstanceStruct + std::shared_ptr create_instance_struct(); + public: CodegenLLVMHelperVisitor() = default; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 831c43317a..1433b5a648 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -90,6 +90,12 @@ llvm::Type* CodegenLLVMVisitor::get_default_fp_type() { return llvm::Type::getDoubleTy(*context); } +llvm::Type* CodegenLLVMVisitor::get_default_fp_ptr_type() { + if (use_single_precision) + return llvm::Type::getFloatPtrTy(*context); + return llvm::Type::getDoublePtrTy(*context); +} + void CodegenLLVMVisitor::run_llvm_opt_passes() { /// run some common optimisation passes that are commonly suggested fpm.add(llvm::createInstructionCombiningPass()); @@ -574,6 +580,17 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { values.push_back(var); } +void CodegenLLVMVisitor::visit_instance_struct(const ast::InstanceStruct& node) { + std::vector members; + for (const auto& variable: node.get_codegen_vars()) { + members.push_back(get_default_fp_ptr_type()); + } + + llvm_struct = llvm::StructType::create(*context, mod_filename + "_Instance"); + llvm_struct->setBody(members); + module->getOrInsertGlobal("inst", llvm_struct); +} + void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { // Get the current and the next blocks within the 
function. llvm::BasicBlock* curr_block = builder.GetInsertBlock(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 3003a119b5..7a5488de43 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -79,6 +79,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Use 32-bit floating-point type if true. Otherwise, use deafult 64-bit. bool use_single_precision; + // LLVM mechanism struct + llvm::StructType* llvm_struct; + /** *\brief Run LLVM optimisation passes on generated IR * @@ -147,6 +150,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Type* get_default_fp_type(); + /** + * Returns pointer to 64-bit or 32-bit LLVM floating type + * \return \c LLVM pointer to floating point type according to `use_single_precision` flag + */ + llvm::Type* get_default_fp_ptr_type(); + /** * Create a function call to an external method * \param name external method name @@ -242,6 +251,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_program(const ast::Program& node) override; void visit_unary_expression(const ast::UnaryExpression& node) override; void visit_var_name(const ast::VarName& node) override; + void visit_instance_struct(const ast::InstanceStruct& node) override; void visit_while_statement(const ast::WhileStatement& node) override; // \todo: move this to debug mode (e.g. 
-v option or --dump-ir) diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index acc7dec8b6..4c2db10cc6 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -117,6 +117,7 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/independent_definition.hpp ${PROJECT_BINARY_DIR}/src/ast/indexed_name.hpp ${PROJECT_BINARY_DIR}/src/ast/initial_block.hpp + ${PROJECT_BINARY_DIR}/src/ast/instance_struct.hpp ${PROJECT_BINARY_DIR}/src/ast/integer.hpp ${PROJECT_BINARY_DIR}/src/ast/kinetic_block.hpp ${PROJECT_BINARY_DIR}/src/ast/lag_statement.hpp diff --git a/src/language/nmodl.yaml b/src/language/nmodl.yaml index ef8fbbe49c..2f0bd32bc6 100644 --- a/src/language/nmodl.yaml +++ b/src/language/nmodl.yaml @@ -438,6 +438,18 @@ is base class and defines common interface for these nodes. children: + - InstanceStruct: + nmodl: "INSTANCE_STRUCT " + members: + - codegen_vars: + brief: "Vector of CodegenVars" + type: CodegenVar + vector: true + add: true + separator: "\\n " + prefix: {value: "{\\n ", force: true} + suffix: {value: "\\n}", force: true} + brief: "LLVM IR Struct that holds the mechanism instance's variables" - ParamBlock: nmodl: "PARAMETER " diff --git a/src/main.cpp b/src/main.cpp index 38f8cbdcb2..32925a037a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -298,13 +298,13 @@ int main(int argc, const char* argv[]) { auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); llvm_opt->add_flag("--ir", llvm_ir, - "Generate LLVM IR ({})"_format(llvm_ir))->ignore_case(); + fmt::format("Generate LLVM IR ({})", llvm_ir))->ignore_case(); llvm_opt->add_flag("--opt", llvm_opt_passes, - "Run LLVM optimisation passes ({})"_format(llvm_opt_passes))->ignore_case(); + fmt::format("Run LLVM optimisation passes ({})", llvm_opt_passes))->ignore_case(); llvm_opt->add_flag("--single-precision", llvm_float_type, - "Use single precision floating-point types 
({})"_format(llvm_float_type))->ignore_case(); + fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); #endif // clang-format on diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 88ecd3b75d..18741d41ac 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -112,6 +112,7 @@ if(NMODL_ENABLE_LLVM) add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_llvm_execution.cpp) target_link_libraries( testllvm + Catch2::Catch2 llvm_codegen codegen visitor @@ -124,6 +125,7 @@ if(NMODL_ENABLE_LLVM) ${LLVM_LIBS_TO_LINK}) target_link_libraries( test_llvm_runner + Catch2::Catch2 llvm_codegen codegen visitor diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 90e8fb3cc2..a14f7a226c 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -5,7 +5,7 @@ * Lesser General Public License. See top-level LICENSE file for details. *************************************************************************/ -#include +#include #include #include "ast/program.hpp" diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index d16b02b2f5..34274e031e 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -5,7 +5,7 @@ * Lesser General Public License. See top-level LICENSE file for details. *************************************************************************/ -#include +#include #include #include "ast/program.hpp" @@ -367,12 +367,12 @@ SCENARIO("Function", "[visitor][llvm]") { std::smatch m; // Check function signature. The return type should be the default double type. 
- std::regex function_signature(R"(define double @foo\(double %x1\) \{)"); + std::regex function_signature(R"(define double @foo\(double %x[0-9].*\) \{)"); REQUIRE(std::regex_search(module_string, m, function_signature)); // Check that function arguments are allocated on the local stack. std::regex alloca_instr(R"(%x = alloca double)"); - std::regex store_instr(R"(store double %x1, double\* %x)"); + std::regex store_instr(R"(store double %x[0-9].*, double\* %x)"); REQUIRE(std::regex_search(module_string, m, alloca_instr)); REQUIRE(std::regex_search(module_string, m, store_instr)); @@ -638,7 +638,7 @@ SCENARIO("Procedure", "[visitor][llvm]") { std::smatch m; // Check procedure signature. - std::regex function_signature(R"(define i32 @with_argument\(double %x1\) \{)"); + std::regex function_signature(R"(define i32 @with_argument\(double %x[0-9].*\) \{)"); REQUIRE(std::regex_search(module_string, m, function_signature)); // Check dummy return. @@ -653,7 +653,7 @@ SCENARIO("Procedure", "[visitor][llvm]") { // Check that procedure arguments are allocated on the local stack. 
std::regex alloca_instr(R"(%x = alloca double)"); - std::regex store_instr(R"(store double %x1, double\* %x)"); + std::regex store_instr(R"(store double %x[0-9].*, double\* %x)"); REQUIRE(std::regex_search(module_string, m, alloca_instr)); REQUIRE(std::regex_search(module_string, m, store_instr)); } @@ -753,8 +753,44 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { // Check if the values are optimised out std::regex empty_proc( - R"(define i32 @add\(double %a1, double %b2\) \{\n(\s)*ret i32 0\n\})"); + R"(define i32 @add\(double %a[0-9].*, double %b[0-9].*\) \{\n(\s)*ret i32 0\n\})"); REQUIRE(std::regex_search(module_string, m, empty_proc)); } } } + +//============================================================================= +// Create Instance Struct +//============================================================================= + +SCENARIO("Creation of Instance Struct", "[visitor][llvm][instance_struct]") { + GIVEN("NEURON block with RANGE variables and IONS") { + std::string nmodl_text = R"( + NEURON { + USEION na READ ena WRITE ina + NONSPECIFIC_CURRENT il + RANGE minf, hinf + } + + STATE { + m + } + + ASSIGNED { + v (mV) + celsius (degC) + minf + hinf + } + )"; + + THEN("create struct with the declared variables") { + std::string module_string = run_llvm_visitor(nmodl_text, true); + std::smatch m; + + std::regex instance_struct_declaration( + R"(%unknown_Instance = type \{ double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\* \})"); + REQUIRE(std::regex_search(module_string, m, instance_struct_declaration)); + } + } +} From 9ca602e2edc1e056670aa68a5e77569e58478dce Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Wed, 3 Feb 2021 22:45:41 +0300 Subject: [PATCH 023/105] Printf support in LLVM IR codegen (#510) - Added support for string function arguments. These are converted into global `i8` array values. - Added support for `printf` function call with variable number of arguments. 
- Refactored function/procedure call argument processing into a separate function. fixes #510 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 57 +++++++++++++++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 18 +++++-- test/unit/codegen/codegen_llvm_ir.cpp | 36 ++++++++++++++ 3 files changed, 96 insertions(+), 15 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 1433b5a648..3bb3b38dfc 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -116,6 +116,11 @@ void CodegenLLVMVisitor::run_llvm_opt_passes() { void CodegenLLVMVisitor::create_external_method_call(const std::string& name, const ast::ExpressionVector& arguments) { + if (name == "printf") { + create_printf_call(arguments); + return; + } + std::vector argument_values; std::vector argument_types; for (const auto& arg: arguments) { @@ -145,24 +150,39 @@ void CodegenLLVMVisitor::create_function_call(llvm::Function* func, const std::string& name, const ast::ExpressionVector& arguments) { // Check that function is called with the expected number of arguments. - if (arguments.size() != func->arg_size()) { + if (!func->isVarArg() && arguments.size() != func->arg_size()) { throw std::runtime_error("Error: Incorrect number of arguments passed"); } - // Process each argument and add it to a vector to pass to the function call instruction. Note - // that type checks are not needed here as NMODL operates on doubles by default. + // Pack function call arguments to vector and create a call instruction. 
std::vector argument_values; - for (const auto& arg: arguments) { - arg->accept(*this); - llvm::Value* value = values.back(); - values.pop_back(); - argument_values.push_back(value); - } - + argument_values.reserve(arguments.size()); + pack_function_call_arguments(arguments, argument_values); llvm::Value* call = builder.CreateCall(func, argument_values); values.push_back(call); } +void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& arguments) { + // First, create printf declaration or insert it if it does not exit. + std::string name = "printf"; + llvm::Function* printf = module->getFunction(name); + if (!printf) { + llvm::Type* ptr_type = llvm::Type::getInt8PtrTy(*context); + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + llvm::FunctionType* printf_type = + llvm::FunctionType::get(i32_type, ptr_type, /*isVarArg=*/true); + + printf = + llvm::Function::Create(printf_type, llvm::Function::ExternalLinkage, name, *module); + } + + // Create a call instruction. + std::vector argument_values; + argument_values.reserve(arguments.size()); + pack_function_call_arguments(arguments, argument_values); + builder.CreateCall(printf, argument_values); +} + void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::CodegenFunction& node) { const auto& name = node.get_node_name(); const auto& arguments = node.get_arguments(); @@ -188,6 +208,23 @@ llvm::Value* CodegenLLVMVisitor::lookup(const std::string& name) { return val; } +void CodegenLLVMVisitor::pack_function_call_arguments(const ast::ExpressionVector& arguments, + std::vector& arg_values) { + for (const auto& arg: arguments) { + if (arg->is_string()) { + // If the argument is a string, create a global i8* variable with it. 
+ auto string_arg = std::dynamic_pointer_cast(arg); + llvm::Value* str = builder.CreateGlobalStringPtr(string_arg->get_value()); + arg_values.push_back(str); + } else { + arg->accept(*this); + llvm::Value* value = values.back(); + values.pop_back(); + arg_values.push_back(value); + } + } +} + llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 7a5488de43..9bdbdef7e9 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -173,6 +173,11 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void create_function_call(llvm::Function* func, const std::string& name, const ast::ExpressionVector& arguments); + /** + * Create a function call to printf function + * \param arguments expressions passed as arguments to the printf call + */ + void create_printf_call(const ast::ExpressionVector& arguments); /** * Emit function or procedure declaration in LLVM given the node @@ -195,6 +200,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Value* lookup(const std::string& name); + /** + * Fills values vector with processed NMODL function call arguments + * \param arguments expression vector + * \param arg_values vector of LLVM IR values to fill + */ + void pack_function_call_arguments(const ast::ExpressionVector& arguments, + std::vector& arg_values); + /** * Visit nmodl arithmetic binary operator * \param lhs LLVM value of evaluated lhs expression @@ -229,11 +242,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Value* visit_comparison_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); - /** - * Visit nmodl function or procedure - * \param node the AST node representing the function or procedure in NMODL - */ - void visit_procedure_or_function(const ast::Block& node); // Visitors void 
visit_binary_expression(const ast::BinaryExpression& node) override; diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 34274e031e..fbebf63720 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -474,6 +474,42 @@ SCENARIO("Function call", "[visitor][llvm]") { } } + GIVEN("A call to printf") { + std::string nmodl_text = R"( + PROCEDURE bar() { + LOCAL i + i = 0 + printf("foo") + printf("bar %d", i) + } + )"; + + THEN("printf is declared and global string values are created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for global string values. + std::regex str1( + R"(@[0-9]+ = private unnamed_addr constant \[6 x i8\] c\"\\22foo\\22\\00\")"); + std::regex str2( + R"(@[0-9]+ = private unnamed_addr constant \[9 x i8\] c\"\\22bar %d\\22\\00\")"); + REQUIRE(std::regex_search(module_string, m, str1)); + REQUIRE(std::regex_search(module_string, m, str2)); + + // Check for printf declaration. + std::regex declaration(R"(declare i32 @printf\(i8\*, \.\.\.\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check the correct calls are made. 
+ std::regex call1( + R"(call i32 \(i8\*, \.\.\.\) @printf\(i8\* getelementptr inbounds \(\[6 x i8\], \[6 x i8\]\* @[0-9]+, i32 0, i32 0\)\))"); + std::regex call2( + R"(call i32 \(i8\*, \.\.\.\) @printf\(i8\* getelementptr inbounds \(\[9 x i8\], \[9 x i8\]\* @[0-9]+, i32 0, i32 0\), double %[0-9]+\))"); + REQUIRE(std::regex_search(module_string, m, call1)); + REQUIRE(std::regex_search(module_string, m, call2)); + } + } + GIVEN("A call to function with the wrong number of arguments") { std::string nmodl_text = R"( FUNCTION foo(x, y) { From b3f6fa2a71b76ad02c9e42811858aefc32e997f5 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 15 Feb 2021 11:21:58 +0100 Subject: [PATCH 024/105] =?UTF-8?q?Fix=20issue=20error:=20=E2=80=98runtime?= =?UTF-8?q?=5Ferror=E2=80=99=20is=20not=20a=20member=20of=20=E2=80=98std?= =?UTF-8?q?=E2=80=99=20(#512)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/language/templates/ast/ast_decl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/language/templates/ast/ast_decl.hpp b/src/language/templates/ast/ast_decl.hpp index dd03c3d282..82e43048fc 100644 --- a/src/language/templates/ast/ast_decl.hpp +++ b/src/language/templates/ast/ast_decl.hpp @@ -14,6 +14,7 @@ #include #include #include +#include /// \file /// \brief Auto generated AST node types and aliases declaration From f15e3c58402836005a0947fa4fdf123913ed0950 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Fri, 5 Mar 2021 19:56:29 +0100 Subject: [PATCH 025/105] Move code gen specific InstanceStruct node to codegen.yaml (#526) * Move code gen specific InstanceStruct node to codegen.yaml - nmodl.yaml file is more for language constructs - InstanceStruct is specific for code generation and hence move it to codegen.yaml * Update CI scripts * fix cmake-format with v==0.6.13 --- src/language/codegen.yaml | 12 ++++++++++++ src/language/nmodl.yaml | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff 
--git a/src/language/codegen.yaml b/src/language/codegen.yaml index 81421ecff9..fd00696dd4 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -143,6 +143,18 @@ brief: "Body of the function" type: StatementBlock getter: {override: true} + - InstanceStruct: + nmodl: "INSTANCE_STRUCT " + members: + - codegen_vars: + brief: "Vector of CodegenVars" + type: CodegenVar + vector: true + add: true + separator: "\\n " + prefix: {value: "{\\n ", force: true} + suffix: {value: "\\n}", force: true} + brief: "LLVM IR Struct that holds the mechanism instance's variables" - WrappedExpression: brief: "Wrap any other expression type" members: diff --git a/src/language/nmodl.yaml b/src/language/nmodl.yaml index 2f0bd32bc6..ef8fbbe49c 100644 --- a/src/language/nmodl.yaml +++ b/src/language/nmodl.yaml @@ -438,18 +438,6 @@ is base class and defines common interface for these nodes. children: - - InstanceStruct: - nmodl: "INSTANCE_STRUCT " - members: - - codegen_vars: - brief: "Vector of CodegenVars" - type: CodegenVar - vector: true - add: true - separator: "\\n " - prefix: {value: "{\\n ", force: true} - suffix: {value: "\\n}", force: true} - brief: "LLVM IR Struct that holds the mechanism instance's variables" - ParamBlock: nmodl: "PARAMETER " From f5dc06bf257bc510f43b698fac008eeb7cf9aafd Mon Sep 17 00:00:00 2001 From: Pramod S Kumbhar Date: Sat, 27 Feb 2021 13:15:09 +0100 Subject: [PATCH 026/105] * Improvements to codegen helper (Part I) - instance structure now contains all global variables - instance structure now contains index variables for ions - nrn_state kernel now has all variables converted to instance - InstanceVarHelper added to query variable and it's location * Support for codegen variable with type * Add nmodl_to_json helper added in main.cpp * Added --vector-width CLI option * Add instance struct argument to nrn_state_hh * Add comments as TODOs to support LLVM IR generation Note that this commit and next commit (Part II) are required to make LLVM 
IR code generation working. Vector IR generation is working except indirect indexes. See comment in #531. --- src/codegen/codegen_naming.hpp | 3 + .../llvm/codegen_llvm_helper_visitor.cpp | 78 ++++++++++++++----- .../llvm/codegen_llvm_helper_visitor.hpp | 71 ++++++++++++++++- src/codegen/llvm/codegen_llvm_visitor.cpp | 39 +++++++++- src/codegen/llvm/codegen_llvm_visitor.hpp | 8 ++ src/language/code_generator.cmake | 3 +- src/language/codegen.yaml | 25 ++++-- src/language/node_info.py | 1 + src/language/nodes.py | 4 + .../templates/visitors/nmodl_visitor.cpp | 7 +- src/main.cpp | 68 +++++++++------- 11 files changed, 246 insertions(+), 61 deletions(-) diff --git a/src/codegen/codegen_naming.hpp b/src/codegen/codegen_naming.hpp index 9739285bc4..98d1003734 100644 --- a/src/codegen/codegen_naming.hpp +++ b/src/codegen/codegen_naming.hpp @@ -80,6 +80,9 @@ static constexpr char VOLTAGE_UNUSED_VARIABLE[] = "v_unused"; /// variable t indicating last execution time of net receive block static constexpr char T_SAVE_VARIABLE[] = "tsave"; +/// global variable second_order +static constexpr char SECOND_ORDER_VARIABLE[] = "secondorder"; + /// shadow rhs variable in neuron thread structure static constexpr char NTHREAD_RHS_SHADOW[] = "_shadow_rhs"; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 769fcf9f01..43dc496291 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -141,12 +141,12 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { block->emplace_back_statement(return_statement); /// prepare function arguments based original node arguments - ast::CodegenArgumentVector arguments; + ast::CodegenVarWithTypeVector arguments; for (const auto& param: node.get_parameters()) { /// create new type and name for creating new ast node auto type = new ast::CodegenVarType(FLOAT_TYPE); auto var = param->get_name()->clone(); - 
arguments.emplace_back(new ast::CodegenArgument(type, var)); + arguments.emplace_back(new ast::CodegenVarWithType(type, 0, var)); } /// return type of the function is same as return variable type @@ -159,19 +159,43 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { } std::shared_ptr CodegenLLVMHelperVisitor::create_instance_struct() { - ast::CodegenVarVector codegen_vars; + ast::CodegenVarWithTypeVector codegen_vars; + + auto add_var_with_type = + [&](const std::string& name, const ast::AstNodeType type, int is_pointer) { + auto var_name = new ast::Name(new ast::String(name)); + auto var_type = new ast::CodegenVarType(type); + auto codegen_var = new ast::CodegenVarWithType(var_type, is_pointer, var_name); + codegen_vars.emplace_back(codegen_var); + }; + /// float variables are standard pointers to float vectors for (auto& float_var: info.codegen_float_variables) { - auto name = new ast::Name(new ast::String(float_var->get_name())); - auto codegen_var = new ast::CodegenVar(1, name); - codegen_vars.emplace_back(codegen_var); + add_var_with_type(float_var->get_name(), FLOAT_TYPE, 1); } + /// int variables are pointers to indexes for other vectors for (auto& int_var: info.codegen_int_variables) { - auto name = new ast::Name(new ast::String(int_var.symbol->get_name())); - auto codegen_var = new ast::CodegenVar(1, name); - codegen_vars.emplace_back(codegen_var); + add_var_with_type(int_var.symbol->get_name(), FLOAT_TYPE, 1); + } + + // for integer variables, there should be index + for (auto& int_var: info.codegen_int_variables) { + std::string var_name = int_var.symbol->get_name() + "_index"; + add_var_with_type(var_name, INTEGER_TYPE, 1); } + + // add voltage and node index + add_var_with_type("voltage", FLOAT_TYPE, 1); + add_var_with_type("node_index", INTEGER_TYPE, 1); + + // add dt, t, celsius + add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, 0); + add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, 0); + 
add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, 0); + add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, 0); + add_var_with_type(MECH_NODECOUNT_VAR, INTEGER_TYPE, 0); + return std::make_shared(codegen_vars); } @@ -362,13 +386,24 @@ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, auto variables = collect_nodes(node, {ast::AstNodeType::VAR_NAME}); for (auto& v: variables) { auto variable = std::dynamic_pointer_cast(v); - /// if variable is of type instance then convert it to index - if (info.is_an_instance_variable(variable->get_node_name())) { + auto variable_name = variable->get_node_name(); + + /// all instance variables defined in the mod file should be converted to + /// indexed variables based on the loop iteration variable + if (info.is_an_instance_variable(variable_name)) { auto name = variable->get_name()->clone(); auto index = new ast::Name(new ast::String(index_var)); auto indexed_name = std::make_shared(name, index); variable->set_name(indexed_name); } + + /// instance_var_helper check of instance variables from mod file as well + /// as extra variables like ion index variables added for code generation + if (instance_var_helper.is_an_instance_variable(variable_name)) { + auto name = new ast::Name(new ast::String(MECH_INSTANCE_VAR)); + auto var = std::make_shared(name, variable->clone()); + variable->set_name(var); + } } } @@ -438,7 +473,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// loop constructs : initialization, condition and increment const auto& initialization = create_statement_as_expression("id = 0"); const auto& condition = create_expression("id < node_count"); - const auto& increment = create_statement_as_expression("id = id + 1"); + const auto& increment = create_statement_as_expression("id = id + {}"_format(vector_width)); /// loop body : initialization + solve blocks ast::StatementVector loop_def_statements; @@ -496,9 +531,6 @@ void 
CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// now construct a new code block which will become the body of the loop auto loop_block = std::make_shared(loop_body); - /// convert all variables inside loop body to instance variables - convert_to_instance_variable(*loop_block, loop_index_var); - /// convert local statement to codegenvar statement convert_local_statement(*loop_block); @@ -508,6 +540,9 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { increment, loop_block); + /// convert all variables inside loop body to instance variables + convert_to_instance_variable(*for_loop_statement, loop_index_var); + /// loop itself becomes one of the statement in the function function_statements.push_back(for_loop_statement); @@ -520,7 +555,12 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); /// \todo : currently there are no arguments - ast::CodegenArgumentVector code_arguments; + ast::CodegenVarWithTypeVector code_arguments; + + auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); + auto instance_var_name = new ast::Name(new ast::String("mech")); + auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); + code_arguments.emplace_back(instance_var); /// finally, create new function auto function = @@ -535,14 +575,16 @@ void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { CodegenHelperVisitor v; info = v.analyze(node); + instance_var_helper.instance = create_instance_struct(); + node.emplace_back_node(instance_var_helper.instance); + logger->info("Running CodegenLLVMHelperVisitor"); node.visit_children(*this); for (auto& fun: codegen_functions) { node.emplace_back_node(fun); } - auto llvm_instance_struct = create_instance_struct(); - node.emplace_back_node(llvm_instance_struct); + std::cout << nmodl::to_nmodl(node); } diff --git 
a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 5634d39bd8..981372b4d5 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -14,6 +14,7 @@ #include +#include "ast/instance_struct.hpp" #include "codegen/codegen_info.hpp" #include "symtab/symbol_table.hpp" #include "visitors/ast_visitor.hpp" @@ -21,7 +22,7 @@ namespace nmodl { namespace codegen { - +using namespace fmt::literals; typedef std::vector> CodegenFunctionVector; /** @@ -29,6 +30,57 @@ typedef std::vector> CodegenFunctionVector * @{ */ +/** + * \class InstanceVarHelper + * \brief Helper to query instance variables information + * + * For LLVM IR generation we need to know the variable, it's type and + * location in the instance structure. This helper provides convenient + * functions to query this information. + */ +struct InstanceVarHelper { + /// pointer to instance node in the AST + std::shared_ptr instance; + + /// find variable with given name and return the iterator + ast::CodegenVarWithTypeVector::const_iterator find_variable( + const ast::CodegenVarWithTypeVector& vars, + const std::string& name) { + return find_if(vars.begin(), + vars.end(), + [&](const std::shared_ptr& v) { + return v->get_node_name() == name; + }); + } + + /// check if given variable is instance variable + bool is_an_instance_variable(const std::string& name) { + const auto& vars = instance->get_codegen_vars(); + return find_variable(vars, name) != vars.end(); + } + + /// return codegen variable with a given name + const std::shared_ptr& get_variable(const std::string& name) { + const auto& vars = instance->get_codegen_vars(); + auto it = find_variable(vars, name); + if (it == vars.end()) { + throw std::runtime_error("Can not find variable with name {}"_format(name)); + } + return *it; + } + + /// return position of the variable in the instance structure + int get_variable_index(const std::string& name) { 
+ const auto& vars = instance->get_codegen_vars(); + auto it = find_variable(vars, name); + if (it == vars.end()) { + throw std::runtime_error("Can not find codegen variable with name {}"_format(name)); + } + return (it - vars.begin()); + } +}; + + /** * \class CodegenLLVMHelperVisitor * \brief Helper visitor for AST information to help code generation backends @@ -48,16 +100,26 @@ typedef std::vector> CodegenFunctionVector * these will be common across all backends. */ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { + // explicit vectorisation width + int vector_width; + /// newly generated code generation specific functions CodegenFunctionVector codegen_functions; /// ast information for code generation codegen::CodegenInfo info; + /// mechanism data helper + InstanceVarHelper instance_var_helper; + /// default integer and float node type const ast::AstNodeType INTEGER_TYPE = ast::AstNodeType::INTEGER; const ast::AstNodeType FLOAT_TYPE = ast::AstNodeType::DOUBLE; + /// name of the mechanism instance parameter + const std::string MECH_INSTANCE_VAR = "mech"; + const std::string MECH_NODECOUNT_VAR = "node_count"; + /// create new function for FUNCTION or PROCEDURE block void create_function_for_node(ast::Block& node); @@ -65,7 +127,12 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { std::shared_ptr create_instance_struct(); public: - CodegenLLVMHelperVisitor() = default; + CodegenLLVMHelperVisitor(int vector_width) + : vector_width(vector_width){}; + + const InstanceVarHelper& get_instance_var_helper() { + return instance_var_helper; + } /// run visitor and return code generation functions CodegenFunctionVector get_codegen_functions(const ast::Program& node); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 3bb3b38dfc..80bdfd20e3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -6,7 +6,6 @@ 
*************************************************************************/ #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "ast/all.hpp" #include "visitors/rename_visitor.hpp" @@ -79,6 +78,8 @@ llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& return llvm::Type::getInt32Ty(*context); case ast::AstNodeType::VOID: return llvm::Type::getVoidTy(*context); + // TODO :: George/Ioannis : Here we have to also return INSTANCE_STRUCT type + // as it is used as an argument to nrn_state function default: throw std::runtime_error("Error: expecting a type in CodegenVarType node\n"); } @@ -556,8 +557,13 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // - convert function and procedure blocks into CodegenFunctions // - gather information about AST. For now, information about functions // and procedures is used only. - CodegenLLVMHelperVisitor v; + CodegenLLVMHelperVisitor v{vector_width}; const auto& functions = v.get_codegen_functions(node); + instance_var_helper = v.get_instance_var_helper(); + + // TODO :: George / Ioannis :: before emitting procedures, we have + // to emmit INSTANCE_STRUCT type as it's used as an argument. + // Currently it's done in node.visit_children which is late. // For every function, generate its declaration. Thus, we can look up // `llvm::Function` in the symbol table in the module. 
@@ -603,6 +609,16 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { if (!identifier->is_name() && !identifier->is_indexed_name()) throw std::runtime_error("Error: Unsupported variable type"); + // TODO :: George :: here instance_var_helper can be used to query + // variable type and it's index into structure + auto name = node.get_node_name(); + + auto codegen_var_with_type = instance_var_helper.get_variable(name); + auto codegen_var_index = instance_var_helper.get_variable_index(name); + // this will be INTEGER or DOUBLE + auto var_type = codegen_var_with_type->get_type()->get_type(); + auto is_pointer = codegen_var_with_type->get_is_pointer(); + llvm::Value* ptr; if (identifier->is_name()) ptr = lookup(node.get_node_name()); @@ -620,7 +636,24 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { void CodegenLLVMVisitor::visit_instance_struct(const ast::InstanceStruct& node) { std::vector members; for (const auto& variable: node.get_codegen_vars()) { - members.push_back(get_default_fp_ptr_type()); + // TODO :: Ioannis / George :: we have now double*, int*, double and int + // variables in the instance structure. Each variable is of type + // ast::CodegenVarWithType. So we can query variable type and if + // it's pointer. + auto is_pointer = variable->get_is_pointer(); + auto type = variable->get_type()->get_type(); + + // todo : clean up ? + if (type == ast::AstNodeType::DOUBLE) { + auto llvm_type = is_pointer ? 
get_default_fp_ptr_type() : get_default_fp_type(); + members.push_back(llvm_type); + } else { + if (is_pointer) { + members.push_back(llvm::Type::getInt32PtrTy(*context)); + } else { + members.push_back(llvm::Type::getInt32Ty(*context)); + } + } } llvm_struct = llvm::StructType::create(*context, mod_filename + "_Instance"); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 9bdbdef7e9..b20a19bac7 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -18,6 +18,7 @@ #include #include +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" @@ -56,6 +57,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { std::string output_dir; private: + InstanceVarHelper instance_var_helper; + std::unique_ptr context = std::make_unique(); std::unique_ptr module = std::make_unique(mod_filename, *context); @@ -79,6 +82,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Use 32-bit floating-point type if true. Otherwise, use deafult 64-bit. 
bool use_single_precision; + // explicit vectorisation width + int vector_width; + // LLVM mechanism struct llvm::StructType* llvm_struct; @@ -100,11 +106,13 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, bool opt_passes, + int vector_width = 1, bool use_single_precision = false) : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) , use_single_precision(use_single_precision) + , vector_width(vector_width) , builder(*context) , fpm(module.get()) {} diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index 4c2db10cc6..1667437217 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -65,15 +65,16 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/block_comment.hpp ${PROJECT_BINARY_DIR}/src/ast/boolean.hpp ${PROJECT_BINARY_DIR}/src/ast/breakpoint_block.hpp - ${PROJECT_BINARY_DIR}/src/ast/codegen_argument.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_atomic_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_for_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_function.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_instance_var.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_return_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_struct.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var_list_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var_type.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_with_type.hpp ${PROJECT_BINARY_DIR}/src/ast/compartment.hpp ${PROJECT_BINARY_DIR}/src/ast/conductance_hint.hpp ${PROJECT_BINARY_DIR}/src/ast/conserve.hpp diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index fd00696dd4..01593035ac 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -49,17 +49,30 @@ brief: "Name of the variable" type: Identifier node_name: true - - CodegenArgument: - brief: "Represent 
argument to a function" + - CodegenVarWithType: + brief: "Represent variable used for code generation" members: - type: - brief: "Type of the argument" + brief: "Type of the variable" type: CodegenVarType suffix: {value: " "} + - is_pointer: + brief: "If variable is pointer type" + type: int - name: - brief: "Name of the argument" + brief: "Name of the variable" type: Identifier node_name: true + - CodegenInstanceVar: + brief: "Represent instance variable" + members: + - instance_var: + brief: "Instance variable" + type: Name + suffix: {value: "->"} + - member_var: + brief: "Member variable within instance" + type: Identifier - Block: children: - NrnStateBlock: @@ -134,7 +147,7 @@ node_name: true - arguments: brief: "Vector of the parameters to the function" - type: CodegenArgument + type: CodegenVarWithType vector: true prefix: {value: "(", force: true} suffix: {value: ")", force: true} @@ -148,7 +161,7 @@ members: - codegen_vars: brief: "Vector of CodegenVars" - type: CodegenVar + type: CodegenVarWithType vector: true add: true separator: "\\n " diff --git a/src/language/node_info.py b/src/language/node_info.py index b08041f0a4..4a8e5fcc53 100644 --- a/src/language/node_info.py +++ b/src/language/node_info.py @@ -167,6 +167,7 @@ UNIT_BLOCK = "UnitBlock" AST_NODETYPE_NODE= "AstNodeType" CODEGEN_VAR_TYPE_NODE = "CodegenVarType" +CODEGEN_VAR_WITH_TYPE_NODE = "CodegenVarWithType" # name of variable in prime node which represent order of derivative ORDER_VAR_NAME = "order" diff --git a/src/language/nodes.py b/src/language/nodes.py index d6a804a315..fd8a64f528 100644 --- a/src/language/nodes.py +++ b/src/language/nodes.py @@ -144,6 +144,10 @@ def is_ast_nodetype_node(self): def is_codegen_var_type_node(self): return self.class_name == node_info.CODEGEN_VAR_TYPE_NODE + @property + def is_codegen_var_with_type_node(self): + return self.class_name == node_info.CODEGEN_VAR_WITH_TYPE_NODE + @property def is_enum_node(self): data_type = node_info.DATA_TYPES[self.class_name] 
diff --git a/src/language/templates/visitors/nmodl_visitor.cpp b/src/language/templates/visitors/nmodl_visitor.cpp index 7956fefdd1..c4d43ec755 100644 --- a/src/language/templates/visitors/nmodl_visitor.cpp +++ b/src/language/templates/visitors/nmodl_visitor.cpp @@ -115,7 +115,12 @@ void NmodlPrintVisitor::visit_{{ node.class_name|snake_case}}(const {{ node.clas {% endif %} {% for child in node.children %} {% call guard(child.force_prefix, child.force_suffix) -%} - {% if child.is_base_type_node %} + + {% if node.is_codegen_var_with_type_node and child.varname == "is_pointer" %} + if(node.get_{{ child.varname }}()) { + printer->add_element("*"); + } + {% elif child.is_base_type_node %} {% if child.is_ast_nodetype_node %} printer->add_element(ast::to_string(node.get_{{child.varname}}())); {% endif %} diff --git a/src/main.cpp b/src/main.cpp index 32925a037a..31ba53c669 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -174,6 +174,9 @@ int main(int argc, const char* argv[]) { /// run llvm optimisation passes bool llvm_opt_passes(false); + + /// llvm vector width; + int llvm_vec_width = 1; #endif // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) @@ -305,6 +308,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_flag("--single-precision", llvm_float_type, fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); + llvm_opt->add_option("--vector-width", + llvm_vec_width, + fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); #endif // clang-format on @@ -334,16 +340,24 @@ int main(int argc, const char* argv[]) { } }; + /// write ast to nmodl + const auto ast_to_json = [json_ast](ast::Program& ast, const std::string& filepath) { + if (json_ast) { + JSONVisitor(filepath).write(ast); + logger->info("AST to JSON transformation written to {}", filepath); + } + }; + for (const auto& file: mod_files) { logger->info("Processing {}", file); const auto modfile = 
utils::remove_extension(utils::base_name(file)); /// create file path for nmodl file - auto filepath = [scratch_dir, modfile](const std::string& suffix) { + auto filepath = [scratch_dir, modfile](const std::string& suffix, const std::string& ext) { static int count = 0; return fmt::format( - "{}/{}.{}.{}.mod", scratch_dir, modfile, std::to_string(count++), suffix); + "{}/{}.{}.{}.{}", scratch_dir, modfile, std::to_string(count++), suffix, ext); }; /// driver object creates lexer and parser, just call parser method @@ -377,7 +391,7 @@ int main(int argc, const char* argv[]) { { logger->info("Running CVode to cnexp visitor"); AfterCVodeToCnexpVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("after_cvode_to_cnexp")); + ast_to_nmodl(*ast, filepath("after_cvode_to_cnexp", "mod")); } /// Rename variables that match ISPC compiler double constants @@ -385,7 +399,7 @@ int main(int argc, const char* argv[]) { logger->info("Running ISPC variables rename visitor"); IspcRenameVisitor(ast).visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("ispc_double_rename")); + ast_to_nmodl(*ast, filepath("ispc_double_rename", "mod")); } /// GLOBAL to RANGE rename visitor @@ -398,7 +412,7 @@ int main(int argc, const char* argv[]) { logger->info("Running GlobalToRange visitor"); GlobalToRangeVisitor(*ast).visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("global_to_range")); + ast_to_nmodl(*ast, filepath("global_to_range", "mod")); } /// LOCAL to ASSIGNED visitor @@ -407,7 +421,7 @@ int main(int argc, const char* argv[]) { PerfVisitor().visit_program(*ast); LocalToAssignedVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_to_assigned")); + ast_to_nmodl(*ast, filepath("local_to_assigned", "mod")); } { @@ -433,34 +447,26 @@ int main(int argc, const char* argv[]) { symtab->print(std::cout); } - ast_to_nmodl(*ast, 
filepath("ast")); - - if (json_ast) { - std::string file{scratch_dir}; - file += "/"; - file += modfile; - file += ".ast.json"; - logger->info("Writing AST into {}", file); - JSONVisitor(file).write(*ast); - } + ast_to_nmodl(*ast, filepath("ast", "mod")); + ast_to_json(*ast, filepath("ast", "json")); if (verbatim_rename) { logger->info("Running verbatim rename visitor"); VerbatimVarRenameVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("verbatim_rename")); + ast_to_nmodl(*ast, filepath("verbatim_rename", "mod")); } if (nmodl_const_folding) { logger->info("Running nmodl constant folding visitor"); ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("constfold")); + ast_to_nmodl(*ast, filepath("constfold", "mod")); } if (nmodl_unroll) { logger->info("Running nmodl loop unroll visitor"); LoopUnrollVisitor().visit_program(*ast); ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("unroll")); + ast_to_nmodl(*ast, filepath("unroll", "mod")); SymtabVisitor(update_symtab).visit_program(*ast); } @@ -472,7 +478,7 @@ int main(int argc, const char* argv[]) { auto kineticBlockVisitor = KineticBlockVisitor(); kineticBlockVisitor.visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - const auto filename = filepath("kinetic"); + const auto filename = filepath("kinetic", "mod"); ast_to_nmodl(*ast, filename); if (nmodl_ast && kineticBlockVisitor.get_conserve_statement_count()) { logger->warn( @@ -486,7 +492,7 @@ int main(int argc, const char* argv[]) { logger->info("Running STEADYSTATE visitor"); SteadystateVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("steadystate")); + ast_to_nmodl(*ast, filepath("steadystate", "mod")); } /// Parsing units fron "nrnunits.lib" and mod files @@ -503,14 +509,14 @@ int main(int argc, const char* argv[]) { if (nmodl_inline) { logger->info("Running nmodl inline visitor"); InlineVisitor().visit_program(*ast); - 
ast_to_nmodl(*ast, filepath("inline")); + ast_to_nmodl(*ast, filepath("inline", "mod")); } if (local_rename) { logger->info("Running local variable rename visitor"); LocalVarRenameVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_rename")); + ast_to_nmodl(*ast, filepath("local_rename", "mod")); } if (nmodl_localize) { @@ -519,14 +525,14 @@ int main(int argc, const char* argv[]) { LocalizeVisitor(localize_verbatim).visit_program(*ast); LocalVarRenameVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("localize")); + ast_to_nmodl(*ast, filepath("localize", "mod")); } if (sympy_conductance) { logger->info("Running sympy conductance visitor"); SympyConductanceVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_conductance")); + ast_to_nmodl(*ast, filepath("sympy_conductance", "mod")); } if (sympy_analytic || sparse_solver_exists(*ast)) { @@ -537,19 +543,19 @@ int main(int argc, const char* argv[]) { logger->info("Running sympy solve visitor"); SympySolverVisitor(sympy_pade, sympy_cse).visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_solve")); + ast_to_nmodl(*ast, filepath("sympy_solve", "mod")); } { logger->info("Running cnexp visitor"); NeuronSolveVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("cnexp")); + ast_to_nmodl(*ast, filepath("cnexp", "mod")); } { SolveBlockVisitor().visit_program(*ast); SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("solveblock")); + ast_to_nmodl(*ast, filepath("solveblock", "mod")); } if (json_perfstat) { @@ -618,9 +624,11 @@ int main(int argc, const char* argv[]) { #ifdef NMODL_LLVM_BACKEND if (llvm_ir) { logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_passes, llvm_float_type); + 
CodegenLLVMVisitor visitor( + modfile, output_dir, llvm_opt_passes, llvm_vec_width, llvm_float_type); visitor.visit_program(*ast); - ast_to_nmodl(*ast, filepath("llvm")); + ast_to_nmodl(*ast, filepath("llvm", "mod")); + ast_to_json(*ast, filepath("llvm", "json")); } #endif } From 451fe175da4499eb5adbccabbfc00598e2efa980 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sun, 7 Mar 2021 01:10:30 +0300 Subject: [PATCH 027/105] Addressing TODOs for Instance struct (#533) Part II - remove undefined visit_codegen_instance_var - Improved member creation for instance struct - Instance struct type generation for kernel arguments - Proper integration of instance struct - Added scalar code generation for the kernel - Removed instance test since it is not created explicitly anymore - Fixed ordering for precision and width in LLVM Visitor - Added vector induction variable - Vectorised code for compute with direct loads fully functional - Instance naming fixed - (LLVM IR) Fixed compute vector code generation types - refactoring : improve coversion of double to int for the loop expressions --- .../llvm/codegen_llvm_helper_visitor.cpp | 83 ++-- .../llvm/codegen_llvm_helper_visitor.hpp | 7 + src/codegen/llvm/codegen_llvm_visitor.cpp | 387 ++++++++++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 48 ++- src/main.cpp | 2 +- test/unit/codegen/codegen_llvm_ir.cpp | 36 -- 6 files changed, 419 insertions(+), 144 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 43dc496291..b6b417a960 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -146,7 +146,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// create new type and name for creating new ast node auto type = new ast::CodegenVarType(FLOAT_TYPE); auto var = param->get_name()->clone(); - arguments.emplace_back(new ast::CodegenVarWithType(type, 0, 
var)); + arguments.emplace_back(new ast::CodegenVarWithType(type, /*is_pointer=*/0, var)); } /// return type of the function is same as return variable type @@ -170,31 +170,31 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s }; /// float variables are standard pointers to float vectors - for (auto& float_var: info.codegen_float_variables) { - add_var_with_type(float_var->get_name(), FLOAT_TYPE, 1); + for (const auto& float_var: info.codegen_float_variables) { + add_var_with_type(float_var->get_name(), FLOAT_TYPE, /*is_pointer=*/1); } /// int variables are pointers to indexes for other vectors - for (auto& int_var: info.codegen_int_variables) { - add_var_with_type(int_var.symbol->get_name(), FLOAT_TYPE, 1); + for (const auto& int_var: info.codegen_int_variables) { + add_var_with_type(int_var.symbol->get_name(), FLOAT_TYPE, /*is_pointer=*/1); } // for integer variables, there should be index - for (auto& int_var: info.codegen_int_variables) { + for (const auto& int_var: info.codegen_int_variables) { std::string var_name = int_var.symbol->get_name() + "_index"; - add_var_with_type(var_name, INTEGER_TYPE, 1); + add_var_with_type(var_name, INTEGER_TYPE, /*is_pointer=*/1); } // add voltage and node index - add_var_with_type("voltage", FLOAT_TYPE, 1); - add_var_with_type("node_index", INTEGER_TYPE, 1); + add_var_with_type("voltage", FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type("node_index", INTEGER_TYPE, /*is_pointer=*/1); // add dt, t, celsius - add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, 0); - add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, 0); - add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, 0); - add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, 0); - add_var_with_type(MECH_NODECOUNT_VAR, INTEGER_TYPE, 0); + add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + 
add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); + add_var_with_type(MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); return std::make_shared(codegen_vars); } @@ -384,7 +384,7 @@ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, std::string& index_var) { /// collect all variables in the node of type ast::VarName auto variables = collect_nodes(node, {ast::AstNodeType::VAR_NAME}); - for (auto& v: variables) { + for (const auto& v: variables) { auto variable = std::dynamic_pointer_cast(v); auto variable_name = variable->get_node_name(); @@ -450,6 +450,44 @@ void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { create_function_for_node(node); } +/// Create asr::Varname node with given a given variable name +static ast::VarName* create_varname(const std::string& varname) { + return new ast::VarName(new ast::Name(new ast::String(varname)), nullptr, nullptr); +} + +/** + * Create for loop initialization expression + * @param code Usually "id = 0" as a string + * @return Expression representing code + * \todo : we can not use `create_statement_as_expression` function because + * NMODL parser is using `ast::Double` type to represent all variables + * including Integer. See #542. 
+ */ +static std::shared_ptr loop_initialization_expression( + const std::string& induction_var) { + // create id = 0 + const auto& id = create_varname(induction_var); + const auto& zero = new ast::Integer(0, nullptr); + return std::make_shared(id, ast::BinaryOperator(ast::BOP_ASSIGN), zero); +} + +/** + * Create loop increment expression `id = id + width` + * \todo : same as loop_initialization_expression() + */ +static std::shared_ptr loop_increment_expression(const std::string& induction_var, + int vector_width) { + // first create id + x + const auto& id = create_varname(induction_var); + const auto& inc = new ast::Integer(vector_width, nullptr); + const auto& inc_expr = + new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), inc); + // now create id = id + x + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_ASSIGN), + inc_expr); +} + /** * \brief Convert ast::NrnStateBlock to corresponding code generation function nrn_state * @param node AST node representing ast::NrnStateBlock @@ -471,9 +509,9 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// create now main compute part : for loop over channel instances /// loop constructs : initialization, condition and increment - const auto& initialization = create_statement_as_expression("id = 0"); - const auto& condition = create_expression("id < node_count"); - const auto& increment = create_statement_as_expression("id = id + {}"_format(vector_width)); + const auto& initialization = loop_initialization_expression(INDUCTION_VAR); + const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, MECH_NODECOUNT_VAR)); + const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); /// loop body : initialization + solve blocks ast::StatementVector loop_def_statements; @@ -484,7 +522,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::vector double_variables{"v"}; /// access node index 
and corresponding voltage - loop_index_statements.push_back(visitor::create_statement("node_id = node_index[id]")); + loop_index_statements.push_back( + visitor::create_statement("node_id = node_index[{}]"_format(INDUCTION_VAR))); loop_body_statements.push_back(visitor::create_statement("v = voltage[node_id]")); /// read ion variables @@ -558,7 +597,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { ast::CodegenVarWithTypeVector code_arguments; auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); - auto instance_var_name = new ast::Name(new ast::String("mech")); + auto instance_var_name = new ast::Name(new ast::String(MECH_INSTANCE_VAR)); auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); code_arguments.emplace_back(instance_var); @@ -567,7 +606,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::make_shared(return_type, name, code_arguments, function_block); codegen_functions.push_back(function); - std::cout << nmodl::to_nmodl(function); + std::cout << nmodl::to_nmodl(function) << std::endl; } void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { @@ -583,8 +622,6 @@ void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { for (auto& fun: codegen_functions) { node.emplace_back_node(fun); } - - std::cout << nmodl::to_nmodl(node); } diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 981372b4d5..b67aa7ee09 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -120,6 +120,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { const std::string MECH_INSTANCE_VAR = "mech"; const std::string MECH_NODECOUNT_VAR = "node_count"; + /// name of induction variable used in the kernel. 
+ const std::string INDUCTION_VAR = "id"; + /// create new function for FUNCTION or PROCEDURE block void create_function_for_node(ast::Block& node); @@ -134,6 +137,10 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { return instance_var_helper; } + std::string get_kernel_id() { + return INDUCTION_VAR; + } + /// run visitor and return code generation functions CodegenFunctionVector get_codegen_functions(const ast::Program& node); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 80bdfd20e3..62e69449b7 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -21,14 +21,22 @@ namespace nmodl { namespace codegen { +static constexpr const char instance_struct_type_name[] = "__instance_var__type"; + +// The prefix is used to create a vectorised id that can be used as index to GEPs. However, for +// simple aligned vector loads and stores vector id is not needed. This is because we can bitcast +// the pointer to the vector pointer! \todo: Consider removing this. 
+static constexpr const char kernel_id_prefix[] = "__vec_"; + + /****************************************************************************************/ /* Helper routines */ /****************************************************************************************/ static bool is_supported_statement(const ast::Statement& statement) { return statement.is_codegen_var_list_statement() || statement.is_expression_statement() || - statement.is_codegen_return_statement() || statement.is_if_statement() || - statement.is_while_statement(); + statement.is_codegen_for_statement() || statement.is_codegen_return_statement() || + statement.is_if_statement() || statement.is_while_statement(); } bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { @@ -56,10 +64,82 @@ llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& no return create_gep(node.get_node_name(), index); } +llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstanceVar& node) { + const auto& member_node = node.get_member_var(); + const auto& instance_name = node.get_instance_var()->get_node_name(); + const auto& member_name = member_node->get_node_name(); + + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + " is not a member of the instance!"); + + // Load the instance struct given its name from the ValueSymbolTable. + llvm::Value* instance_ptr = builder.CreateLoad(lookup(instance_name)); + + // Create a GEP instruction to get a pointer to the member. 
+ int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Type* index_type = llvm::Type::getInt32Ty(*context); + + std::vector indices; + indices.push_back(llvm::ConstantInt::get(index_type, 0)); + indices.push_back(llvm::ConstantInt::get(index_type, member_index)); + llvm::Value* member_ptr = builder.CreateInBoundsGEP(instance_ptr, indices); + + // Get the member AST node from the instance AST node, for which we proceed with the code + // generation. If the member is scalar, return the pointer to it straight away. + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) { + return member_ptr; + } + + // Otherwise, the codegen variable is a pointer, and the member AST node must be an IndexedName. + auto member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName!"); + + // Proceed to creating a GEP instruction to get the pointer to the member's element. While LLVM + // Helper set the indices to be Name nodes, a sanity check is added here. Note that this step + // can be avoided if using `get_array_index_or_length()`. However, it does not support indexing + // with Name/Expression at the moment. \todo: Reuse `get_array_index_or_length()` here. + auto member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " has a non-Name index!"); + + // Load the index variable that will be used to access the member's element. Since we index a + // pointer variable, we need to extend the 32-bit integer index variable to 64-bit. 
+ llvm::Value* i32_index = builder.CreateLoad( + lookup(member_indexed_name->get_length()->get_node_name())); + llvm::Value* i64_index = builder.CreateSExt(i32_index, llvm::Type::getInt64Ty(*context)); + + // Create a indices vector for GEP to return the pointer to the element at the specified index. + std::vector member_indices; + member_indices.push_back(i64_index); + + // The codegen variable type is always a scalar, so we need to transform it to a pointer. Then + // load the member which would be indexed later. + llvm::Type* type = get_codegen_var_type(*codegen_var_with_type->get_type()); + llvm::Value* instance_member = + builder.CreateLoad(llvm::PointerType::get(type, /*AddressSpace=*/0), member_ptr); + + + // If the code is vectorised, then bitcast to a vector pointer. + if (is_kernel_code && vector_width > 1) { + llvm::Type* vector_type = + llvm::PointerType::get(llvm::FixedVectorType::get(type, vector_width), + /*AddressSpace=*/0); + llvm::Value* instance_member_bitcasted = builder.CreateBitCast(instance_member, + vector_type); + return builder.CreateInBoundsGEP(instance_member_bitcasted, member_indices); + } + + return builder.CreateInBoundsGEP(instance_member, member_indices); +} + unsigned CodegenLLVMVisitor::get_array_index_or_length(const ast::IndexedName& indexed_name) { + // \todo: Support indices with expressions and names: k[i + j] = ... auto integer = std::dynamic_pointer_cast(indexed_name.get_length()); if (!integer) - throw std::runtime_error("Error: expecting integer index or length"); + throw std::runtime_error("Error: only integer indices/length are supported!"); // Check if integer value is taken from a macro. 
if (!integer->get_macro()) @@ -74,6 +154,8 @@ llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& return llvm::Type::getInt1Ty(*context); case ast::AstNodeType::DOUBLE: return get_default_fp_type(); + case ast::AstNodeType::INSTANCE_STRUCT: + return get_instance_struct_type(); case ast::AstNodeType::INTEGER: return llvm::Type::getInt32Ty(*context); case ast::AstNodeType::VOID: @@ -85,6 +167,26 @@ llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& } } +llvm::Value* CodegenLLVMVisitor::get_constant_int_vector(int value) { + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + std::vector constants; + for (unsigned i = 0; i < vector_width; ++i) { + const auto& element = llvm::ConstantInt::get(i32_type, value); + constants.push_back(element); + } + return llvm::ConstantVector::get(constants); +} + +llvm::Value* CodegenLLVMVisitor::get_constant_fp_vector(const std::string& value) { + llvm::Type* fp_type = get_default_fp_type(); + std::vector constants; + for (unsigned i = 0; i < vector_width; ++i) { + const auto& element = llvm::ConstantFP::get(fp_type, value); + constants.push_back(element); + } + return llvm::ConstantVector::get(constants); +} + llvm::Type* CodegenLLVMVisitor::get_default_fp_type() { if (use_single_precision) return llvm::Type::getFloatTy(*context); @@ -97,6 +199,59 @@ llvm::Type* CodegenLLVMVisitor::get_default_fp_ptr_type() { return llvm::Type::getDoublePtrTy(*context); } +llvm::Type* CodegenLLVMVisitor::get_instance_struct_type() { + std::vector members; + for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { + auto is_pointer = variable->get_is_pointer(); + auto nmodl_type = variable->get_type()->get_type(); + + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + llvm::Type* i32ptr_type = llvm::Type::getInt32PtrTy(*context); + + switch (nmodl_type) { +#define DISPATCH(type, llvm_ptr_type, llvm_type) \ + case type: \ + members.push_back(is_pointer ? 
(llvm_ptr_type) : (llvm_type)); \ + break; + + DISPATCH(ast::AstNodeType::DOUBLE, get_default_fp_ptr_type(), get_default_fp_type()); + DISPATCH(ast::AstNodeType::INTEGER, i32ptr_type, i32_type); + +#undef DISPATCH + default: + throw std::runtime_error("Error: unsupported type found in instance struct"); + } + } + + llvm::StructType* llvm_struct_type = + llvm::StructType::create(*context, mod_filename + instance_struct_type_name); + llvm_struct_type->setBody(members); + return llvm::PointerType::get(llvm_struct_type, /*AddressSpace=*/0); +} + +llvm::Value* CodegenLLVMVisitor::get_variable_ptr(const ast::VarName& node) { + const auto& identifier = node.get_name(); + if (!identifier->is_name() && !identifier->is_indexed_name() && + !identifier->is_codegen_instance_var()) { + throw std::runtime_error("Error: Unsupported variable type - " + node.get_node_name()); + } + + llvm::Value* ptr; + if (identifier->is_name()) + ptr = lookup(node.get_node_name()); + + if (identifier->is_indexed_name()) { + auto indexed_name = std::dynamic_pointer_cast(identifier); + ptr = codegen_indexed_name(*indexed_name); + } + + if (identifier->is_codegen_instance_var()) { + auto instance_var = std::dynamic_pointer_cast(identifier); + ptr = codegen_instance_var(*instance_var); + } + return ptr; +} + void CodegenLLVMVisitor::run_llvm_opt_passes() { /// run some common optimisation passes that are commonly suggested fpm.add(llvm::createInstructionCombiningPass()); @@ -134,7 +289,7 @@ void CodegenLLVMVisitor::create_external_method_call(const std::string& name, } #define DISPATCH(method_name, intrinsic) \ - if (name == method_name) { \ + if (name == (method_name)) { \ llvm::Value* result = builder.CreateIntrinsic(intrinsic, argument_types, argument_values); \ values.push_back(result); \ return; \ @@ -234,12 +389,12 @@ llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, llvm::Value* result; switch (bin_op) { -#define DISPATCH(binary_op, llvm_fp_op, llvm_int_op) \ - case 
binary_op: \ - if (lhs_type->isDoubleTy() || lhs_type->isFloatTy()) \ - result = llvm_fp_op(lhs, rhs); \ - else \ - result = llvm_int_op(lhs, rhs); \ +#define DISPATCH(binary_op, llvm_fp_op, llvm_int_op) \ + case binary_op: \ + if (lhs_type->isIntOrIntVectorTy()) \ + result = llvm_int_op(lhs, rhs); \ + else \ + result = llvm_fp_op(lhs, rhs); \ return result; DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd, builder.CreateAdd); @@ -256,20 +411,11 @@ llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm::Value* rhs) { auto var = dynamic_cast(node.get_lhs().get()); - if (!var) { - throw std::runtime_error("Error: only VarName assignment is currently supported.\n"); - } + if (!var) + throw std::runtime_error("Error: only VarName assignment is supported!"); - const auto& identifier = var->get_name(); - if (identifier->is_name()) { - llvm::Value* alloca = lookup(var->get_node_name()); - builder.CreateStore(rhs, alloca); - } else if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - builder.CreateStore(rhs, codegen_indexed_name(*indexed_name)); - } else { - throw std::runtime_error("Error: Unsupported variable type"); - } + llvm::Value* ptr = get_variable_ptr(*var); + builder.CreateStore(rhs, ptr); } llvm::Value* CodegenLLVMVisitor::visit_logical_bin_op(llvm::Value* lhs, @@ -373,6 +519,117 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { values.push_back(constant); } +// Generating FOR loop in LLVM IR creates the following structure: +// +// +---------------------------+ +// | | +// | | +// | br %cond | +// +---------------------------+ +// | +// V +// +-----------------------------+ +// | | +// | %cond = ... 
|<------+ +// | cond_br %cond, %body, %exit | | +// +-----------------------------+ | +// | | | +// | V | +// | +------------------------+ | +// | | | | +// | | br %inc | | +// | +------------------------+ | +// | | | +// | V | +// | +------------------------+ | +// | | | | +// | | br %cond | | +// | +------------------------+ | +// | | | +// | +---------------+ +// V +// +---------------------------+ +// | | +// +---------------------------+ +void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatement& node) { + // Get the current and the next blocks within the function. + llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Create the basic blocks for FOR loop. + llvm::BasicBlock* for_cond = + llvm::BasicBlock::Create(*context, /*Name=*/"for.cond", func, next); + llvm::BasicBlock* for_body = + llvm::BasicBlock::Create(*context, /*Name=*/"for.body", func, next); + llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); + llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); + + // First, initialise the loop in the same basic block. + node.get_initialization()->accept(*this); + + // If the loop is to be vectorised, create a separate vector induction variable. + // \todo: See the comment for `kernel_id_prefix`. + if (vector_width > 1) { + // First, create a vector type and alloca for it. + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + llvm::Type* vec_type = llvm::FixedVectorType::get(i32_type, vector_width); + llvm::Value* vec_alloca = builder.CreateAlloca(vec_type, + /*ArraySize=*/nullptr, + /*Name=*/kernel_id_prefix + kernel_id); + + // Then, store the initial value of <0, 1, ..., [W-1]> o the alloca pointer, where W is the + // vector width. 
+ std::vector constants; + for (unsigned i = 0; i < vector_width; ++i) { + const auto& element = llvm::ConstantInt::get(i32_type, i); + constants.push_back(element); + } + llvm::Value* vector_id = llvm::ConstantVector::get(constants); + builder.CreateStore(vector_id, vec_alloca); + } + // Branch to condition basic block and insert condition code there. + builder.CreateBr(for_cond); + builder.SetInsertPoint(for_cond); + node.get_condition()->accept(*this); + + // Extract the condition to decide whether to branch to the loop body or loop exit. + llvm::Value* cond = values.back(); + values.pop_back(); + builder.CreateCondBr(cond, for_body, exit); + + // Generate code for the loop body and create the basic block for the increment. + builder.SetInsertPoint(for_body); + is_kernel_code = true; + const auto& statement_block = node.get_statement_block(); + statement_block->accept(*this); + is_kernel_code = false; + builder.CreateBr(for_inc); + + // Process increment. + builder.SetInsertPoint(for_inc); + node.get_increment()->accept(*this); + + // If the code is vectorised, then increment the vector id by where W is the + // vector width. + // \todo: See the comment for `kernel_id_prefix`. + if (vector_width > 1) { + // First, create an increment vector. + llvm::Value* vector_inc = get_constant_int_vector(vector_width); + + // Increment the kernel id elements by a constant vector width. + llvm::Value* vector_id_ptr = lookup(kernel_id_prefix + kernel_id); + llvm::Value* vector_id = builder.CreateLoad(vector_id_ptr); + llvm::Value* incremented = builder.CreateAdd(vector_id, vector_inc); + builder.CreateStore(incremented, vector_id_ptr); + } + + // Create a branch to condition block, then generate exit code out of the loop. 
+ builder.CreateBr(for_cond); + builder.SetInsertPoint(exit); +} + + void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node) { const auto& name = node.get_node_name(); const auto& arguments = node.get_arguments(); @@ -406,7 +663,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node block->accept(*this); // If function has a void return type, add a terminator not handled by CodegenReturnVar. - if (node.is_void()) + if (node.get_return_type()->get_type() == ast::AstNodeType::VOID) builder.CreateRetVoid(); // Clear local values stack and remove the pointer to the local symbol table. @@ -419,7 +676,7 @@ void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturn throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); std::string ret = "ret_" + current_func->getName().str(); - llvm::Value* ret_value = builder.CreateLoad(current_func->getValueSymbolTable()->lookup(ret)); + llvm::Value* ret_value = builder.CreateLoad(lookup(ret)); builder.CreateRet(ret_value); } @@ -456,6 +713,10 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( } void CodegenLLVMVisitor::visit_double(const ast::Double& node) { + if (is_kernel_code && vector_width > 1) { + values.push_back(get_constant_fp_vector(node.get_value())); + return; + } const auto& constant = llvm::ConstantFP::get(get_default_fp_type(), node.get_value()); values.push_back(constant); } @@ -547,6 +808,10 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { } void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { + if (is_kernel_code && vector_width > 1) { + values.push_back(get_constant_int_vector(node.get_value())); + return; + } const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), node.get_value()); values.push_back(constant); @@ -561,9 +826,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { const auto& functions = 
v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); - // TODO :: George / Ioannis :: before emitting procedures, we have - // to emmit INSTANCE_STRUCT type as it's used as an argument. - // Currently it's done in node.visit_children which is late. + kernel_id = v.get_kernel_id(); // For every function, generate its declaration. Thus, we can look up // `llvm::Function` in the symbol table in the module. @@ -574,8 +837,15 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Set the AST symbol table. sym_tab = node.get_symbol_table(); - // Proceed with code generation. - node.visit_children(*this); + // Proceed with code generation. Right now, we do not do + // node.visit_children(*this); + // The reason is that the node may contain AST nodes for which the visitor functions have been + // defined. In our implementation we assume that the code generation is happening within the + // function scope. To avoid generating code outside of functions, visit only them for now. + // \todo: Handle what is mentioned here. 
+ for (const auto& func: functions) { + visit_codegen_function(*func); + } if (opt_passes) { logger->info("Running LLVM optimisation passes"); @@ -605,60 +875,21 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node } void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { - const auto& identifier = node.get_name(); - if (!identifier->is_name() && !identifier->is_indexed_name()) - throw std::runtime_error("Error: Unsupported variable type"); - - // TODO :: George :: here instance_var_helper can be used to query - // variable type and it's index into structure - auto name = node.get_node_name(); - - auto codegen_var_with_type = instance_var_helper.get_variable(name); - auto codegen_var_index = instance_var_helper.get_variable_index(name); - // this will be INTEGER or DOUBLE - auto var_type = codegen_var_with_type->get_type()->get_type(); - auto is_pointer = codegen_var_with_type->get_is_pointer(); - - llvm::Value* ptr; - if (identifier->is_name()) - ptr = lookup(node.get_node_name()); - - if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - ptr = codegen_indexed_name(*indexed_name); - } + llvm::Value* ptr = get_variable_ptr(node); // Finally, load the variable from the pointer value. llvm::Value* var = builder.CreateLoad(ptr); - values.push_back(var); -} -void CodegenLLVMVisitor::visit_instance_struct(const ast::InstanceStruct& node) { - std::vector members; - for (const auto& variable: node.get_codegen_vars()) { - // TODO :: Ioannis / George :: we have now double*, int*, double and int - // variables in the instance structure. Each variable is of type - // ast::CodegenVarWithType. So we can query variable type and if - // it's pointer. - auto is_pointer = variable->get_is_pointer(); - auto type = variable->get_type()->get_type(); - - // todo : clean up ? - if (type == ast::AstNodeType::DOUBLE) { - auto llvm_type = is_pointer ? 
get_default_fp_ptr_type() : get_default_fp_type(); - members.push_back(llvm_type); - } else { - if (is_pointer) { - members.push_back(llvm::Type::getInt32PtrTy(*context)); - } else { - members.push_back(llvm::Type::getInt32Ty(*context)); - } - } + // If the vale should not be vectorised, or it is already a vector, add it to the stack. + if (!is_kernel_code || vector_width <= 1 || var->getType()->isVectorTy()) { + values.push_back(var); + return; } - llvm_struct = llvm::StructType::create(*context, mod_filename + "_Instance"); - llvm_struct->setBody(members); - module->getOrInsertGlobal("inst", llvm_struct); + // Otherwise, if we are generating vectorised inside the loop, replicate the value to form a + // vector of `vector_width`. + llvm::Value* vector_var = builder.CreateVectorSplat(vector_width, var); + values.push_back(vector_var); } void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index b20a19bac7..c93b76b1d6 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -82,11 +82,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Use 32-bit floating-point type if true. Otherwise, use deafult 64-bit. bool use_single_precision; - // explicit vectorisation width + // Explicit vectorisation width. int vector_width; - // LLVM mechanism struct - llvm::StructType* llvm_struct; + // The name of induction variable used in the kernel functions. + std::string kernel_id; + + // A flag to indicate that the code is generated for the kernel. 
+ bool is_kernel_code = false; /** *\brief Run LLVM optimisation passes on generated IR @@ -106,8 +109,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, bool opt_passes, - int vector_width = 1, - bool use_single_precision = false) + bool use_single_precision = false, + int vector_width = 1) : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) @@ -130,6 +133,13 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Value* codegen_indexed_name(const ast::IndexedName& node); + /** + * Generates LLVM code for the given Instance variable + * \param node CodegenInstanceVar NMODL AST node + * \return LLVM code generated for this AST node + */ + llvm::Value* codegen_instance_var(const ast::CodegenInstanceVar& node); + /** * Returns GEP instruction to 1D array * \param name 1D array name @@ -152,6 +162,20 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); + /** + * Returns LLVM vector with `vector_width` int values. + * \param int value to replicate + * \return LLVM value + */ + llvm::Value* get_constant_int_vector(int value); + + /** + * Returns LLVM vector with `vector_width` double values. 
+ * \param string a double value to replicate + * \return LLVM value + */ + llvm::Value* get_constant_fp_vector(const std::string& value); + /** * Returns 64-bit or 32-bit LLVM floating type * \return \c LLVM floating point type according to `use_single_precision` flag @@ -164,6 +188,18 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Type* get_default_fp_ptr_type(); + /** + * Returns a pointer to LLVM struct type + * \return LLVM pointer type + */ + llvm::Type* get_instance_struct_type(); + + /** + * Returns a LLVM value corresponding to the VarName node + * \return LLVM value + */ + llvm::Value* get_variable_ptr(const ast::VarName& node); + /** * Create a function call to an external method * \param name external method name @@ -255,6 +291,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; void visit_statement_block(const ast::StatementBlock& node) override; + void visit_codegen_for_statement(const ast::CodegenForStatement& node) override; void visit_codegen_function(const ast::CodegenFunction& node) override; void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; @@ -267,7 +304,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_program(const ast::Program& node) override; void visit_unary_expression(const ast::UnaryExpression& node) override; void visit_var_name(const ast::VarName& node) override; - void visit_instance_struct(const ast::InstanceStruct& node) override; void visit_while_statement(const ast::WhileStatement& node) override; // \todo: move this to debug mode (e.g. 
-v option or --dump-ir) diff --git a/src/main.cpp b/src/main.cpp index 31ba53c669..c05e1ff5a2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -625,7 +625,7 @@ int main(int argc, const char* argv[]) { if (llvm_ir) { logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor( - modfile, output_dir, llvm_opt_passes, llvm_vec_width, llvm_float_type); + modfile, output_dir, llvm_opt_passes, llvm_float_type, llvm_vec_width); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index fbebf63720..d152e6b5bb 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -794,39 +794,3 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { } } } - -//============================================================================= -// Create Instance Struct -//============================================================================= - -SCENARIO("Creation of Instance Struct", "[visitor][llvm][instance_struct]") { - GIVEN("NEURON block with RANGE variables and IONS") { - std::string nmodl_text = R"( - NEURON { - USEION na READ ena WRITE ina - NONSPECIFIC_CURRENT il - RANGE minf, hinf - } - - STATE { - m - } - - ASSIGNED { - v (mV) - celsius (degC) - minf - hinf - } - )"; - - THEN("create struct with the declared variables") { - std::string module_string = run_llvm_visitor(nmodl_text, true); - std::smatch m; - - std::regex instance_struct_declaration( - R"(%unknown_Instance = type \{ double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\*, double\* \})"); - REQUIRE(std::regex_search(module_string, m, instance_struct_declaration)); - } - } -} From e1e8eabd2b9ff3d2a7752f18bff1f91c63a5e068 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 9 Mar 2021 11:50:53 +0300 Subject: [PATCH 028/105] Unit test for scalar state kernel 
generation in LLVM (#547) This PR adds a unit test to check LLVM instructions generated for the scalar kernel, particularly: - FOR loop blocks. - Induction variable increments and comparisons. - Correct loads through GEPs from the struct. Test for vectorised code generation would be added in a separate PR or when full vectorisation support (indirect indexing) would land. --- test/unit/codegen/codegen_llvm_ir.cpp | 112 +++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index d152e6b5bb..5ea3a1f003 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -12,6 +12,8 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" #include "visitors/symtab_visitor.hpp" using namespace nmodl; @@ -24,16 +26,20 @@ using nmodl::parser::NmodlDriver; std::string run_llvm_visitor(const std::string& text, bool opt = false, - bool use_single_precision = false) { + bool use_single_precision = false, + int vector_width = 1) { NmodlDriver driver; const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", opt, - use_single_precision); + use_single_precision, + vector_width); llvm_visitor.visit_program(*ast); return llvm_visitor.print_module(); } @@ -770,6 +776,108 @@ SCENARIO("While", "[visitor][llvm]") { } } +//============================================================================= +// State scalar kernel +//============================================================================= + +SCENARIO("Scalar state kernel", "[visitor][llvm]") { + GIVEN("A neuron state 
update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + RANGE minf, mtau, gl, el + } + + STATE { + m + } + + ASSIGNED { + v (mV) + minf + mtau (ms) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + il = gl * (v - el) + } + + DERIVATIVE states { + m = (minf-m) / mtau + } + )"; + + THEN("a kernel with instance struct as an argument and a FOR loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check the struct type and the kernel declaration. + std::regex struct_type( + "%.*__instance_var__type = type \\{ double\\*, double\\*, double\\*, double\\*, " + "double\\*, double\\*, double\\*, i32\\*, double, double, double, i32, i32 \\}"); + std::regex kernel_declaration( + R"(define void @nrn_state_hh\(%.*__instance_var__type\* .*\))"); + REQUIRE(std::regex_search(module_string, m, struct_type)); + REQUIRE(std::regex_search(module_string, m, kernel_declaration)); + + // Check for correct induction variable initialisation and a branch to condition block. + std::regex alloca_instr(R"(%id = alloca i32)"); + std::regex br(R"(br label %for\.cond)"); + REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, br)); + + // Check condition block: id < mech->node_count, and a conditional branch to loop body + // or exit. + std::regex condition( + " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*,.*\n" + " %.* = getelementptr inbounds %.*__instance_var__type, " + "%.*__instance_var__type\\* " + "%.*, i32 0, i32 [0-9]+\n" + " %.* = load i32, i32\\* %.*,.*\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = icmp slt i32 %.*, %.*"); + std::regex cond_br(R"(br i1 %.*, label %for\.body, label %for\.exit)"); + REQUIRE(std::regex_search(module_string, m, condition)); + REQUIRE(std::regex_search(module_string, m, cond_br)); + + // In the body block, `node_id` and voltage `v` are initialised with the data from the + // struct. 
Check for variable allocations and correct loads from the struct with GEPs. + std::regex initialisation( + "for\\.body:.*\n" + " %node_id = alloca i32,.*\n" + " %v = alloca double,.*"); + std::regex load_from_struct( + " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*\n" + " %.* = getelementptr inbounds %.*__instance_var__type, " + "%.*__instance_var__type\\* %.*, i32 0, i32 [0-9]+\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = sext i32 %.* to i64\n" + " %.* = load (i32|double)\\*, (i32|double)\\*\\* %.*\n" + " %.* = getelementptr inbounds (i32|double), (i32|double)\\* %.*, i64 %.*\n" + " %.* = load (i32|double), (i32|double)\\* %.*"); + REQUIRE(std::regex_search(module_string, m, initialisation)); + REQUIRE(std::regex_search(module_string, m, load_from_struct)); + + // Check induction variable is incremented in increment block. + std::regex increment( + "for.inc:.*\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = add i32 %.*, 1\n" + " store i32 %.*, i32\\* %id,.*\n" + " br label %for\\.cond"); + REQUIRE(std::regex_search(module_string, m, increment)); + + // Check exit block. + std::regex exit( + "for\\.exit:.*\n" + " ret void"); + REQUIRE(std::regex_search(module_string, m, exit)); + } + } +} + //============================================================================= // Optimization : dead code removal //============================================================================= From fd2053e4de613508b4fec966f06448ceeb674475 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 12 Mar 2021 04:50:38 -0800 Subject: [PATCH 029/105] Indexed name codegen improvements (#550) Improved index code generation within the LLVM pipeline. The following issues were addressed: Array indices are i64 per LLVM's addressing convention. This means that if the value is not a constant, an additional sext instruction must be created. Bounds check is removed since it requires a certain analysis on the index value. 
This can be addressed in a separate PR. `IndexedName` code generation is separated into 2 functions The first, `get_array_length()` is responsible for array initialisation, the second, `get_array_index()`, for indexing. In latter case, we support the following cases: ``` ... // Indexing with an integer constant k[0] = ... // Indexing with an integer expression k[10 - 10] // Indexing with a `Name` AST node that is an integer // (in our case a FOR loop induction variable or a variable // with `CodegenVarType` == `Integer` k[id] = ... k[ena_id] = ... ``` Note that the case: ``` // id := loop integer induction variable k[id + 1] = ... ``` is not supported for 2 reasons: On the AST level, as per #545 the expression would contain a Name and not VarName node that fails the code generation. The case only arises in the kernel functions like state_update, where indexing is "artificially" created with indexing by a Name only. fixes #541 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 71 +++++++++++++---------- src/codegen/llvm/codegen_llvm_visitor.hpp | 21 +++---- test/unit/codegen/codegen_llvm_ir.cpp | 37 +++++------- 3 files changed, 65 insertions(+), 64 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 62e69449b7..cd2af2af69 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -39,28 +39,17 @@ static bool is_supported_statement(const ast::Statement& statement) { statement.is_if_statement() || statement.is_while_statement(); } -bool CodegenLLVMVisitor::check_array_bounds(const ast::IndexedName& node, unsigned index) { - llvm::Type* array_type = lookup(node.get_node_name())->getType()->getPointerElementType(); - unsigned length = array_type->getArrayNumElements(); - return 0 <= index && index < length; -} - -llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, unsigned index) { - llvm::Type* index_type = llvm::Type::getInt32Ty(*context); 
+llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, llvm::Value* index) { + llvm::Type* index_type = llvm::Type::getInt64Ty(*context); std::vector indices; indices.push_back(llvm::ConstantInt::get(index_type, 0)); - indices.push_back(llvm::ConstantInt::get(index_type, index)); + indices.push_back(index); return builder.CreateInBoundsGEP(lookup(name), indices); } llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { - unsigned index = get_array_index_or_length(node); - - // Check if index is within array bounds. - if (!check_array_bounds(node, index)) - throw std::runtime_error("Error: Index is out of bounds"); - + llvm::Value* index = get_array_index(node); return create_gep(node.get_node_name(), index); } @@ -96,20 +85,11 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance if (!member_var_name->get_name()->is_indexed_name()) throw std::runtime_error("Error: " + member_name + " is not an IndexedName!"); - // Proceed to creating a GEP instruction to get the pointer to the member's element. While LLVM - // Helper set the indices to be Name nodes, a sanity check is added here. Note that this step - // can be avoided if using `get_array_index_or_length()`. However, it does not support indexing - // with Name/Expression at the moment. \todo: Reuse `get_array_index_or_length()` here. + // Proceed to creating a GEP instruction to get the pointer to the member's element. auto member_indexed_name = std::dynamic_pointer_cast( member_var_name->get_name()); - if (!member_indexed_name->get_length()->is_name()) - throw std::runtime_error("Error: " + member_name + " has a non-Name index!"); + llvm::Value* i64_index = get_array_index(*member_indexed_name); - // Load the index variable that will be used to access the member's element. Since we index a - // pointer variable, we need to extend the 32-bit integer index variable to 64-bit. 
- llvm::Value* i32_index = builder.CreateLoad( - lookup(member_indexed_name->get_length()->get_node_name())); - llvm::Value* i64_index = builder.CreateSExt(i32_index, llvm::Type::getInt64Ty(*context)); // Create a indices vector for GEP to return the pointer to the element at the specified index. std::vector member_indices; @@ -135,17 +115,44 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance return builder.CreateInBoundsGEP(instance_member, member_indices); } -unsigned CodegenLLVMVisitor::get_array_index_or_length(const ast::IndexedName& indexed_name) { - // \todo: Support indices with expressions and names: k[i + j] = ... - auto integer = std::dynamic_pointer_cast(indexed_name.get_length()); +llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { + // Process the index expression. It can either be a Name node: + // k[id] // id is an integer + // or an integer expression. + llvm::Value* index_value; + if (node.get_length()->is_name()) { + llvm::Value* ptr = lookup(node.get_length()->get_node_name()); + index_value = builder.CreateLoad(ptr); + } else { + node.get_length()->accept(*this); + index_value = values.back(); + values.pop_back(); + } + + // Check if index is a double. While it is possible to use casting from double to integer + // values, we choose not to support these cases. + if (!index_value->getType()->isIntOrIntVectorTy()) + throw std::runtime_error("Error: only integer indexing is supported!"); + + // Conventionally, in LLVM array indices are 64 bit. 
+ auto index_type = llvm::cast(index_value->getType()); + llvm::Type* i64_type = llvm::Type::getInt64Ty(*context); + if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return index_value; + + return builder.CreateSExtOrTrunc(index_value, i64_type); +} + +int CodegenLLVMVisitor::get_array_length(const ast::IndexedName& node) { + auto integer = std::dynamic_pointer_cast(node.get_length()); if (!integer) - throw std::runtime_error("Error: only integer indices/length are supported!"); + throw std::runtime_error("Error: only integer length is supported!"); // Check if integer value is taken from a macro. if (!integer->get_macro()) return integer->get_value(); const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name()); - return static_cast(*macro->get_value()); + return static_cast(*macro->get_value()); } llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& node) { @@ -691,7 +698,7 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( llvm::Type* var_type; if (identifier->is_indexed_name()) { auto indexed_name = std::dynamic_pointer_cast(identifier); - unsigned length = get_array_index_or_length(*indexed_name); + int length = get_array_length(*indexed_name); var_type = llvm::ArrayType::get(scalar_var_type, length); } else if (identifier->is_name()) { // This case corresponds to a scalar local variable. Its type is double by default. 
diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index c93b76b1d6..1477e0d66d 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -119,12 +119,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { , builder(*context) , fpm(module.get()) {} - /** - * Checks if array index specified by the given IndexedName is within bounds - * \param node IndexedName representing array - * \return \c true if the index is within bounds - */ - bool check_array_bounds(const ast::IndexedName& node, unsigned index); /** * Generates LLVM code for the given IndexedName @@ -146,14 +140,21 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { * \param index element index * \return GEP instruction value */ - llvm::Value* create_gep(const std::string& name, unsigned index); + llvm::Value* create_gep(const std::string& name, llvm::Value* index); + + /** + * Returns array index from given IndexedName + * \param node IndexedName representing array + * \return array index + */ + llvm::Value* get_array_index(const ast::IndexedName& node); /** - * Returns array index or length from given IndexedName + * Returns array length from given IndexedName * \param node IndexedName representing array - * \return array index or length + * \return array length */ - unsigned get_array_index_or_length(const ast::IndexedName& node); + int get_array_length(const ast::IndexedName& node); /** * Returns LLVM type for the given CodegenVarType node diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 5ea3a1f003..30fe8a391a 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -557,6 +557,7 @@ SCENARIO("Indexed name", "[visitor][llvm]") { std::string nmodl_text = R"( PROCEDURE foo() { LOCAL x[2] + x[10 - 10] = 1 x[1] = 3 } )"; @@ -565,14 +566,19 @@ SCENARIO("Indexed name", "[visitor][llvm]") { std::string 
module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check GEP is created correctly to pint at array element. - std::regex GEP( - R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)"); - REQUIRE(std::regex_search(module_string, m, GEP)); - - // Check the value is stored to the pointer. - std::regex store(R"(store double 3.000000e\+00, double\* %1)"); - REQUIRE(std::regex_search(module_string, m, store)); + // Check GEPs are created correctly to get the addresses of array elements. + std::regex GEP1( + R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 0)"); + std::regex GEP2( + R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)"); + REQUIRE(std::regex_search(module_string, m, GEP1)); + REQUIRE(std::regex_search(module_string, m, GEP2)); + + // Check the value is stored to the correct addresses. + std::regex store1(R"(store double 1.000000e\+00, double\* %1)"); + std::regex store2(R"(store double 3.000000e\+00, double\* %2)"); + REQUIRE(std::regex_search(module_string, m, store1)); + REQUIRE(std::regex_search(module_string, m, store2)); } } @@ -591,7 +597,7 @@ SCENARIO("Indexed name", "[visitor][llvm]") { // Check GEP is created correctly to pint at array element. std::regex GEP( - R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i32 0, i32 1)"); + R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)"); REQUIRE(std::regex_search(module_string, m, GEP)); // Check the value is loaded from the pointer. 
@@ -603,19 +609,6 @@ SCENARIO("Indexed name", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, store)); } } - - GIVEN("Array with out of bounds access") { - std::string nmodl_text = R"( - PROCEDURE foo() { - LOCAL x[2] - x[5] = 3 - } - )"; - - THEN("error is thrown") { - REQUIRE_THROWS_AS(run_llvm_visitor(nmodl_text), std::runtime_error); - } - } } //============================================================================= From 57cb77dc3fdfc6d487706291cef22b2beaf879fb Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Sat, 13 Mar 2021 12:35:18 +0100 Subject: [PATCH 030/105] Add InstanceStruct test data generation helper and unit test (#546) * CodegenLLVMHelperVisitor improved without hardcoded parameters * Added get_instance_struct_ptr to get instance structure for variable information * test/unit/codegen/codegen_data_helper.cpp : first draft implementation of codegen data helper * Added test for typecasting to the proper struct type Co-authored-by: Pramod Kumbhar --- .../llvm/codegen_llvm_helper_visitor.cpp | 25 ++- .../llvm/codegen_llvm_helper_visitor.hpp | 14 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 4 + src/codegen/llvm/codegen_llvm_visitor.hpp | 6 + test/unit/CMakeLists.txt | 5 +- test/unit/codegen/codegen_data_helper.cpp | 186 ++++++++++++++++++ test/unit/codegen/codegen_data_helper.hpp | 111 +++++++++++ .../codegen/codegen_llvm_instance_struct.cpp | 174 ++++++++++++++++ 8 files changed, 512 insertions(+), 13 deletions(-) create mode 100644 test/unit/codegen/codegen_data_helper.cpp create mode 100644 test/unit/codegen/codegen_data_helper.hpp create mode 100644 test/unit/codegen/codegen_llvm_instance_struct.cpp diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index b6b417a960..67129a90ce 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -18,6 +18,13 @@ namespace codegen { using namespace 
fmt::literals;
 
+/// initialize static member variables
+const ast::AstNodeType CodegenLLVMHelperVisitor::INTEGER_TYPE = ast::AstNodeType::INTEGER;
+const ast::AstNodeType CodegenLLVMHelperVisitor::FLOAT_TYPE = ast::AstNodeType::DOUBLE;
+const std::string CodegenLLVMHelperVisitor::NODECOUNT_VAR = "node_count";
+const std::string CodegenLLVMHelperVisitor::VOLTAGE_VAR = "voltage";
+const std::string CodegenLLVMHelperVisitor::NODE_INDEX_VAR = "node_index";
+
 /**
  * \brief Create variable definition statement
  *
@@ -157,7 +164,12 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) {
     auto function = std::make_shared<ast::CodegenFunction>(fun_ret_type, name, arguments, block);
     codegen_functions.push_back(function);
 }
-
+/**
+ * \note : Order of variables is not important but we assume all pointers
+ * are added first and then scalar variables like t, dt, second_order etc.
+ * This order is assumed when we allocate data for integration testing
+ * and benchmarking purposes. See CodegenDataHelper::create_data().
+ */ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_struct() { ast::CodegenVarWithTypeVector codegen_vars; @@ -186,15 +198,15 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s } // add voltage and node index - add_var_with_type("voltage", FLOAT_TYPE, /*is_pointer=*/1); - add_var_with_type("node_index", INTEGER_TYPE, /*is_pointer=*/1); + add_var_with_type(VOLTAGE_VAR, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(NODE_INDEX_VAR, INTEGER_TYPE, /*is_pointer=*/1); // add dt, t, celsius add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); - add_var_with_type(MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); + add_var_with_type(NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); return std::make_shared(codegen_vars); } @@ -510,7 +522,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// loop constructs : initialization, condition and increment const auto& initialization = loop_initialization_expression(INDUCTION_VAR); - const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, MECH_NODECOUNT_VAR)); + const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); /// loop body : initialization + solve blocks @@ -524,7 +536,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// access node index and corresponding voltage loop_index_statements.push_back( visitor::create_statement("node_id = node_index[{}]"_format(INDUCTION_VAR))); - loop_body_statements.push_back(visitor::create_statement("v = voltage[node_id]")); + loop_body_statements.push_back( + visitor::create_statement("v = 
{}[node_id]"_format(VOLTAGE_VAR))); /// read ion variables ion_read_statements(BlockType::State, diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index b67aa7ee09..446d5a6fd9 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -112,13 +112,8 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { /// mechanism data helper InstanceVarHelper instance_var_helper; - /// default integer and float node type - const ast::AstNodeType INTEGER_TYPE = ast::AstNodeType::INTEGER; - const ast::AstNodeType FLOAT_TYPE = ast::AstNodeType::DOUBLE; - /// name of the mechanism instance parameter const std::string MECH_INSTANCE_VAR = "mech"; - const std::string MECH_NODECOUNT_VAR = "node_count"; /// name of induction variable used in the kernel. const std::string INDUCTION_VAR = "id"; @@ -130,6 +125,15 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { std::shared_ptr create_instance_struct(); public: + /// default integer and float node type + static const ast::AstNodeType INTEGER_TYPE; + static const ast::AstNodeType FLOAT_TYPE; + + // node count, voltage and node index variables + static const std::string NODECOUNT_VAR; + static const std::string VOLTAGE_VAR; + static const std::string NODE_INDEX_VAR; + CodegenLLVMHelperVisitor(int vector_width) : vector_width(vector_width){}; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index cd2af2af69..b1182d36b9 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -259,6 +259,10 @@ llvm::Value* CodegenLLVMVisitor::get_variable_ptr(const ast::VarName& node) { return ptr; } +std::shared_ptr CodegenLLVMVisitor::get_instance_struct_ptr() { + return instance_var_helper.instance; +} + void CodegenLLVMVisitor::run_llvm_opt_passes() { /// run some common optimisation passes that are commonly 
suggested fpm.add(llvm::createInstructionCombiningPass()); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 1477e0d66d..41235a1ff0 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -201,6 +201,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ llvm::Value* get_variable_ptr(const ast::VarName& node); + /** + * Returns shared_ptr to generated ast::InstanceStruct + * \return std::shared_ptr + */ + std::shared_ptr get_instance_struct_ptr(); + /** * Create a function call to an external method * \param name external method name diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 18741d41ac..70b538b35a 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -107,8 +107,9 @@ target_link_libraries( ${NMODL_WRAPPER_LIBS}) if(NMODL_ENABLE_LLVM) - include_directories(${LLVM_INCLUDE_DIRS}) - add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp) + include_directories(${LLVM_INCLUDE_DIRS} codegen) + add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp + codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_llvm_execution.cpp) target_link_libraries( testllvm diff --git a/test/unit/codegen/codegen_data_helper.cpp b/test/unit/codegen/codegen_data_helper.cpp new file mode 100644 index 0000000000..e42cfe01f3 --- /dev/null +++ b/test/unit/codegen/codegen_data_helper.cpp @@ -0,0 +1,186 @@ +#include + +#include "ast/codegen_var_type.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" + +#include "codegen_data_helper.hpp" + +namespace nmodl { +namespace codegen { + +// scalar variables with default values +const double default_nthread_dt_value = 0.025; +const double default_nthread_t_value = 100.0; +const double default_celsius_value = 34.0; +const int default_second_order_value = 0; + +// cleanup all 
members and struct base pointer +CodegenInstanceData::~CodegenInstanceData() { + // first free num_ptr_members members which are pointers + for (size_t i = 0; i < num_ptr_members; i++) { + free(members[i]); + } + // and then pointer to container struct + free(base_ptr); +} + +/** + * \todo : various things can be improved here + * - if variable is voltage then initialization range could be -65 to +65 + * - if variable is double or float then those could be initialize with + * "some" floating point value between range like 1.0 to 100.0. Note + * it would be nice to have unique values to avoid errors like division + * by zero. We have simple implementation that is taking care of this. + * - if variable is integer then initialization range must be between + * 0 and num_elements. In practice, num_elements is number of instances + * of a particular mechanism. This would be <= number of compartments + * in the cell. For now, just initialize integer variables from 0 to + * num_elements - 1. + */ +void initialize_variable(const std::shared_ptr& var, + void* ptr, + size_t initial_value, + size_t num_elements) { + ast::AstNodeType type = var->get_type()->get_type(); + const std::string& name = var->get_name()->get_node_name(); + + if (type == ast::AstNodeType::DOUBLE) { + const auto& generated_double_data = generate_dummy_data(initial_value, + num_elements); + double* data = (double*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_double_data[i]; + } + } else if (type == ast::AstNodeType::FLOAT) { + const auto& generated_float_data = generate_dummy_data(initial_value, num_elements); + float* data = (float*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_float_data[i]; + } + } else if (type == ast::AstNodeType::INTEGER) { + const auto& generated_int_data = generate_dummy_data(initial_value, num_elements); + int* data = (int*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_int_data[i]; + } + } else { 
+ throw std::runtime_error("Unhandled data type during initialize_variable"); + }; +} + +CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t seed) { + // alignment with 64-byte to generate aligned loads/stores + const unsigned NBYTE_ALIGNMENT = 64; + + // get variable information + const auto& variables = instance->get_codegen_vars(); + + // start building data + CodegenInstanceData data; + data.num_elements = num_elements; + + // base pointer to instance object + void* base = nullptr; + + // max size of each member : pointer / double has maximum size + size_t member_size = std::max(sizeof(double), sizeof(double*)); + + // allocate instance object with memory alignment + posix_memalign(&base, NBYTE_ALIGNMENT, member_size * variables.size()); + data.base_ptr = base; + + size_t offset = 0; + void* ptr = base; + size_t variable_index = 0; + + // allocate each variable and allocate memory at particular offset in base pointer + for (auto& var: variables) { + // only process until first non-pointer variable + if (!var->get_is_pointer()) { + break; + } + + // check type of variable and it's size + size_t member_size = 0; + ast::AstNodeType type = var->get_type()->get_type(); + if (type == ast::AstNodeType::DOUBLE) { + member_size = sizeof(double); + } else if (type == ast::AstNodeType::FLOAT) { + member_size = sizeof(float); + } else if (type == ast::AstNodeType::INTEGER) { + member_size = sizeof(int); + } + + // allocate memory and setup a pointer + void* member; + posix_memalign(&member, NBYTE_ALIGNMENT, member_size * num_elements); + initialize_variable(var, member, variable_index, num_elements); + + // copy address at specific location in the struct + memcpy(ptr, &member, sizeof(double*)); + + data.offsets.push_back(offset); + data.members.push_back(member); + data.num_ptr_members++; + + // all pointer types are of same size, so just use double* + offset += sizeof(double*); + ptr = (char*) base + offset; + + variable_index++; + } + + // we 
are now switching from pointer type to next member type (e.g. double) + // ideally we should use padding but switching from double* to double should + // already meet alignment requirements + for (auto& var: variables) { + // process only scalar elements + if (var->get_is_pointer()) { + continue; + } + ast::AstNodeType type = var->get_type()->get_type(); + const std::string& name = var->get_name()->get_node_name(); + + // some default values for standard parameters + double value = 0; + if (name == naming::NTHREAD_DT_VARIABLE) { + value = default_nthread_dt_value; + } else if (name == naming::NTHREAD_T_VARIABLE) { + value = default_nthread_t_value; + } else if (name == naming::CELSIUS_VARIABLE) { + value = default_celsius_value; + } else if (name == CodegenLLVMHelperVisitor::NODECOUNT_VAR) { + value = num_elements; + } else if (name == naming::SECOND_ORDER_VARIABLE) { + value = default_second_order_value; + } + + if (type == ast::AstNodeType::DOUBLE) { + *((double*) ptr) = value; + data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(double); + ptr = (char*) base + offset; + } else if (type == ast::AstNodeType::FLOAT) { + *((float*) ptr) = float(value); + data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(float); + ptr = (char*) base + offset; + } else if (type == ast::AstNodeType::INTEGER) { + *((int*) ptr) = int(value); + data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(int); + ptr = (char*) base + offset; + } else { + throw std::runtime_error( + "Unhandled type while allocating data in CodegenDataHelper::create_data()"); + } + } + + return data; +} + +} // namespace codegen +} // namespace nmodl diff --git a/test/unit/codegen/codegen_data_helper.hpp b/test/unit/codegen/codegen_data_helper.hpp new file mode 100644 index 0000000000..368b964147 --- /dev/null +++ b/test/unit/codegen/codegen_data_helper.hpp @@ -0,0 +1,111 @@ 
+/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "ast/ast.hpp" + +/// \file +/// \brief Generate test data for testing and benchmarking compute kernels + +namespace nmodl { +namespace codegen { + +/// common scalar variables +extern const double default_nthread_dt_value; +extern const double default_nthread_t_value; +extern const double default_celsius_value; +extern const int default_second_order_value; + +/** + * \class CodegenInstanceData + * \brief Wrapper class to pack data allocate for instance + */ +struct CodegenInstanceData { + /// base pointer which can be type casted + /// to instance struct at run time + void* base_ptr = nullptr; + + /// length of each member of pointer type + size_t num_elements = 0; + + /// number of pointer members + size_t num_ptr_members = 0; + + /// offset relative to base_ptr to locate + /// each member variable in instance struct + std::vector offsets; + + /// pointer to array allocated for each member variable + /// i.e. 
*(base_ptr + offsets[0]) will be members[0] + std::vector members; + + // cleanup all memory allocated for type and member variables + ~CodegenInstanceData(); +}; + + +/** + * Generate vector of dummy data according to the template type specified + * + * For double type: generate vector starting from (initial_value + 1e-15) + * with increments of 1e-15 + * For float type: generate vector starting from (initial_value + 1e-6) + * with increments of 1e-6 + * For int type: generate vector starting from (initial_value + 1) with + * increments of 1 + * + * \param inital_value Base value for initializing the data + * \param num_elements Number of element of the generated vector + * \return std::vector of dummy data for testing purposes + */ +template +std::vector generate_dummy_data(size_t initial_value, size_t num_elements) { + std::vector data(num_elements); + T precision; + if (std::is_same::value) { + precision = 1e-15; + } else if (std::is_same::value) { + precision = 1e-6; + } else { + precision = 1; + } + for (size_t i = 0; i < num_elements; i++) { + data[i] = initial_value + precision * (i + 1); + } + return data; +} + +/** + * \class CodegenDataHelper + * \brief Helper to allocate and initialize data for benchmarking + * + * The `ast::InstanceStruct` is has different number of member + * variables for different MOD files and hence we can't instantiate + * it at compile time. This class helps to inspect the variables + * information gathered from AST and allocate memory block that + * can be type cast to the `ast::InstanceStruct` corresponding + * to the MOD file. 
+ */ +class CodegenDataHelper { + std::shared_ptr program; + std::shared_ptr instance; + + public: + CodegenDataHelper() = delete; + CodegenDataHelper(const std::shared_ptr& program, + const std::shared_ptr& instance) + : program(program) + , instance(instance) {} + + CodegenInstanceData create_data(size_t num_elements, size_t seed); +}; + +} // namespace codegen +} // namespace nmodl diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp new file mode 100644 index 0000000000..4bfa1cd31c --- /dev/null +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -0,0 +1,174 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include + +#include "ast/all.hpp" +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen_data_helper.hpp" +#include "parser/nmodl_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" + +using namespace nmodl; +using namespace codegen; +using namespace visitor; +using nmodl::parser::NmodlDriver; + +//============================================================================= +// Utility to get initialized Struct Instance data +//============================================================================= + +codegen::CodegenInstanceData generate_instance_data(const std::string& text, + bool opt = false, + bool use_single_precision = false, + int vector_width = 1, + size_t num_elements = 100, + size_t seed = 1) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + // Generate full AST and 
solve the BREAKPOINT block to be able to generate the Instance Struct + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"test", + /*output_dir=*/".", + opt, + use_single_precision, + vector_width); + llvm_visitor.visit_program(*ast); + llvm_visitor.print_module(); + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, seed); + return instance_data; +} + +template +bool compare(void* instance_struct_data_ptr, const std::vector& generated_data) { + std::vector instance_struct_vector; + std::cout << "Generated data size: " << generated_data.size() << std::endl; + instance_struct_vector.assign(static_cast(instance_struct_data_ptr), + static_cast(instance_struct_data_ptr) + + generated_data.size()); + for (auto value: instance_struct_vector) { + std::cout << value << std::endl; + } + return instance_struct_vector == generated_data; +} + +//============================================================================= +// Simple Instance Struct creation +//============================================================================= + +SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { + GIVEN("Instantiate simple Instance Struct") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena + RANGE minf, mtau + } + + STATE { + m + } + + ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + minf + mtau + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m' = (minf-m)/mtau + } + )"; + + + THEN("instance struct elements are properly initialized") { + const size_t num_elements = 10; + constexpr static double seed = 42; + auto instance_data = generate_instance_data(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/true, + /*vector_width*/ 1, + 
num_elements, + seed); + size_t minf_index = 0; + size_t mtau_index = 1; + size_t m_index = 2; + size_t Dm_index = 3; + size_t ena_index = 4; + size_t v_unused_index = 5; + size_t g_unused_index = 6; + size_t ion_ena_index = 7; + size_t ion_ena_index_index = 8; + size_t voltage_index = 9; + size_t node_index_index = 10; + size_t t_index = 11; + size_t dt_index = 12; + size_t celsius_index = 13; + size_t secondorder_index = 14; + size_t node_count_index = 15; + // Check if the various instance struct fields are properly initialized + REQUIRE(compare(instance_data.members[minf_index], + generate_dummy_data(minf_index, num_elements))); + REQUIRE(compare(instance_data.members[ena_index], + generate_dummy_data(ena_index, num_elements))); + REQUIRE(compare(instance_data.members[ion_ena_index], + generate_dummy_data(ion_ena_index, num_elements))); + REQUIRE(compare(instance_data.members[node_index_index], + generate_dummy_data(node_index_index, num_elements))); + REQUIRE(*static_cast(instance_data.members[t_index]) == + default_nthread_t_value); + REQUIRE(*static_cast(instance_data.members[node_count_index]) == num_elements); + + // Hard code TestInstanceType struct + struct TestInstanceType { + double* minf; + double* mtau; + double* m; + double* Dm; + double* ena; + double* v_unused; + double* g_unused; + double* ion_ena; + int* ion_ena_index; + double* voltage; + int* node_index; + double t; + double dt; + double celsius; + int secondorder; + int node_count; + }; + // Test if TestInstanceType struct is properly initialized + // Cast void ptr instance_data.base_ptr to TestInstanceType* + TestInstanceType* instance = (TestInstanceType*) instance_data.base_ptr; + REQUIRE(compare(instance->minf, generate_dummy_data(minf_index, num_elements))); + REQUIRE(compare(instance->ena, generate_dummy_data(ena_index, num_elements))); + REQUIRE(compare(instance->ion_ena, + generate_dummy_data(ion_ena_index, num_elements))); + REQUIRE(compare(instance->node_index, + 
generate_dummy_data(node_index_index, num_elements))); + REQUIRE(instance->t == default_nthread_t_value); + REQUIRE(instance->celsius == default_celsius_value); + REQUIRE(instance->secondorder == default_second_order_value); + } + } +} From 5768d68e53d4c5b8f697f53583f377844b9fb219 Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Wed, 17 Mar 2021 12:57:02 +0100 Subject: [PATCH 031/105] Add the remainder loop for vectorization of DERIVATIVE block (#534) * Implement remainder loop along with main vector loop * Add unit test for the same fixes #532 --- .../llvm/codegen_llvm_helper_visitor.cpp | 56 ++++++++---- src/codegen/llvm/codegen_llvm_visitor.cpp | 6 +- test/unit/codegen/codegen_llvm_ir.cpp | 89 ++++++++++++++++++- 3 files changed, 132 insertions(+), 19 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 67129a90ce..80c2dc6eb8 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -520,11 +520,6 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// create now main compute part : for loop over channel instances - /// loop constructs : initialization, condition and increment - const auto& initialization = loop_initialization_expression(INDUCTION_VAR); - const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); - const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); - /// loop body : initialization + solve blocks ast::StatementVector loop_def_statements; ast::StatementVector loop_index_statements; @@ -583,20 +578,49 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// now construct a new code block which will become the body of the loop auto loop_block = std::make_shared(loop_body); - /// convert local statement to codegenvar statement - convert_local_statement(*loop_block); + /// main loop 
possibly vectorized on vector_width + { + /// loop constructs : initialization, condition and increment + const auto& initialization = loop_initialization_expression(INDUCTION_VAR); + const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); + const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); + + /// clone it + auto local_loop_block = std::shared_ptr(loop_block->clone()); - /// create for loop node - auto for_loop_statement = std::make_shared(initialization, - condition, - increment, - loop_block); + /// convert local statement to codegenvar statement + convert_local_statement(*local_loop_block); - /// convert all variables inside loop body to instance variables - convert_to_instance_variable(*for_loop_statement, loop_index_var); + auto for_loop_statement_main = std::make_shared(initialization, + condition, + increment, + local_loop_block); + + /// convert all variables inside loop body to instance variables + convert_to_instance_variable(*for_loop_statement_main, loop_index_var); + + /// loop itself becomes one of the statement in the function + function_statements.push_back(for_loop_statement_main); + } - /// loop itself becomes one of the statement in the function - function_statements.push_back(for_loop_statement); + /// remainder loop possibly vectorized on vector_width + { + /// loop constructs : initialization, condition and increment + const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); + const auto& increment = loop_increment_expression(INDUCTION_VAR, 1); + + /// convert local statement to codegenvar statement + convert_local_statement(*loop_block); + + auto for_loop_statement_remainder = + std::make_shared(nullptr, condition, increment, loop_block); + + /// convert all variables inside loop body to instance variables + convert_to_instance_variable(*for_loop_statement_remainder, loop_index_var); + + /// loop itself becomes one of the statement in the 
function + function_statements.push_back(for_loop_statement_remainder); + } /// new block for the function auto function_block = new ast::StatementBlock(function_statements); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index b1182d36b9..bed88046a7 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -576,8 +576,10 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); - // First, initialise the loop in the same basic block. - node.get_initialization()->accept(*this); + // First, initialise the loop in the same basic block. This block is optional. + if (node.get_initialization()) { + node.get_initialization()->accept(*this); + } // If the loop is to be vectorised, create a separate vector induction variable. // \todo: See the comment for `kernel_id_prefix`. 
diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 30fe8a391a..414c33972e 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -8,16 +8,25 @@ #include #include +#include "test/unit/utils/test_utils.hpp" + #include "ast/program.hpp" +#include "ast/statement_block.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" #include "visitors/neuron_solve_visitor.hpp" #include "visitors/solve_block_visitor.hpp" #include "visitors/symtab_visitor.hpp" +#include "visitors/visitor_utils.hpp" using namespace nmodl; +using namespace codegen; using namespace visitor; + +using namespace test_utils; + using nmodl::parser::NmodlDriver; //============================================================================= @@ -44,6 +53,24 @@ std::string run_llvm_visitor(const std::string& text, return llvm_visitor.print_module(); } +//============================================================================= +// Utility to get specific LLVM nodes +//============================================================================= + +std::vector> run_codegen_visitor_helper(const std::string& text) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + /// construct symbol table and run codegen helper visitor + SymtabVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + CodegenLLVMHelperVisitor(8).visit_program(*ast); + + const auto& nodes = collect_nodes(*ast, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + + return nodes; +} + //============================================================================= // BinaryExpression and Double //============================================================================= @@ -864,13 +891,73 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { // Check exit block. 
std::regex exit( - "for\\.exit:.*\n" + "for\\.exit[0-9]*:.*\n" " ret void"); REQUIRE(std::regex_search(module_string, m, exit)); } } } +//============================================================================= +// Derivative block : test optimization +//============================================================================= + +SCENARIO("Derivative block", "[visitor][llvm][derivative]") { + GIVEN("After helper visitor") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + RANGE minf, mtau + } + STATE { + m + } + ASSIGNED { + v (mV) + minf + mtau (ms) + } + BREAKPOINT { + SOLVE states METHOD cnexp + } + DERIVATIVE states { + m = (minf-m)/mtau + } + )"; + + std::string expected_main_loop = R"( + for(id = 0; idnode_count; id = id+8) { + INTEGER node_id + DOUBLE v + node_id = mech->node_index[id] + v = mech->voltage[node_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] + SOLVE states METHOD cnexp + })"; + std::string expected_reminder_loop = R"( + for(; idnode_count; id = id+1) { + INTEGER node_id + DOUBLE v + node_id = mech->node_index[id] + v = mech->voltage[node_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] + SOLVE states METHOD cnexp + })"; + + + THEN("should contains 2 for loops") { + auto result = run_codegen_visitor_helper(nmodl_text); + REQUIRE(result.size() == 2); + + auto main_loop = reindent_text(to_nmodl(result[0])); + REQUIRE(main_loop == reindent_text(expected_main_loop)); + + auto reminder_loop = reindent_text(to_nmodl(result[1])); + REQUIRE(reminder_loop == reindent_text(expected_reminder_loop)); + } + } +} + //============================================================================= // Optimization : dead code removal //============================================================================= From 0497ee3a5a42ee42daa5a3fccc94c73cfb575e34 Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Fri, 19 Mar 2021 20:59:19 +0100 Subject: [PATCH 032/105] Always initialize return variable in function block 
(#554) * return value in PROCEDURE block was not initialised * do the initialisation as part of ASTR transformation * remove initialisation specific code from LLVM visitor fixes #530 --- .../llvm/codegen_llvm_helper_visitor.cpp | 55 +++++++++++-------- src/codegen/llvm/codegen_llvm_visitor.cpp | 9 --- test/unit/codegen/codegen_llvm_ir.cpp | 1 + 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 80c2dc6eb8..c080dd2320 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -25,6 +25,28 @@ const std::string CodegenLLVMHelperVisitor::NODECOUNT_VAR = "node_count"; const std::string CodegenLLVMHelperVisitor::VOLTAGE_VAR = "voltage"; const std::string CodegenLLVMHelperVisitor::NODE_INDEX_VAR = "node_index"; +/// Create asr::Varname node with given a given variable name +static ast::VarName* create_varname(const std::string& varname) { + return new ast::VarName(new ast::Name(new ast::String(varname)), nullptr, nullptr); +} + +/** + * Create initialization expression + * @param code Usually "id = 0" as a string + * @return Expression representing code + * \todo : we can not use `create_statement_as_expression` function because + * NMODL parser is using `ast::Double` type to represent all variables + * including Integer. See #542. 
+ */ +static std::shared_ptr int_initialization_expression( + const std::string& induction_var, + int value = 0) { + // create id = 0 + const auto& id = create_varname(induction_var); + const auto& zero = new ast::Integer(value, nullptr); + return std::make_shared(id, ast::BinaryOperator(ast::BOP_ASSIGN), zero); +} + /** * \brief Create variable definition statement * @@ -120,7 +142,8 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { auto name = new ast::Name(new ast::String(function_name)); /// return variable name has "ret_" prefix - auto return_var = new ast::Name(new ast::String("ret_" + function_name)); + std::string return_var_name = "ret_{}"_format(function_name); + auto return_var = new ast::Name(new ast::String(return_var_name)); /// return type based on node type ast::CodegenVarType* ret_var_type = nullptr; @@ -137,6 +160,11 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// convert local statement to codegenvar statement convert_local_statement(*block); + if (node.get_node_type() == ast::AstNodeType::PROCEDURE_BLOCK) { + block->insert_statement(statements.begin(), + std::make_shared( + int_initialization_expression(return_var_name))); + } /// insert return variable at the start of the block ast::CodegenVarVector codegen_vars; codegen_vars.emplace_back(new ast::CodegenVar(0, return_var->clone())); @@ -462,30 +490,9 @@ void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { create_function_for_node(node); } -/// Create asr::Varname node with given a given variable name -static ast::VarName* create_varname(const std::string& varname) { - return new ast::VarName(new ast::Name(new ast::String(varname)), nullptr, nullptr); -} - -/** - * Create for loop initialization expression - * @param code Usually "id = 0" as a string - * @return Expression representing code - * \todo : we can not use `create_statement_as_expression` function because - * NMODL parser is using `ast::Double` 
type to represent all variables - * including Integer. See #542. - */ -static std::shared_ptr loop_initialization_expression( - const std::string& induction_var) { - // create id = 0 - const auto& id = create_varname(induction_var); - const auto& zero = new ast::Integer(0, nullptr); - return std::make_shared(id, ast::BinaryOperator(ast::BOP_ASSIGN), zero); -} - /** * Create loop increment expression `id = id + width` - * \todo : same as loop_initialization_expression() + * \todo : same as int_initialization_expression() */ static std::shared_ptr loop_increment_expression(const std::string& induction_var, int vector_width) { @@ -581,7 +588,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// main loop possibly vectorized on vector_width { /// loop constructs : initialization, condition and increment - const auto& initialization = loop_initialization_expression(INDUCTION_VAR); + const auto& initialization = int_initialization_expression(INDUCTION_VAR); const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index bed88046a7..37b2e7fc67 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -713,15 +713,6 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( throw std::runtime_error("Error: Unsupported local variable type"); } llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); - - // Check if the variable we process is a procedure return variable (i.e. it has a name - // "ret_" and the function return type is integer). If so, initialise - // it to 0. 
- std::string ret_val_name = "ret_" + current_func->getName().str(); - if (name == ret_val_name && current_func->getReturnType()->isIntegerTy()) { - llvm::Value* zero = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 0); - builder.CreateStore(zero, alloca); - } } } diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 414c33972e..47936cfb7a 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -686,6 +686,7 @@ SCENARIO("Procedure", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, signature)); REQUIRE(std::regex_search(module_string, m, alloc)); REQUIRE(std::regex_search(module_string, m, store)); + REQUIRE(std::regex_search(module_string, m, load)); REQUIRE(std::regex_search(module_string, m, ret)); } } From 59894601baf4584712f7694824dda0a098f43c6a Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 9 Apr 2021 19:57:54 +0300 Subject: [PATCH 033/105] Running a kernel with NMODL-LLVM JIT (#549) * Added support for arguments in the JIT llvm runner * Adjusted tests and added a simple kernel test * Removed printfs from the kernel * Fixed kernel number of arguments check * Initial integration of dataHelper for kernel tests * Implemented a test to check the scalar kernel execution --- src/codegen/llvm/codegen_llvm_visitor.cpp | 36 +++++ src/codegen/llvm/codegen_llvm_visitor.hpp | 14 ++ src/codegen/llvm/jit_driver.hpp | 36 +++-- src/codegen/llvm/main.cpp | 2 +- test/unit/CMakeLists.txt | 3 +- test/unit/codegen/codegen_llvm_execution.cpp | 150 +++++++++++++++++-- 6 files changed, 218 insertions(+), 23 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 37b2e7fc67..5fdd906480 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -923,5 +923,41 @@ void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) 
builder.SetInsertPoint(exit); } +void CodegenLLVMVisitor::wrap_kernel_function(const std::string& kernel_name) { + // Get the kernel function and the instance struct type. + auto kernel = module->getFunction(kernel_name); + if (!kernel) + throw std::runtime_error("Kernel " + kernel_name + " is not found!"); + + if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) + throw std::runtime_error("Kernel " + kernel_name + " must have a single argument!"); + + auto instance_struct_ptr_type = llvm::dyn_cast(kernel->getArg(0)->getType()); + if (!instance_struct_ptr_type) + throw std::runtime_error("Kernel " + kernel_name + + " does not have an instance struct pointer argument!"); + + // Create a wrapper void function that takes a void pointer as a single argument. + llvm::Type* void_type = llvm::Type::getVoidTy(*context); + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + llvm::Type* void_ptr_type = llvm::PointerType::get(void_type, /*AddressSpace=*/0); + llvm::Function* wrapper_func = llvm::Function::Create( + llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + "__" + kernel_name + "_wrapper", + *module); + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", wrapper_func); + builder.SetInsertPoint(body); + + // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel and + // adding a terminator. 
+ llvm::Value* bitcasted = builder.CreateBitCast(wrapper_func->getArg(0), + instance_struct_ptr_type); + std::vector args; + args.push_back(bitcasted); + builder.CreateCall(kernel, args); + builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 41235a1ff0..b099646b07 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -237,6 +237,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { */ void emit_procedure_or_function_declaration(const ast::CodegenFunction& node); + /** + * Return InstanceVarHelper + * \return InstanceVarHelper + */ + InstanceVarHelper get_instance_var_helper() { + return instance_var_helper; + } + /** * Return module pointer * \return LLVM IR module pointer @@ -321,6 +329,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { os.flush(); return str; } + + /** + * For the given kernel function, wraps it into another function that uses void* to pass the + * data to the kernel \param kernel_name kernel name to be wrapped + */ + void wrap_kernel_function(const std::string& kernel_name); }; /** \} */ // end of llvm_backends diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index d1e9a9412f..23c8fca612 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -39,15 +39,27 @@ class JITDriver { /// Initialize the JIT. void init(); - /// Lookup the entry-point in the JIT and execute it, returning the result. - template - T execute(const std::string& entry_point) { + /// Lookup the entry-point without arguments in the JIT and execute it, returning the result. 
+ template + ReturnType execute_without_arguments(const std::string& entry_point) { auto expected_symbol = jit->lookup(entry_point); if (!expected_symbol) throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); - auto (*res)() = (T(*)())(intptr_t) expected_symbol->getAddress(); - T result = res(); + auto (*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); + ReturnType result = res(); + return result; + } + + /// Lookup the entry-point with an argument in the JIT and execute it, returning the result. + template + ReturnType execute_with_arguments(const std::string& entry_point, ArgType arg) { + auto expected_symbol = jit->lookup(entry_point); + if (!expected_symbol) + throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); + + auto (*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); + ReturnType result = res(arg); return result; } @@ -71,10 +83,16 @@ class Runner { driver->init(); } - /// Run the entry-point function. - template - double run(const std::string& entry_point) { - return driver->execute(entry_point); + /// Run the entry-point function without arguments. + template + ReturnType run_without_arguments(const std::string& entry_point) { + return driver->template execute_without_arguments(entry_point); + } + + /// Run the entry-point function with a pointer to the data as an argument. + template + ReturnType run_with_argument(const std::string& entry_point, ArgType arg) { + return driver->template execute_with_arguments(entry_point, arg); } }; diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index 11ea178cb4..acbdc37f19 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -67,7 +67,7 @@ int main(int argc, const char* argv[]) { Runner runner(std::move(module)); // Since only double type is supported, provide explicit double type to the running function. 
- auto r = runner.run(entry_point_name); + auto r = runner.run_without_arguments(entry_point_name); fprintf(stderr, "Result: %f\n", r); return 0; diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 70b538b35a..e5f10180cb 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -110,7 +110,8 @@ if(NMODL_ENABLE_LLVM) include_directories(${LLVM_INCLUDE_DIRS} codegen) add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) - add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_llvm_execution.cpp) + add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_data_helper.cpp + codegen/codegen_llvm_execution.cpp) target_link_libraries( testllvm Catch2::Catch2 diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index a14f7a226c..fcf078a34b 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -11,8 +11,11 @@ #include "ast/program.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "codegen/llvm/jit_driver.hpp" +#include "codegen_data_helper.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" #include "visitors/symtab_visitor.hpp" using namespace nmodl; @@ -23,7 +26,43 @@ using nmodl::parser::NmodlDriver; static double EPSILON = 1e-15; //============================================================================= -// No optimisations +// Utilities for testing. 
+//============================================================================= + +struct InstanceTestInfo { + codegen::CodegenInstanceData& instance; + codegen::CodegenLLVMVisitor& visitor; + int num_elements; +}; + +template +bool check_instance_variable(InstanceTestInfo& instance_info, + std::vector& expected, + const std::string& variable_name) { + std::vector actual; + int variable_index = instance_info.visitor.get_instance_var_helper().get_variable_index( + variable_name); + actual.assign(static_cast(instance_info.instance.members[variable_index]), + static_cast(instance_info.instance.members[variable_index]) + + instance_info.num_elements); + // While we are comparing double types as well, for simplicity the test cases are hand-crafted + // so that no floating-point arithmetic is really involved. + return actual == expected; +} + +template +void initialise_instance_variable(InstanceTestInfo& instance_info, + std::vector& data, + const std::string& variable_name) { + int variable_index = instance_info.visitor.get_instance_var_helper().get_variable_index( + variable_name); + T* data_start = static_cast(instance_info.instance.members[variable_index]); + for (int i = 0; i < instance_info.num_elements; ++i) + *(data_start + i) = data[i]; +} + +//============================================================================= +// Simple functions: no optimisations //============================================================================= SCENARIO("Arithmetic expression", "[llvm][runner]") { @@ -60,6 +99,10 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { PROCEDURE foo() {} + FUNCTION with_argument(x) { + with_argument = x + } + FUNCTION loop() { LOCAL i, j, sum, result result = 0 @@ -92,26 +135,31 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { Runner runner(std::move(m)); THEN("functions are evaluated correctly") { - auto exp_result = runner.run("exponential"); + auto exp_result = runner.run_without_arguments("exponential"); 
REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); - auto constant_result = runner.run("constant"); + auto constant_result = runner.run_without_arguments("constant"); REQUIRE(fabs(constant_result - 10.0) < EPSILON); - auto arithmetic_result = runner.run("arithmetic"); + auto arithmetic_result = runner.run_without_arguments("arithmetic"); REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); - auto function_call_result = runner.run("function_call"); + auto function_call_result = runner.run_without_arguments("function_call"); REQUIRE(fabs(function_call_result - 1.0) < EPSILON); - auto loop_result = runner.run("loop"); + double data = 10.0; + auto with_argument_result = runner.run_with_argument("with_argument", + data); + REQUIRE(fabs(with_argument_result - 10.0) < EPSILON); + + auto loop_result = runner.run_without_arguments("loop"); REQUIRE(fabs(loop_result - 90.0) < EPSILON); } } } //============================================================================= -// With optimisations +// Simple functions: with optimisations //============================================================================= SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { @@ -189,23 +237,101 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { THEN("optimizations preserve function results") { // Check exponential is turned into a constant. - auto exp_result = runner.run("exponential"); + auto exp_result = runner.run_without_arguments("exponential"); REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); // Check constant folding. - auto constant_result = runner.run("constant"); + auto constant_result = runner.run_without_arguments("constant"); REQUIRE(fabs(constant_result - 10.0) < EPSILON); // Check nested conditionals - auto conditionals_result = runner.run("conditionals"); + auto conditionals_result = runner.run_without_arguments("conditionals"); REQUIRE(fabs(conditionals_result - 4.0) < EPSILON); // Check constant folding. 
- auto arithmetic_result = runner.run("arithmetic"); + auto arithmetic_result = runner.run_without_arguments("arithmetic"); REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); - auto function_call_result = runner.run("function_call"); + auto function_call_result = runner.run_without_arguments("function_call"); REQUIRE(fabs(function_call_result - 1.0) < EPSILON); } } } + +//============================================================================= +// State scalar kernel. +//============================================================================= + +SCENARIO("Simple scalar kernel", "[llvm][runner]") { + GIVEN("Simple MOD file with a state update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + NONSPECIFIC_CURRENT i + RANGE x0, x1 + } + + STATE { + x + } + + ASSIGNED { + v + x0 + x1 + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 0 + } + + DERIVATIVE states { + x = (x0 - x) / x1 + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/false, + /*use_single_precision=*/false, + /*vector_width=*/1); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_function("nrn_state_test"); + + // Create the instance struct data. + int num_elements = 4; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. 
+ std::vector x = {1.0, 2.0, 3.0, 4.0}; + std::vector x0 = {5.0, 5.0, 5.0, 5.0}; + std::vector x1 = {1.0, 1.0, 1.0, 1.0}; + + InstanceTestInfo instance_info{instance_data, llvm_visitor, num_elements}; + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, x0, "x0"); + initialise_instance_variable(instance_info, x1, "x1"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + Runner runner(std::move(module)); + + THEN("Values in struct have changed according to the formula") { + runner.run_with_argument("__nrn_state_test_wrapper", + instance_data.base_ptr); + std::vector x_expected = {4.0, 3.0, 2.0, 1.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + } + } +} From b5ceca6e66bdc6392594a632664f6d143ea872bb Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 9 Apr 2021 21:42:09 +0300 Subject: [PATCH 034/105] Loop epilogue fix for LLVM visitor helper (#567) * Added renaming for loop local variables in CodegenForStatement * Fixed trip count in main loop and removed epilogue loop for scalar case * Refactored loop remainder tests and added a scalar case * Change `reminder` to `epilogue` in the test --- .../llvm/codegen_llvm_helper_visitor.cpp | 68 +++++++++++++-- test/unit/codegen/codegen_llvm_ir.cpp | 84 +++++++++++++++---- 2 files changed, 126 insertions(+), 26 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index c080dd2320..270794fc9a 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -11,6 +11,7 @@ #include "ast/all.hpp" #include "codegen/codegen_helper_visitor.hpp" #include "utils/logger.hpp" +#include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" namespace nmodl { @@ -25,6 +26,8 @@ const std::string CodegenLLVMHelperVisitor::NODECOUNT_VAR = "node_count"; const std::string 
CodegenLLVMHelperVisitor::VOLTAGE_VAR = "voltage"; const std::string CodegenLLVMHelperVisitor::NODE_INDEX_VAR = "node_index"; +static constexpr const char epilogue_variable_prefix[] = "epilogue_"; + /// Create asr::Varname node with given a given variable name static ast::VarName* create_varname(const std::string& varname) { return new ast::VarName(new ast::Name(new ast::String(varname)), nullptr, nullptr); @@ -507,6 +510,39 @@ static std::shared_ptr loop_increment_expression(const std::str inc_expr); } +/** + * Create loop count comparison expression + * + * Based on if loop is vectorised or not, the condition for loop + * is different. For example: + * - serial loop : `id < node_count` + * - vector loop : `id < (node_count - vector_width + 1)` + * + * \todo : same as int_initialization_expression() + */ +static std::shared_ptr loop_count_expression(const std::string& induction_var, + const std::string& node_count, + int vector_width) { + const auto& id = create_varname(induction_var); + const auto& mech_node_count = create_varname(node_count); + + // For non-vectorised loop, the condition is id < mech->node_count + if (vector_width == 1) { + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_LESS), + mech_node_count); + } + + // For vectorised loop, the condition is id < mech->node_count - vector_width + 1 + const auto& remainder = new ast::Integer(vector_width - 1, /*macro=*/nullptr); + const auto& count = new ast::BinaryExpression(mech_node_count, + ast::BinaryOperator(ast::BOP_SUBTRACTION), + remainder); + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_LESS), + count); +} + /** * \brief Convert ast::NrnStateBlock to corresponding code generation function nrn_state * @param node AST node representing ast::NrnStateBlock @@ -522,8 +558,9 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// create variable definition for loop index and insert at the beginning std::string loop_index_var = 
"id"; - std::vector int_variables{"id"}; - function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + std::vector induction_variables{"id"}; + function_statements.push_back( + create_local_variable_statement(induction_variables, INTEGER_TYPE)); /// create now main compute part : for loop over channel instances @@ -531,10 +568,10 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { ast::StatementVector loop_def_statements; ast::StatementVector loop_index_statements; ast::StatementVector loop_body_statements; - { - std::vector int_variables{"node_id"}; - std::vector double_variables{"v"}; + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + { /// access node index and corresponding voltage loop_index_statements.push_back( visitor::create_statement("node_id = node_index[{}]"_format(INDUCTION_VAR))); @@ -589,7 +626,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { { /// loop constructs : initialization, condition and increment const auto& initialization = int_initialization_expression(INDUCTION_VAR); - const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); + const auto& condition = loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, vector_width); const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); /// clone it @@ -611,10 +648,11 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { } /// remainder loop possibly vectorized on vector_width - { + if (vector_width > 1) { /// loop constructs : initialization, condition and increment - const auto& condition = create_expression("{} < {}"_format(INDUCTION_VAR, NODECOUNT_VAR)); - const auto& increment = loop_increment_expression(INDUCTION_VAR, 1); + const auto& condition = + loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, /*vector_width=*/1); + const auto& increment = 
loop_increment_expression(INDUCTION_VAR, /*vector_width=*/1); /// convert local statement to codegenvar statement convert_local_statement(*loop_block); @@ -622,6 +660,18 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { auto for_loop_statement_remainder = std::make_shared(nullptr, condition, increment, loop_block); + const auto& loop_statements = for_loop_statement_remainder->get_statement_block(); + // \todo: Change RenameVisitor to take a vector of names to which it would append a single + // prefix. + for (const auto& name: int_variables) { + visitor::RenameVisitor v(name, epilogue_variable_prefix + name); + loop_statements->accept(v); + } + for (const auto& name: double_variables) { + visitor::RenameVisitor v(name, epilogue_variable_prefix + name); + loop_statements->accept(v); + } + /// convert all variables inside loop body to instance variables convert_to_instance_variable(*for_loop_statement_remainder, loop_index_var); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 47936cfb7a..942a908be6 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -54,19 +54,21 @@ std::string run_llvm_visitor(const std::string& text, } //============================================================================= -// Utility to get specific LLVM nodes +// Utility to get specific NMODL AST nodes //============================================================================= -std::vector> run_codegen_visitor_helper(const std::string& text) { +std::vector> run_llvm_visitor_helper( + const std::string& text, + int vector_width, + const std::vector& nodes_to_collect) { NmodlDriver driver; const auto& ast = driver.parse_string(text); - /// construct symbol table and run codegen helper visitor SymtabVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - CodegenLLVMHelperVisitor(8).visit_program(*ast); + 
CodegenLLVMHelperVisitor(vector_width).visit_program(*ast); - const auto& nodes = collect_nodes(*ast, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + const auto& nodes = collect_nodes(*ast, nodes_to_collect); return nodes; } @@ -903,11 +905,12 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { // Derivative block : test optimization //============================================================================= -SCENARIO("Derivative block", "[visitor][llvm][derivative]") { - GIVEN("After helper visitor") { +SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { + GIVEN("After LLVM helper visitor transformations") { std::string nmodl_text = R"( NEURON { SUFFIX hh + NONSPECIFIC_CURRENT il RANGE minf, mtau } STATE { @@ -920,41 +923,88 @@ SCENARIO("Derivative block", "[visitor][llvm][derivative]") { } BREAKPOINT { SOLVE states METHOD cnexp + il = 2 } DERIVATIVE states { m = (minf-m)/mtau } )"; - std::string expected_main_loop = R"( - for(id = 0; idnode_count; id = id+8) { + std::string expected_loop = R"( + for(id = 0; idnode_count; id = id+1) { INTEGER node_id DOUBLE v node_id = mech->node_index[id] v = mech->voltage[node_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] - SOLVE states METHOD cnexp })"; - std::string expected_reminder_loop = R"( - for(; idnode_count; id = id+1) { + + THEN("a single scalar loops is constructed") { + auto result = run_llvm_visitor_helper(nmodl_text, + /*vector_width=*/1, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + REQUIRE(result.size() == 1); + + auto main_loop = reindent_text(to_nmodl(result[0])); + REQUIRE(main_loop == reindent_text(expected_loop)); + } + } +} + +SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { + GIVEN("After LLVM helper visitor transformations") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + RANGE minf, mtau + } + STATE { + m + } + ASSIGNED { + v (mV) + minf + mtau (ms) + } + BREAKPOINT { + SOLVE states METHOD cnexp + il = 2 
+ } + DERIVATIVE states { + m = (minf-m)/mtau + } + )"; + + std::string expected_main_loop = R"( + for(id = 0; idnode_count-7; id = id+8) { INTEGER node_id DOUBLE v node_id = mech->node_index[id] v = mech->voltage[node_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] - SOLVE states METHOD cnexp + })"; + std::string expected_epilogue_loop = R"( + for(; idnode_count; id = id+1) { + INTEGER epilogue_node_id + DOUBLE epilogue_v + epilogue_node_id = mech->node_index[id] + epilogue_v = mech->voltage[epilogue_node_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] })"; - THEN("should contains 2 for loops") { - auto result = run_codegen_visitor_helper(nmodl_text); + THEN("vector and epilogue scalar loops are constructed") { + auto result = run_llvm_visitor_helper(nmodl_text, + /*vector_width=*/8, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); REQUIRE(result.size() == 2); auto main_loop = reindent_text(to_nmodl(result[0])); REQUIRE(main_loop == reindent_text(expected_main_loop)); - auto reminder_loop = reindent_text(to_nmodl(result[1])); - REQUIRE(reminder_loop == reindent_text(expected_reminder_loop)); + auto epilogue_loop = reindent_text(to_nmodl(result[1])); + REQUIRE(epilogue_loop == reindent_text(expected_epilogue_loop)); } } } From 9e2284b386ef804ed18949d30caee08af5fa09f4 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sat, 10 Apr 2021 22:00:50 +0300 Subject: [PATCH 035/105] Gather support and vectorisation fixes for LLVM code generation (#568) * Add gather support * Fixed vectorisation patterns and added simple JIT tests * Added IR regex test for gather --- src/codegen/llvm/codegen_llvm_visitor.cpp | 103 ++++++++----------- test/unit/codegen/codegen_llvm_execution.cpp | 103 +++++++++++++++++-- test/unit/codegen/codegen_llvm_ir.cpp | 55 ++++++++++ 3 files changed, 191 insertions(+), 70 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 5fdd906480..a42201824c 100644 --- 
a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -23,11 +23,6 @@ namespace codegen { static constexpr const char instance_struct_type_name[] = "__instance_var__type"; -// The prefix is used to create a vectorised id that can be used as index to GEPs. However, for -// simple aligned vector loads and stores vector id is not needed. This is because we can bitcast -// the pointer to the vector pointer! \todo: Consider removing this. -static constexpr const char kernel_id_prefix[] = "__vec_"; - /****************************************************************************************/ /* Helper routines */ @@ -88,12 +83,11 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance // Proceed to creating a GEP instruction to get the pointer to the member's element. auto member_indexed_name = std::dynamic_pointer_cast( member_var_name->get_name()); - llvm::Value* i64_index = get_array_index(*member_indexed_name); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); - // Create a indices vector for GEP to return the pointer to the element at the specified index. - std::vector member_indices; - member_indices.push_back(i64_index); + llvm::Value* i64_index = get_array_index(*member_indexed_name); // The codegen variable type is always a scalar, so we need to transform it to a pointer. Then // load the member which would be indexed later. @@ -101,18 +95,25 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance llvm::Value* instance_member = builder.CreateLoad(llvm::PointerType::get(type, /*AddressSpace=*/0), member_ptr); + // Check if the code is vectorised and the index is indirect. 
+ std::string id = member_indexed_name->get_length()->get_node_name(); + if (id != kernel_id && is_kernel_code && vector_width > 1) { + // Calculate a vector of addresses via GEP instruction, and then created a gather to load + // indirectly. + llvm::Value* addresses = builder.CreateInBoundsGEP(instance_member, {i64_index}); + return builder.CreateMaskedGather(addresses, llvm::Align()); + } + + llvm::Value* member_addr = builder.CreateInBoundsGEP(instance_member, {i64_index}); // If the code is vectorised, then bitcast to a vector pointer. if (is_kernel_code && vector_width > 1) { llvm::Type* vector_type = llvm::PointerType::get(llvm::FixedVectorType::get(type, vector_width), /*AddressSpace=*/0); - llvm::Value* instance_member_bitcasted = builder.CreateBitCast(instance_member, - vector_type); - return builder.CreateInBoundsGEP(instance_member_bitcasted, member_indices); + return builder.CreateBitCast(member_addr, vector_type); } - - return builder.CreateInBoundsGEP(instance_member, member_indices); + return member_addr; } llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { @@ -135,12 +136,19 @@ llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { throw std::runtime_error("Error: only integer indexing is supported!"); // Conventionally, in LLVM array indices are 64 bit. 
- auto index_type = llvm::cast(index_value->getType()); llvm::Type* i64_type = llvm::Type::getInt64Ty(*context); - if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) - return index_value; + if (auto index_type = llvm::dyn_cast(index_value->getType())) { + if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return index_value; + return builder.CreateSExtOrTrunc(index_value, i64_type); + } - return builder.CreateSExtOrTrunc(index_value, i64_type); + auto vector_type = llvm::cast(index_value->getType()); + auto element_type = llvm::cast(vector_type->getElementType()); + if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return index_value; + return builder.CreateSExtOrTrunc(index_value, + llvm::FixedVectorType::get(i64_type, vector_width)); } int CodegenLLVMVisitor::get_array_length(const ast::IndexedName& node) { @@ -167,8 +175,6 @@ llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& return llvm::Type::getInt32Ty(*context); case ast::AstNodeType::VOID: return llvm::Type::getVoidTy(*context); - // TODO :: George/Ioannis : Here we have to also return INSTANCE_STRUCT type - // as it is used as an argument to nrn_state function default: throw std::runtime_error("Error: expecting a type in CodegenVarType node\n"); } @@ -576,31 +582,15 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); - // First, initialise the loop in the same basic block. This block is optional. + // First, initialise the loop in the same basic block. This block is optional. Also, reset + // vector width to 1 if processing the remainder of the loop. 
+ int tmp_vector_width = vector_width; if (node.get_initialization()) { node.get_initialization()->accept(*this); + } else { + vector_width = 1; } - // If the loop is to be vectorised, create a separate vector induction variable. - // \todo: See the comment for `kernel_id_prefix`. - if (vector_width > 1) { - // First, create a vector type and alloca for it. - llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::Type* vec_type = llvm::FixedVectorType::get(i32_type, vector_width); - llvm::Value* vec_alloca = builder.CreateAlloca(vec_type, - /*ArraySize=*/nullptr, - /*Name=*/kernel_id_prefix + kernel_id); - - // Then, store the initial value of <0, 1, ..., [W-1]> o the alloca pointer, where W is the - // vector width. - std::vector constants; - for (unsigned i = 0; i < vector_width; ++i) { - const auto& element = llvm::ConstantInt::get(i32_type, i); - constants.push_back(element); - } - llvm::Value* vector_id = llvm::ConstantVector::get(constants); - builder.CreateStore(vector_id, vec_alloca); - } // Branch to condition basic block and insert condition code there. builder.CreateBr(for_cond); builder.SetInsertPoint(for_cond); @@ -623,23 +613,11 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem builder.SetInsertPoint(for_inc); node.get_increment()->accept(*this); - // If the code is vectorised, then increment the vector id by where W is the + // Create a branch to condition block, then generate exit code out of the loop. Restore the // vector width. - // \todo: See the comment for `kernel_id_prefix`. - if (vector_width > 1) { - // First, create an increment vector. - llvm::Value* vector_inc = get_constant_int_vector(vector_width); - - // Increment the kernel id elements by a constant vector width. 
- llvm::Value* vector_id_ptr = lookup(kernel_id_prefix + kernel_id); - llvm::Value* vector_id = builder.CreateLoad(vector_id_ptr); - llvm::Value* incremented = builder.CreateAdd(vector_id, vector_inc); - builder.CreateStore(incremented, vector_id_ptr); - } - - // Create a branch to condition block, then generate exit code out of the loop. builder.CreateBr(for_cond); builder.SetInsertPoint(exit); + vector_width = tmp_vector_width; } @@ -707,8 +685,12 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( int length = get_array_length(*indexed_name); var_type = llvm::ArrayType::get(scalar_var_type, length); } else if (identifier->is_name()) { - // This case corresponds to a scalar local variable. Its type is double by default. - var_type = scalar_var_type; + // This case corresponds to a scalar or vector local variable. + if (is_kernel_code && vector_width > 1) { + var_type = llvm::FixedVectorType::get(scalar_var_type, vector_width); + } else { + var_type = scalar_var_type; + } } else { throw std::runtime_error("Error: Unsupported local variable type"); } @@ -881,10 +863,11 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { llvm::Value* ptr = get_variable_ptr(node); - // Finally, load the variable from the pointer value. - llvm::Value* var = builder.CreateLoad(ptr); + // Finally, load the variable from the pointer value unless it has already been loaded (e.g. via + // gather instruction). + llvm::Value* var = ptr->getType()->isPointerTy() ? builder.CreateLoad(ptr) : ptr; - // If the vale should not be vectorised, or it is already a vector, add it to the stack. + // If the value should not be vectorised, or it is already a vector, add it to the stack. 
if (!is_kernel_code || vector_width <= 1 || var->getType()->isVectorTy()) { values.push_back(var); return; diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index fcf078a34b..716eda16dc 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -30,8 +30,8 @@ static double EPSILON = 1e-15; //============================================================================= struct InstanceTestInfo { - codegen::CodegenInstanceData& instance; - codegen::CodegenLLVMVisitor& visitor; + codegen::CodegenInstanceData* instance; + codegen::InstanceVarHelper helper; int num_elements; }; @@ -40,11 +40,11 @@ bool check_instance_variable(InstanceTestInfo& instance_info, std::vector& expected, const std::string& variable_name) { std::vector actual; - int variable_index = instance_info.visitor.get_instance_var_helper().get_variable_index( - variable_name); - actual.assign(static_cast(instance_info.instance.members[variable_index]), - static_cast(instance_info.instance.members[variable_index]) + + int variable_index = instance_info.helper.get_variable_index(variable_name); + actual.assign(static_cast(instance_info.instance->members[variable_index]), + static_cast(instance_info.instance->members[variable_index]) + instance_info.num_elements); + // While we are comparing double types as well, for simplicity the test cases are hand-crafted // so that no floating-point arithmetic is really involved. 
return actual == expected; @@ -54,9 +54,8 @@ template void initialise_instance_variable(InstanceTestInfo& instance_info, std::vector& data, const std::string& variable_name) { - int variable_index = instance_info.visitor.get_instance_var_helper().get_variable_index( - variable_name); - T* data_start = static_cast(instance_info.instance.members[variable_index]); + int variable_index = instance_info.helper.get_variable_index(variable_name); + T* data_start = static_cast(instance_info.instance->members[variable_index]); for (int i = 0; i < instance_info.num_elements; ++i) *(data_start + i) = data[i]; } @@ -318,7 +317,9 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { std::vector x0 = {5.0, 5.0, 5.0, 5.0}; std::vector x1 = {1.0, 1.0, 1.0, 1.0}; - InstanceTestInfo instance_info{instance_data, llvm_visitor, num_elements}; + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; initialise_instance_variable(instance_info, x, "x"); initialise_instance_variable(instance_info, x0, "x0"); initialise_instance_variable(instance_info, x1, "x1"); @@ -335,3 +336,85 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { } } } + +//============================================================================= +// State vectorised kernel with optimisations on. +//============================================================================= + +SCENARIO("Simple vectorised kernel", "[llvm][runner]") { + GIVEN("Simple MOD file with a state update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + NONSPECIFIC_CURRENT i + RANGE x0, x1 + } + + STATE { + x + } + + ASSIGNED { + v + x0 + x1 + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 0 + } + + DERIVATIVE states { + x = (x0 - x) / x1 + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. 
+ SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/true, + /*use_single_precision=*/false, + /*vector_width=*/4); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_function("nrn_state_test"); + + // Create the instance struct data. + int num_elements = 10; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values for unit testing. + std::vector x = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + std::vector x0 = {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0}; + std::vector x1 = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, x0, "x0"); + initialise_instance_variable(instance_info, x1, "x1"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + Runner runner(std::move(module)); + + THEN("Values in struct have changed according to the formula") { + runner.run_with_argument("__nrn_state_test_wrapper", + instance_data.base_ptr); + std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; + + // Check that the main and remainder loops correctly change the data stored in x. 
+ REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 942a908be6..28cb526d5d 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -901,6 +901,61 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { } } +//============================================================================= +// Gather for vectorised kernel +//============================================================================= + +SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { + GIVEN("An indirect indexing of voltage") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT i + } + + STATE {} + + ASSIGNED { + v (mV) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 2 + } + + DERIVATIVE states {} + )"; + + THEN("a gather instructions is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/4); + std::smatch m; + + // Check gather intrinsic is correctly declared. + std::regex declaration( + R"(declare <4 x double> @llvm\.masked\.gather\.v4f64\.v4p0f64\(<4 x double\*>, i32 immarg, <4 x i1>, <4 x double>\) )"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check that the indices vector is created correctly and extended to i64. + std::regex index_load(R"(load <4 x i32>, <4 x i32>\* %node_id)"); + std::regex sext(R"(sext <4 x i32> %.* to <4 x i64>)"); + REQUIRE(std::regex_search(module_string, m, index_load)); + REQUIRE(std::regex_search(module_string, m, sext)); + + // Check that the access to `voltage` is performed via gather instruction. 
+ // v = mech->voltage[node_id] + std::regex gather( + "call <4 x double> @llvm\\.masked\\.gather\\.v4f64\\.v4p0f64\\(" + "<4 x double\\*> %.*, i32 1, <4 x i1> , <4 x " + "double> undef\\)"); + REQUIRE(std::regex_search(module_string, m, gather)); + } + } +} + //============================================================================= // Derivative block : test optimization //============================================================================= From 0cb1cea14b63ae7bf5e3b6c040a3a8656a57827b Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 13 Apr 2021 08:31:28 +0300 Subject: [PATCH 036/105] Verification and file utilities for LLVM IR codegen (#582) Added several minor improvement to the current pipeline infrastructure. Particularly, the following was addressed: - The generated IR module is now verified after running the visitor - The kernel is checked if it can be vectorised or not - The generated IR can be dumped to `.ll` file with `-o ` - Printing LLVM IR is moved to debug mode --- src/codegen/llvm/codegen_llvm_visitor.cpp | 58 ++++++++++++++++++- src/codegen/llvm/codegen_llvm_visitor.hpp | 6 +- .../codegen/codegen_llvm_instance_struct.cpp | 2 +- test/unit/codegen/codegen_llvm_ir.cpp | 2 +- 4 files changed, 61 insertions(+), 7 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index a42201824c..b080a1638f 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -9,13 +9,17 @@ #include "ast/all.hpp" #include "visitors/rename_visitor.hpp" +#include "visitors/visitor_utils.hpp" +#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ToolOutputFile.h" namespace nmodl { namespace codegen { @@ -28,12 
+32,31 @@ static constexpr const char instance_struct_type_name[] = "__instance_var__type" /* Helper routines */ /****************************************************************************************/ +/// A utility to check for supported Statement AST nodes. static bool is_supported_statement(const ast::Statement& statement) { return statement.is_codegen_var_list_statement() || statement.is_expression_statement() || statement.is_codegen_for_statement() || statement.is_codegen_return_statement() || statement.is_if_statement() || statement.is_while_statement(); } +/// A utility to check of the kernel body can be vectorised. +static bool can_vectorise(const ast::CodegenForStatement& statement, symtab::SymbolTable* sym_tab) { + // Check that function calls are made to external methods only. + const auto& function_calls = collect_nodes(statement, {ast::AstNodeType::FUNCTION_CALL}); + for (const auto& call: function_calls) { + const auto& name = call->get_node_name(); + auto symbol = sym_tab->lookup(name); + if (symbol && !symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) + return false; + } + + // Check there is no control flow in the kernel. + const std::vector unsupported_nodes = {ast::AstNodeType::IF_STATEMENT}; + const auto& collected = collect_nodes(statement, unsupported_nodes); + + return collected.empty(); +} + llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, llvm::Value* index) { llvm::Type* index_type = llvm::Type::getInt64Ty(*context); std::vector indices; @@ -582,9 +605,18 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); + // Save the vector width. + int tmp_vector_width = vector_width; + + // Check if the kernel can be vectorised. If not, generate scalar code. 
+ if (!can_vectorise(node, sym_tab)) { + logger->info("Cannot vectorise the for loop in '" + current_func->getName().str() + "'"); + logger->info("Generating scalar code..."); + vector_width = 1; + } + // First, initialise the loop in the same basic block. This block is optional. Also, reset // vector width to 1 if processing the remainder of the loop. - int tmp_vector_width = vector_width; if (node.get_initialization()) { node.get_initialization()->accept(*this); } else { @@ -833,13 +865,33 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { visit_codegen_function(*func); } + // Verify the generated LLVM IR module. + std::string error; + llvm::raw_string_ostream ostream(error); + if (verifyModule(*module, &ostream)) { + throw std::runtime_error("Error: incorrect IR has been generated!\n" + ostream.str()); + } + if (opt_passes) { logger->info("Running LLVM optimisation passes"); run_llvm_opt_passes(); } - // Keep this for easier development (maybe move to debug mode later). - std::cout << print_module(); + // If the output directory is specified, save the IR to .ll file. + // \todo: Consider saving the generated LLVM IR to bytecode (.bc) file instead. 
+ if (output_dir != ".") { + std::error_code error_code; + std::unique_ptr out = std::make_unique( + output_dir + "/" + mod_filename + ".ll", error_code, llvm::sys::fs::OF_Text); + if (error_code) + throw std::runtime_error("Error: " + error_code.message()); + + std::unique_ptr annotator; + module->print(out->os(), annotator.get()); + out->keep(); + } + + logger->debug("Dumping generated IR...\n" + dump_module()); } void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index b099646b07..f001c2c2fe 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -321,8 +321,10 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_var_name(const ast::VarName& node) override; void visit_while_statement(const ast::WhileStatement& node) override; - // \todo: move this to debug mode (e.g. -v option or --dump-ir) - std::string print_module() const { + /** + * Dumps the generated LLVM IR module to string. 
+ */ + std::string dump_module() const { std::string str; llvm::raw_string_ostream os(str); os << *module; diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 4bfa1cd31c..52b9bb9868 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -45,7 +45,7 @@ codegen::CodegenInstanceData generate_instance_data(const std::string& text, use_single_precision, vector_width); llvm_visitor.visit_program(*ast); - llvm_visitor.print_module(); + llvm_visitor.dump_module(); const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, seed); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 28cb526d5d..7a8718fb2a 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -50,7 +50,7 @@ std::string run_llvm_visitor(const std::string& text, use_single_precision, vector_width); llvm_visitor.visit_program(*ast); - return llvm_visitor.print_module(); + return llvm_visitor.dump_module(); } //============================================================================= From c839e686a214536c4b9bf6becd4f6177ebf4850b Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 16 Apr 2021 19:20:29 +0300 Subject: [PATCH 037/105] Add gather execution test (#591) --- test/unit/codegen/codegen_llvm_execution.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 716eda16dc..e5d0904b6e 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -351,7 +351,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { } STATE { - x + x y } 
ASSIGNED { @@ -367,6 +367,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { DERIVATIVE states { x = (x0 - x) / x1 + y = v } )"; @@ -397,6 +398,9 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { std::vector x0 = {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0}; std::vector x1 = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + std::vector voltage = {3.0, 4.0, 7.0, 1.0, 2.0, 5.0, 8.0, 6.0, 10.0, 9.0}; + std::vector node_index = {3, 4, 0, 1, 5, 7, 2, 6, 9, 8}; + InstanceTestInfo instance_info{&instance_data, llvm_visitor.get_instance_var_helper(), num_elements}; @@ -404,6 +408,9 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { initialise_instance_variable(instance_info, x0, "x0"); initialise_instance_variable(instance_info, x1, "x1"); + initialise_instance_variable(instance_info, voltage, "voltage"); + initialise_instance_variable(instance_info, node_index, "node_index"); + // Set up the JIT runner. std::unique_ptr module = llvm_visitor.get_module(); Runner runner(std::move(module)); @@ -411,10 +418,14 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { THEN("Values in struct have changed according to the formula") { runner.run_with_argument("__nrn_state_test_wrapper", instance_data.base_ptr); - std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; - // Check that the main and remainder loops correctly change the data stored in x. 
+ std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + + // Check that the gather load produces correct results in y: + // y[id] = voltage[node_index[id]] + std::vector y_expected = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + REQUIRE(check_instance_variable(instance_info, y_expected, "y")); } } } From cffb50db53411b35ac920bee4a13e873ec51a5d6 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sat, 17 Apr 2021 09:34:46 +0300 Subject: [PATCH 038/105] Fixed loop allocations (#590) * avoid local variables inside loop to not have allocas * this was causing stack overview for large instance count --- .../llvm/codegen_llvm_helper_visitor.cpp | 32 ++++++++++++++----- src/codegen/llvm/codegen_llvm_visitor.cpp | 28 +++++++++++++--- test/unit/codegen/codegen_llvm_ir.cpp | 24 +++++--------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 270794fc9a..00ea3de297 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -562,15 +562,16 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { function_statements.push_back( create_local_variable_statement(induction_variables, INTEGER_TYPE)); + /// create vectors of local variables that would be used in compute part + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + /// create now main compute part : for loop over channel instances /// loop body : initialization + solve blocks ast::StatementVector loop_def_statements; ast::StatementVector loop_index_statements; ast::StatementVector loop_body_statements; - - std::vector int_variables{"node_id"}; - std::vector double_variables{"v"}; { /// access node index and corresponding voltage loop_index_statements.push_back( @@ -597,6 +598,7 @@ void 
CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// add breakpoint block if no current if (info.currents.empty() && info.breakpoint_node != nullptr) { auto block = info.breakpoint_node->get_statement_block(); + // \todo this automatically adds `SOLVE states METHOD ...` append_statements_from_block(loop_body_statements, block); } @@ -607,10 +609,6 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { loop_index_statements, loop_body_statements); - loop_def_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); - loop_def_statements.push_back( - create_local_variable_statement(double_variables, FLOAT_TYPE)); - // \todo handle process_shadow_update_statement and wrote_conc_call yet } @@ -622,6 +620,10 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// now construct a new code block which will become the body of the loop auto loop_block = std::make_shared(loop_body); + /// declare main FOR loop local variables + function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + function_statements.push_back(create_local_variable_statement(double_variables, FLOAT_TYPE)); + /// main loop possibly vectorized on vector_width { /// loop constructs : initialization, condition and increment @@ -647,6 +649,10 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { function_statements.push_back(for_loop_statement_main); } + /// vectors containing renamed FOR loop local variables + std::vector renamed_int_variables; + std::vector renamed_double_variables; + /// remainder loop possibly vectorized on vector_width if (vector_width > 1) { /// loop constructs : initialization, condition and increment @@ -664,14 +670,24 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { // \todo: Change RenameVisitor to take a vector of names to which it would append a single // prefix. 
for (const auto& name: int_variables) { - visitor::RenameVisitor v(name, epilogue_variable_prefix + name); + std::string new_name = epilogue_variable_prefix + name; + renamed_int_variables.push_back(new_name); + visitor::RenameVisitor v(name, new_name); loop_statements->accept(v); } for (const auto& name: double_variables) { + std::string new_name = epilogue_variable_prefix + name; + renamed_double_variables.push_back(new_name); visitor::RenameVisitor v(name, epilogue_variable_prefix + name); loop_statements->accept(v); } + /// declare remainder FOR loop local variables + function_statements.push_back( + create_local_variable_statement(renamed_int_variables, INTEGER_TYPE)); + function_statements.push_back( + create_local_variable_statement(renamed_double_variables, FLOAT_TYPE)); + /// convert all variables inside loop body to instance variables convert_to_instance_variable(*for_loop_statement_remainder, loop_index_var); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index b080a1638f..3a165e465a 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -592,6 +592,9 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { // | | // +---------------------------+ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatement& node) { + // Disable vector code generation for condition and increment blocks. + is_kernel_code = false; + // Get the current and the next blocks within the function. 
llvm::BasicBlock* curr_block = builder.GetInsertBlock(); llvm::BasicBlock* next = curr_block->getNextNode(); @@ -650,6 +653,7 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem builder.CreateBr(for_cond); builder.SetInsertPoint(exit); vector_width = tmp_vector_width; + is_kernel_code = true; } @@ -682,11 +686,19 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node builder.CreateStore(&arg, alloca); } - // Process function or procedure body. The return statement is handled in a separate visitor. - block->accept(*this); + // Process function or procedure body. If the function is a compute kernel, then set the + // corresponding flags. The return statement is handled in a separate visitor. + bool has_void_ret_type = node.get_return_type()->get_type() == ast::AstNodeType::VOID; + if (has_void_ret_type) { + is_kernel_code = true; + block->accept(*this); + is_kernel_code = false; + } else { + block->accept(*this); + } // If function has a void return type, add a terminator not handled by CodegenReturnVar. - if (node.get_return_type()->get_type() == ast::AstNodeType::VOID) + if (has_void_ret_type) builder.CreateRetVoid(); // Clear local values stack and remove the pointer to the local symbol table. @@ -718,7 +730,13 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( var_type = llvm::ArrayType::get(scalar_var_type, length); } else if (identifier->is_name()) { // This case corresponds to a scalar or vector local variable. - if (is_kernel_code && vector_width > 1) { + const auto& identifier_name = identifier->get_node_name(); + + // Even if generating vectorised code, some variables still need to be scalar. + // Particularly, the induction variable "id" and remainder loop variables (that start + // with "epilogue"). 
+ if (is_kernel_code && vector_width > 1 && identifier_name != kernel_id && + identifier_name.rfind("epilogue", 0)) { var_type = llvm::FixedVectorType::get(scalar_var_type, vector_width); } else { var_type = scalar_var_type; @@ -726,7 +744,7 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( } else { throw std::runtime_error("Error: Unsupported local variable type"); } - llvm::Value* alloca = builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); + builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); } } diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 7a8718fb2a..ec7319bfa6 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -845,10 +845,14 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, struct_type)); REQUIRE(std::regex_search(module_string, m, kernel_declaration)); - // Check for correct induction variable initialisation and a branch to condition block. - std::regex alloca_instr(R"(%id = alloca i32)"); + // Check for correct variables initialisation and a branch to condition block. 
+ std::regex id_initialisation(R"(%id = alloca i32)"); + std::regex node_id_initialisation(R"(%node_id = alloca i32)"); + std::regex v_initialisation(R"(%v = alloca double)"); std::regex br(R"(br label %for\.cond)"); - REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, id_initialisation)); + REQUIRE(std::regex_search(module_string, m, node_id_initialisation)); + REQUIRE(std::regex_search(module_string, m, v_initialisation)); REQUIRE(std::regex_search(module_string, m, br)); // Check condition block: id < mech->node_count, and a conditional branch to loop body @@ -865,12 +869,7 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, condition)); REQUIRE(std::regex_search(module_string, m, cond_br)); - // In the body block, `node_id` and voltage `v` are initialised with the data from the - // struct. Check for variable allocations and correct loads from the struct with GEPs. - std::regex initialisation( - "for\\.body:.*\n" - " %node_id = alloca i32,.*\n" - " %v = alloca double,.*"); + // Check for correct loads from the struct with GEPs. std::regex load_from_struct( " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*\n" " %.* = getelementptr inbounds %.*__instance_var__type, " @@ -880,7 +879,6 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { " %.* = load (i32|double)\\*, (i32|double)\\*\\* %.*\n" " %.* = getelementptr inbounds (i32|double), (i32|double)\\* %.*, i64 %.*\n" " %.* = load (i32|double), (i32|double)\\* %.*"); - REQUIRE(std::regex_search(module_string, m, initialisation)); REQUIRE(std::regex_search(module_string, m, load_from_struct)); // Check induction variable is incremented in increment block. 
@@ -987,8 +985,6 @@ SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { std::string expected_loop = R"( for(id = 0; idnode_count; id = id+1) { - INTEGER node_id - DOUBLE v node_id = mech->node_index[id] v = mech->voltage[node_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] @@ -1033,16 +1029,12 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { std::string expected_main_loop = R"( for(id = 0; idnode_count-7; id = id+8) { - INTEGER node_id - DOUBLE v node_id = mech->node_index[id] v = mech->voltage[node_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] })"; std::string expected_epilogue_loop = R"( for(; idnode_count; id = id+1) { - INTEGER epilogue_node_id - DOUBLE epilogue_v epilogue_node_id = mech->node_index[id] epilogue_v = mech->voltage[epilogue_node_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] From 481c7287dcd4c3f562d7bb99992823f27cd9b289 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sat, 17 Apr 2021 09:51:19 +0300 Subject: [PATCH 039/105] Benchmarking LLVM code generation (#583) Introduced the benchmarking for LLVM code generation pipeline. For that, new options have been added: ``` benchmark LLVM benchmark option Options: --run Run LLVM benchmark (false) --instance-size INT Instance struct size (10000) --repeat INT Number of experiments for benchmarking (100) --backend TEXT:{avx2, default, sse2} Target's backend (default) ``` The JIT runner has also been modified to extract the target information correctly, and disable available CPU features for benchmarking a specific backend. 
Example: ``` $ nmodl hh.mod llvm --ir --vector-width 1 benchmark --run --instance-size 100 --repeat 2 --backend default Created LLVM IR module from NMODL AST in 0.006765817 Benchmarking kernel 'nrn_state_hh' Experiment 0: compute time = 0.013977749 Experiment 1: compute time = 0.004847989 Average compute time = 0.0058550929 ``` Co-authored-by: Pramod Kumbhar --- src/CMakeLists.txt | 2 +- src/codegen/llvm/CMakeLists.txt | 5 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 83 ++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 11 +- src/codegen/llvm/jit_driver.cpp | 59 +++---- src/codegen/llvm/jit_driver.hpp | 11 +- src/codegen/llvm/llvm_benchmark.cpp | 157 +++++++++++++++++++ src/codegen/llvm/llvm_benchmark.hpp | 85 ++++++++++ src/main.cpp | 43 ++++- test/unit/CMakeLists.txt | 5 + test/unit/codegen/codegen_llvm_execution.cpp | 4 +- 11 files changed, 390 insertions(+), 75 deletions(-) create mode 100644 src/codegen/llvm/llvm_benchmark.cpp create mode 100644 src/codegen/llvm/llvm_benchmark.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 022cecf1ac..ab84cfbd70 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,7 +37,7 @@ target_link_libraries( cpp_cc_configure_sanitizers(TARGET nmodl) if(NMODL_ENABLE_LLVM) - target_link_libraries(nmodl llvm_codegen ${LLVM_LIBS_TO_LINK}) + target_link_libraries(nmodl llvm_codegen llvm_benchmark ${LLVM_LIBS_TO_LINK}) endif() # ============================================================================= diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 8b2f78a6cb..09e37d3896 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -7,7 +7,9 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp + 
${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp) # ============================================================================= # LLVM codegen library and executable @@ -28,6 +30,7 @@ if(NOT NMODL_AS_SUBPROJECT) CLI11::CLI11 llvm_codegen codegen + llvm_benchmark visitor symtab lexer diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 3a165e465a..ea7e828035 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -976,40 +976,57 @@ void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) builder.SetInsertPoint(exit); } -void CodegenLLVMVisitor::wrap_kernel_function(const std::string& kernel_name) { - // Get the kernel function and the instance struct type. - auto kernel = module->getFunction(kernel_name); - if (!kernel) - throw std::runtime_error("Kernel " + kernel_name + " is not found!"); - - if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) - throw std::runtime_error("Kernel " + kernel_name + " must have a single argument!"); - - auto instance_struct_ptr_type = llvm::dyn_cast(kernel->getArg(0)->getType()); - if (!instance_struct_ptr_type) - throw std::runtime_error("Kernel " + kernel_name + - " does not have an instance struct pointer argument!"); - - // Create a wrapper void function that takes a void pointer as a single argument. 
- llvm::Type* void_type = llvm::Type::getVoidTy(*context); - llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::Type* void_ptr_type = llvm::PointerType::get(void_type, /*AddressSpace=*/0); - llvm::Function* wrapper_func = llvm::Function::Create( - llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - "__" + kernel_name + "_wrapper", - *module); - llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", wrapper_func); - builder.SetInsertPoint(body); +void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { + // By convention, only the kernel functions return void type. + const auto& functions = module->getFunctionList(); + for (const auto& func: functions) { + if (func.getReturnType()->isVoidTy()) { + container.push_back(func.getName().str()); + } + } +} - // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel and - // adding a terminator. - llvm::Value* bitcasted = builder.CreateBitCast(wrapper_func->getArg(0), - instance_struct_ptr_type); - std::vector args; - args.push_back(bitcasted); - builder.CreateCall(kernel, args); - builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); +void CodegenLLVMVisitor::wrap_kernel_functions() { + // First, identify all kernels. + std::vector kernel_names; + find_kernel_names(kernel_names); + + for (const auto& kernel_name: kernel_names) { + // Get the kernel function and the instance struct type. 
+ auto kernel = module->getFunction(kernel_name); + if (!kernel) + throw std::runtime_error("Kernel " + kernel_name + " is not found!"); + + if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) + throw std::runtime_error("Kernel " + kernel_name + " must have a single argument!"); + + auto instance_struct_ptr_type = llvm::dyn_cast( + kernel->getArg(0)->getType()); + if (!instance_struct_ptr_type) + throw std::runtime_error("Kernel " + kernel_name + + " does not have an instance struct pointer argument!"); + + // Create a wrapper void function that takes a void pointer as a single argument. + llvm::Type* void_type = llvm::Type::getVoidTy(*context); + llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); + llvm::Type* void_ptr_type = llvm::PointerType::get(void_type, /*AddressSpace=*/0); + llvm::Function* wrapper_func = llvm::Function::Create( + llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + "__" + kernel_name + "_wrapper", + *module); + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", wrapper_func); + builder.SetInsertPoint(body); + + // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel + // and adding a terminator. 
+ llvm::Value* bitcasted = builder.CreateBitCast(wrapper_func->getArg(0), + instance_struct_ptr_type); + std::vector args; + args.push_back(bitcasted); + builder.CreateCall(kernel, args); + builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); + } } } // namespace codegen diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index f001c2c2fe..1007258010 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -333,10 +333,15 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { } /** - * For the given kernel function, wraps it into another function that uses void* to pass the - * data to the kernel \param kernel_name kernel name to be wrapped + * Fills the container with the names of kernel functions from the MOD file. */ - void wrap_kernel_function(const std::string& kernel_name); + void find_kernel_names(std::vector& container); + + /** + * Wraps all kernel function calls into wrapper functions that use void* to pass the data to the + * kernel. + */ + void wrap_kernel_functions(); }; /** \} */ // end of llvm_backends diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index a7673bb2ff..842c500810 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/src/codegen/llvm/jit_driver.cpp @@ -22,24 +22,27 @@ namespace nmodl { namespace runner { -void JITDriver::init() { +void JITDriver::init(std::string features) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); - set_target_triple(module.get()); - auto data_layout = module->getDataLayout(); - // Create IR compile function callback. auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) -> llvm::Expected> { - auto tm = tm_builder.createTargetMachine(); - if (!tm) - return tm.takeError(); - return std::make_unique(std::move(*tm)); + // Create target machine with some features possibly turned off. 
+ auto tm = create_target(&tm_builder, features); + + // Set the target triple and the data layout for the module. + module->setDataLayout(tm->createDataLayout()); + module->setTargetTriple(tm->getTargetTriple().getTriple()); + + return std::make_unique(std::move(tm)); }; + // Set JIT instance and extract the data layout from the module. auto jit_instance = cantFail( llvm::orc::LLJITBuilder().setCompileFunctionCreator(compile_function_creator).create()); + auto data_layout = module->getDataLayout(); // Add a ThreadSafeModule to the driver. llvm::orc::ThreadSafeModule tsm(std::move(module), std::make_unique()); @@ -52,29 +55,29 @@ void JITDriver::init() { data_layout.getGlobalPrefix()))); } -void JITDriver::set_target_triple(llvm::Module* module) { - auto target_triple = llvm::sys::getDefaultTargetTriple(); - std::string error; - auto target = llvm::TargetRegistry::lookupTarget(target_triple, error); +std::unique_ptr JITDriver::create_target( + llvm::orc::JITTargetMachineBuilder* builder, + const std::string& features) { + // First, look up the target. + std::string error_msg; + auto target_triple = builder->getTargetTriple().getTriple(); + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); if (!target) - throw std::runtime_error("Error: " + error + "\n"); - - std::string cpu(llvm::sys::getHostCPUName()); - llvm::SubtargetFeatures features; - llvm::StringMap host_features; - - if (llvm::sys::getHostCPUFeatures(host_features)) { - for (auto& f: host_features) - features.AddFeature(f.first(), f.second); - } + throw std::runtime_error("Error " + error_msg + "\n"); - std::unique_ptr machine( - target->createTargetMachine(target_triple, cpu, features.getString(), {}, {})); - if (!machine) - throw std::runtime_error("Error: failed to create a target machine\n"); + // Create default target machine with provided features. 
+ auto tm = target->createTargetMachine(target_triple, + llvm::sys::getHostCPUName().str(), + features, + builder->getOptions(), + builder->getRelocationModel(), + builder->getCodeModel(), + /*OL=*/llvm::CodeGenOpt::Default, + /*JIT=*/true); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); - module->setDataLayout(machine->createDataLayout()); - module->setTargetTriple(target_triple); + return std::unique_ptr(tm); } } // namespace runner diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index 23c8fca612..f994a57303 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -37,7 +37,7 @@ class JITDriver { : module(std::move(m)) {} /// Initialize the JIT. - void init(); + void init(std::string features); /// Lookup the entry-point without arguments in the JIT and execute it, returning the result. template @@ -63,8 +63,9 @@ class JITDriver { return result; } - /// Set the target triple on the module. - static void set_target_triple(llvm::Module* module); + /// A wrapper around llvm::createTargetMachine to turn on/off certain CPU features. + std::unique_ptr create_target(llvm::orc::JITTargetMachineBuilder* builder, + const std::string& features); }; /** @@ -78,9 +79,9 @@ class Runner { std::unique_ptr driver = std::make_unique(std::move(module)); public: - Runner(std::unique_ptr m) + Runner(std::unique_ptr m, std::string features = "") : module(std::move(m)) { - driver->init(); + driver->init(features); } /// Run the entry-point function without arguments. 
diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp new file mode 100644 index 0000000000..57e0d05c5b --- /dev/null +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -0,0 +1,157 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/jit_driver.hpp" +#include "llvm_benchmark.hpp" +#include "llvm/Support/Host.h" + +#include "test/unit/codegen/codegen_data_helper.hpp" + + +namespace nmodl { +namespace benchmark { + + +/// Precision for the timing measurements. +static constexpr int PRECISION = 9; + + +void LLVMBenchmark::disable(const std::string& feature, std::vector& host_features) { + for (auto& host_feature: host_features) { + if (feature == host_feature.substr(1)) { + host_feature[0] = '-'; + *log_stream << host_feature << "\n"; + return; + } + } +} + +void LLVMBenchmark::benchmark(const std::shared_ptr& node) { + // First, set the output stream for the logs. + set_log_output(); + + // Then, record the time taken for building the LLVM IR module. + codegen::CodegenLLVMVisitor visitor(mod_filename, + output_dir, + llvm_build_info.opt_passes, + llvm_build_info.use_single_precision, + llvm_build_info.vector_width); + generate_llvm(visitor, node); + + // Finally, run the benchmark and log the measurements. + run_benchmark(visitor, node); +} + +void LLVMBenchmark::generate_llvm(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node) { + // First, visit the AST to build the LLVM IR module and wrap the kernel function calls. 
+ auto start = std::chrono::high_resolution_clock::now(); + visitor.visit_program(*node); + visitor.wrap_kernel_functions(); + auto end = std::chrono::high_resolution_clock::now(); + + // Log the time taken to visit the AST and build LLVM IR. + std::chrono::duration diff = end - start; + *log_stream << "Created LLVM IR module from NMODL AST in " << std::setprecision(PRECISION) + << diff.count() << "\n\n"; +} + +std::vector LLVMBenchmark::get_cpu_features() { + std::string cpu(llvm::sys::getHostCPUName()); + + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f: host_features) + features.AddFeature(f.first(), f.second); + } + return features.getFeatures(); +} + +void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node) { + // Set the codegen data helper and find the kernels. + auto codegen_data = codegen::CodegenDataHelper(node, visitor.get_instance_struct_ptr()); + std::vector kernel_names; + visitor.find_kernel_names(kernel_names); + + // Get feature's string and turn them off depending on the backend. + std::vector features = get_cpu_features(); + *log_stream << "Backend: " << backend << "\n"; + if (backend == "avx2") { + // Disable SSE. + *log_stream << "Disabling features:\n"; + disable("sse", features); + disable("sse2", features); + disable("sse3", features); + disable("sse4.1", features); + disable("sse4.2", features); + } else if (backend == "sse2") { + // Disable AVX. + *log_stream << "Disabling features:\n"; + disable("avx", features); + disable("avx2", features); + } + + std::string features_str = llvm::join(features.begin(), features.end(), ","); + std::unique_ptr m = visitor.get_module(); + runner::Runner runner(std::move(m), features_str); + + // Benchmark every kernel. 
+ for (const auto& kernel_name: kernel_names) { + *log_stream << "Benchmarking kernel '" << kernel_name << "'\n"; + + // For every kernel run the benchmark `num_experiments` times. + double time_sum = 0.0; + for (int i = 0; i < num_experiments; ++i) { + // Initialise the data. + auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); + + // Record the execution time of the kernel. + std::string wrapper_name = "__" + kernel_name + "_wrapper"; + auto start = std::chrono::high_resolution_clock::now(); + runner.run_with_argument(kernel_name, instance_data.base_ptr); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + + // Log the time taken for each run. + *log_stream << "Experiment " << i << ": compute time = " << std::setprecision(9) + << diff.count() << "\n"; + + time_sum += diff.count(); + } + // Log the average time taken for the kernel. + *log_stream << "Average compute time = " << std::setprecision(PRECISION) + << time_sum / num_experiments << "\n\n"; + } +} + +void LLVMBenchmark::set_log_output() { + // If the output directory is not specified, dump logs to the console. + if (output_dir == ".") { + log_stream = std::make_shared(std::cout.rdbuf()); + return; + } + + // Otherwise, dump logs to the specified file. 
+ std::string filename = output_dir + "/" + mod_filename + ".log"; + std::ofstream ofs; + + ofs.open(filename.c_str()); + + if (ofs.fail()) + throw std::runtime_error("Error while opening a file '" + filename + "'"); + + log_stream = std::make_shared(ofs.rdbuf()); +} + +} // namespace benchmark +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_benchmark.hpp b/src/codegen/llvm/llvm_benchmark.hpp new file mode 100644 index 0000000000..30ebf182e8 --- /dev/null +++ b/src/codegen/llvm/llvm_benchmark.hpp @@ -0,0 +1,85 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "codegen/llvm/codegen_llvm_visitor.hpp" + + +namespace nmodl { +namespace benchmark { + +/// A struct to hold LLVM visitor information. +struct LLVMBuildInfo { + int vector_width; + bool opt_passes; + bool use_single_precision; +}; + +/** + * \class LLVMBenchmark + * \brief A wrapper to execute MOD file kernels via LLVM IR backend, and + * benchmark compile-time and runtime. + */ +class LLVMBenchmark { + private: + std::string mod_filename; + + std::string output_dir; + + int num_experiments; + + int instance_size; + + std::string backend; + + LLVMBuildInfo llvm_build_info; + + std::shared_ptr log_stream; + + /// Disable the specified feature. + void disable(const std::string& feature, std::vector& host_features); + + /// Visits the AST to construct the LLVM IR module. + void generate_llvm(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node); + + /// Get the host CPU features in the format: + /// +feature,+feature,-feature,+feature,... + /// where `+` indicates that the feature is enabled. 
+ std::vector get_cpu_features(); + + /// Runs the main body of the benchmark, executing the compute kernels. + void run_benchmark(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node); + + /// Sets the log output stream (file or console). + void set_log_output(); + + public: + LLVMBenchmark(const std::string& mod_filename, + const std::string& output_dir, + LLVMBuildInfo info, + int num_experiments, + int instance_size, + const std::string& backend) + : mod_filename(mod_filename) + , output_dir(output_dir) + , num_experiments(num_experiments) + , instance_size(instance_size) + , backend(backend) + , llvm_build_info(info) {} + + /// Runs the benchmark. + void benchmark(const std::shared_ptr& node); +}; + + +} // namespace benchmark +} // namespace nmodl diff --git a/src/main.cpp b/src/main.cpp index c05e1ff5a2..8d316763ea 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -19,6 +19,7 @@ #include "codegen/codegen_transform_visitor.hpp" #ifdef NMODL_LLVM_BACKEND #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_benchmark.hpp" #endif #include "config/config.h" @@ -175,8 +176,20 @@ int main(int argc, const char* argv[]) { /// run llvm optimisation passes bool llvm_opt_passes(false); - /// llvm vector width; + /// llvm vector width int llvm_vec_width = 1; + + /// run llvm benchmark + bool run_benchmark(false); + + /// the size of the instance struct for the benchmark + int instance_size = 10000; + + /// the number of experiments to run for the benchmarking + int repeat = 100; + + /// specify the backend for LLVM IR to target + std::string backend = "default"; #endif // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) @@ -298,6 +311,8 @@ int main(int argc, const char* argv[]) { fmt::format("Optimize copies of ion variables ({})", optimize_ionvar_copies_codegen))->ignore_case(); #ifdef NMODL_LLVM_BACKEND + + // LLVM IR code generation options. 
auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); llvm_opt->add_flag("--ir", llvm_ir, @@ -311,6 +326,21 @@ int main(int argc, const char* argv[]) { llvm_opt->add_option("--vector-width", llvm_vec_width, fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); + + // LLVM IR benchmark options. + auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); + benchmark_opt->add_flag("--run", + run_benchmark, + fmt::format("Run LLVM benchmark ({})", run_benchmark))->ignore_case(); + benchmark_opt->add_option("--instance-size", + instance_size, + fmt::format("Instance struct size ({})", instance_size))->ignore_case(); + benchmark_opt->add_option("--repeat", + repeat, + fmt::format("Number of experiments for benchmarking ({})", repeat))->ignore_case(); + benchmark_opt->add_option("--backend", + backend, + fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"}));; #endif // clang-format on @@ -622,7 +652,16 @@ int main(int argc, const char* argv[]) { } #ifdef NMODL_LLVM_BACKEND - if (llvm_ir) { + + if (run_benchmark) { + logger->info("Running LLVM benchmark"); + benchmark::LLVMBuildInfo info{llvm_vec_width, llvm_opt_passes, llvm_float_type}; + benchmark::LLVMBenchmark bench( + modfile, output_dir, info, repeat, instance_size, backend); + bench.benchmark(ast); + } + + else if (llvm_ir) { logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor( modfile, output_dir, llvm_opt_passes, llvm_float_type, llvm_vec_width); diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index e5f10180cb..a82ff8ff19 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -95,6 +95,7 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) + target_link_libraries( testcodegen codegen @@ -108,6 +109,10 @@ target_link_libraries( if(NMODL_ENABLE_LLVM) 
include_directories(${LLVM_INCLUDE_DIRS} codegen) + + add_library(llvm_benchmark STATIC codegen/codegen_data_helper.cpp) + add_dependencies(llvm_benchmark lexer) + add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_data_helper.cpp diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index e5d0904b6e..d0d0408853 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -304,7 +304,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { /*use_single_precision=*/false, /*vector_width=*/1); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_function("nrn_state_test"); + llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 4; @@ -385,7 +385,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { /*use_single_precision=*/false, /*vector_width=*/4); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_function("nrn_state_test"); + llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. 
int num_elements = 10; From 8b2d59828e24bdb13d4cce16e8f174421afdf62e Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 18 Apr 2021 23:47:28 +0200 Subject: [PATCH 040/105] Minor benchmarking improvement (#593) - allocate instance data only once - store memory size with instance data - print memory size while running benchmarking kernel --- src/codegen/llvm/llvm_benchmark.cpp | 9 +++++---- test/unit/codegen/codegen_data_helper.cpp | 2 ++ test/unit/codegen/codegen_data_helper.hpp | 3 +++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index 57e0d05c5b..6ab9ff4982 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -107,14 +107,15 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, // Benchmark every kernel. for (const auto& kernel_name: kernel_names) { - *log_stream << "Benchmarking kernel '" << kernel_name << "'\n"; + // Initialise the data. + auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); + + double size_mbs = instance_data.num_bytes / (1024.0 * 1024.0); + *log_stream << "Benchmarking kernel '" << kernel_name << ", with " << size_mbs << " MBs\n"; // For every kernel run the benchmark `num_experiments` times. double time_sum = 0.0; for (int i = 0; i < num_experiments; ++i) { - // Initialise the data. - auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); - // Record the execution time of the kernel. 
std::string wrapper_name = "__" + kernel_name + "_wrapper"; auto start = std::chrono::high_resolution_clock::now(); diff --git a/test/unit/codegen/codegen_data_helper.cpp b/test/unit/codegen/codegen_data_helper.cpp index e42cfe01f3..4bf94f583d 100644 --- a/test/unit/codegen/codegen_data_helper.cpp +++ b/test/unit/codegen/codegen_data_helper.cpp @@ -88,6 +88,7 @@ CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t s // allocate instance object with memory alignment posix_memalign(&base, NBYTE_ALIGNMENT, member_size * variables.size()); data.base_ptr = base; + data.num_bytes += member_size * variables.size(); size_t offset = 0; void* ptr = base; @@ -115,6 +116,7 @@ CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t s void* member; posix_memalign(&member, NBYTE_ALIGNMENT, member_size * num_elements); initialize_variable(var, member, variable_index, num_elements); + data.num_bytes += member_size * num_elements; // copy address at specific location in the struct memcpy(ptr, &member, sizeof(double*)); diff --git a/test/unit/codegen/codegen_data_helper.hpp b/test/unit/codegen/codegen_data_helper.hpp index 368b964147..ef8e869366 100644 --- a/test/unit/codegen/codegen_data_helper.hpp +++ b/test/unit/codegen/codegen_data_helper.hpp @@ -46,6 +46,9 @@ struct CodegenInstanceData { /// i.e. *(base_ptr + offsets[0]) will be members[0] std::vector members; + /// size in bytes + size_t num_bytes = 0; + // cleanup all memory allocated for type and member variables ~CodegenInstanceData(); }; From 2d67af2fbae964784a615e4de6a72d9d94e0f75a Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Mon, 19 Apr 2021 19:22:07 +0200 Subject: [PATCH 041/105] Bug fix in codegen helper: delete LOCAL statement (#595) - LOCAL statement was not deleted correctly - Instead of getting first element from statement vector, use local statement pointer to erase it from the node. 
Related to #594 --- src/codegen/llvm/codegen_llvm_helper_visitor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 00ea3de297..38678df3a1 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -473,12 +473,13 @@ void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node } /// remove local list statement now - const auto& statements = node.get_statements(); - node.erase_statement(statements.begin()); + std::unordered_set to_delete({local_statement.get()}); + node.erase_statement(to_delete); /// create new codegen variable statement and insert at the beginning of the block auto type = new ast::CodegenVarType(FLOAT_TYPE); auto statement = std::make_shared(type, variables); + const auto& statements = node.get_statements(); node.insert_statement(statements.begin(), statement); } } From c884dd8fb8c8f26466b93e1431938442e70bdee2 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 20 Apr 2021 21:36:39 +0300 Subject: [PATCH 042/105] LLVM 13 compatibility and fixing void* type (#603) * Made compatible with LLVM 13 and replaced void* with i8* --- cmake/LLVMHelper.cmake | 9 ++++++++- src/codegen/llvm/codegen_llvm_visitor.cpp | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index a731fa0151..e27ac8d553 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -5,7 +5,14 @@ find_package(LLVM REQUIRED CONFIG) # include LLVM header and core library -llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK core orcjit native) +llvm_map_components_to_libnames( + LLVM_LIBS_TO_LINK + core + instcombine + native + orcjit + scalaropts + support) set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) diff --git 
a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index ea7e828035..cd42fffae3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -1007,9 +1007,8 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { " does not have an instance struct pointer argument!"); // Create a wrapper void function that takes a void pointer as a single argument. - llvm::Type* void_type = llvm::Type::getVoidTy(*context); llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::Type* void_ptr_type = llvm::PointerType::get(void_type, /*AddressSpace=*/0); + llvm::Type* void_ptr_type = llvm::Type::getInt8PtrTy(*context); llvm::Function* wrapper_func = llvm::Function::Create( llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), llvm::Function::ExternalLinkage, From 3c38a2e9421a9f0bf32839d8ea8a5985b3fed72c Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Tue, 20 Apr 2021 23:31:40 +0200 Subject: [PATCH 043/105] Allow LOCAL variable inside StatementBlock for LLVM IR generation (#599) - if LOCAL variable was declared inside DERIVATIVE block then we were getting error: "Stored value type does not match pointer operand type!" - the error was happening because scalar variable from epilogue loop was conflicting with the vector type variable in main loop - to avoid conflict between main and epilogue loop, rename all local variables in epilogue. 
- bug fix for recursive handling of LocalList statement fixes #594 --- .../llvm/codegen_llvm_helper_visitor.cpp | 57 ++++++++++++++++--- .../llvm/codegen_llvm_helper_visitor.hpp | 1 + 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 38678df3a1..420a08283b 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -459,12 +459,13 @@ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, * it to CodegenVarListStatement that will represent all variables as double. */ void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node) { - /// first process all children blocks if any - node.visit_children(*this); + /// collect all local statement block + const auto& statements = collect_nodes(node, {ast::AstNodeType::LOCAL_LIST_STATEMENT}); + + /// iterate over all statements and replace each with codegen variable + for (const auto& statement: statements) { + const auto& local_statement = std::dynamic_pointer_cast(statement); - /// check if block contains LOCAL statement - const auto& local_statement = visitor::get_local_list_statement(node); - if (local_statement) { /// create codegen variables from local variables /// clone variable to make new independent statement ast::CodegenVarVector variables; @@ -474,16 +475,51 @@ void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node /// remove local list statement now std::unordered_set to_delete({local_statement.get()}); - node.erase_statement(to_delete); + /// local list statement is enclosed in statement block + const auto& parent_node = dynamic_cast(local_statement->get_parent()); + parent_node->erase_statement(to_delete); /// create new codegen variable statement and insert at the beginning of the block auto type = new ast::CodegenVarType(FLOAT_TYPE); - auto statement = 
std::make_shared(type, variables); - const auto& statements = node.get_statements(); - node.insert_statement(statements.begin(), statement); + auto new_statement = std::make_shared(type, variables); + const auto& statements = parent_node->get_statements(); + parent_node->insert_statement(statements.begin(), new_statement); } } +/** + * \brief Visit StatementBlock and rename all LOCAL variables + * @param node AST node representing Statement block + * + * Statement block in remainder loop will have same LOCAL variables from + * main loop. In order to avoid conflict during lookup, rename each local + * variable by appending unique number. The number used as suffix is just + * a counter used for Statement block. + */ +void CodegenLLVMHelperVisitor::rename_local_variables(ast::StatementBlock& node) { + /// local block counter just to append unique number + static int local_block_counter = 1; + + /// collect all local statement block + const auto& statements = collect_nodes(node, {ast::AstNodeType::LOCAL_LIST_STATEMENT}); + + /// iterate over each statement and rename all variables + for (const auto& statement: statements) { + const auto& local_statement = std::dynamic_pointer_cast(statement); + + /// rename local variable in entire statement block + for (auto& var: local_statement->get_variables()) { + std::string old_name = var->get_node_name(); + std::string new_name = "{}_{}"_format(old_name, local_block_counter); + visitor::RenameVisitor(old_name, new_name).visit_statement_block(node); + } + } + + /// make it unique for next statement block + local_block_counter++; +} + + void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { node.visit_children(*this); create_function_for_node(node); @@ -661,6 +697,9 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, /*vector_width=*/1); const auto& increment = loop_increment_expression(INDUCTION_VAR, 
/*vector_width=*/1); + /// rename local variables to avoid conflict with main loop + rename_local_variables(*loop_block); + /// convert local statement to codegenvar statement convert_local_statement(*loop_block); diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 446d5a6fd9..bbff588675 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -163,6 +163,7 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { void convert_to_instance_variable(ast::Node& node, std::string& index_var); void convert_local_statement(ast::StatementBlock& node); + void rename_local_variables(ast::StatementBlock& node); void visit_procedure_block(ast::ProcedureBlock& node) override; void visit_function_block(ast::FunctionBlock& node) override; From 1f250eeaf1e5e9f10376f968b945f9c315db153b Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 22 Apr 2021 17:11:09 +0200 Subject: [PATCH 044/105] Update CI with LLVM v13 (trunk) (#605) * In order to use VecLibReplace pass, we need LLVM 13 / trunk * Change ubuntu image on azure from 16.04 to 18.04 * Install llvm-13 nightly snapshot * Enable LLVM build on Ubuntu * For Mac OS use pre-built binary package from https://github.com/pramodk/llvm-nightly * We will see if we get OS X bottle from BlueBrain/homebrew-tap/pull/7 --- azure-pipelines.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 47dea2d1a7..f8cac1d6ad 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -99,6 +99,10 @@ stages: url="https://github.com/ispc/ispc/releases/download/${ispc_version}/ispc-${ispc_version}${ispc_version_suffix}-${url_os}.tar.gz"; mkdir $(pwd)/$CMAKE_PKG/ispc wget --quiet --output-document=- $url | tar -xvzf - -C $(pwd)/$CMAKE_PKG/ispc --strip 1; + # install llvm nightly (future v13) TODO: this will fail now, FIX this! 
+ wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 13 env: CMAKE_VER: 'v3.15.0' CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' @@ -109,7 +113,7 @@ stages: mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build cmake --version - cmake .. -DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release -DNMODL_ENABLE_LLVM=OFF + cmake .. -DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release -DNMODL_ENABLE_LLVM=ON -DLLVM_DIR=/usr/lib/llvm-13/share/llvm/cmake/ make -j 2 if [ $? -ne 0 ] then @@ -178,8 +182,8 @@ stages: displayName: 'Build CoreNEURON and Run Integration Tests with ISPC compiler' - job: 'osx11' pool: - vmImage: 'macOS-11' - displayName: 'MacOS (11), AppleClang 12.0' + vmImage: 'macOS-10.15' + displayName: 'MacOS (10.15), AppleClang 13.0 (trunk, May 2021)' steps: - checkout: self submodules: True @@ -188,11 +192,15 @@ stages: python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' + - script: | + cd $HOME + git clone https://github.com/pramodk/llvm-nightly.git + displayName: 'Setup LLVM v13' - script: | export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=`brew --prefix llvm`/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON + cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$HOME/llvm-nightly/0421/osx/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? 
-ne 0 ] then @@ -289,6 +297,7 @@ stages: - template: ci/upload-wheels.yml - job: 'macos_wheels' timeoutInMinutes: 45 + condition: eq(1,2) pool: vmImage: 'macOS-11' strategy: From f6dee6ecc34f652e2e2a2f35bdf228c4cd83f285 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 22 Apr 2021 09:08:33 -0700 Subject: [PATCH 045/105] Integrating vector maths library into LLVM codegen (#604) Added support for replacing LLVM IR maths intrinsics with vector maths functions from Accelerate, libmvec, MASSV, and SVML. To trigger the replacement, a new `--veclib` option should be used. This is only supported on LLVM 13+. Example: ``` $ bin/nmodl hh.mod llvm --ir --vector-width 4 --veclib SVML ``` fixes #589 Co-authored-by: Pramod Kumbhar --- CMakeLists.txt | 3 + cmake/LLVMHelper.cmake | 3 + src/codegen/llvm/codegen_llvm_visitor.cpp | 52 ++++++++++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 29 ++++++- src/main.cpp | 26 ++++-- test/unit/codegen/codegen_llvm_ir.cpp | 98 ++++++++++++++++++++++- 6 files changed, 191 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a98eee7cfc..e5a346f96a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,6 +183,9 @@ if(NMODL_ENABLE_LLVM) include(cmake/LLVMHelper.cmake) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(-DNMODL_LLVM_BACKEND) + if(LLVM_VERSION VERSION_LESS_EQUAL 12) + add_definitions(-DLLVM_VERSION_LESS_THAN_13) + endif() endif() # ============================================================================= diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index e27ac8d553..f81a5a62e8 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -7,8 +7,11 @@ find_package(LLVM REQUIRED CONFIG) # include LLVM header and core library llvm_map_components_to_libnames( LLVM_LIBS_TO_LINK + analysis + codegen core instcombine + mc native orcjit scalaropts diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 
cd42fffae3..1738d4139e 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -11,6 +11,7 @@ #include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -19,8 +20,13 @@ #include "llvm/IR/Type.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/ToolOutputFile.h" +#ifndef LLVM_VERSION_LESS_THAN_13 +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#endif + namespace nmodl { namespace codegen { @@ -292,21 +298,21 @@ std::shared_ptr CodegenLLVMVisitor::get_instance_struct_ptr return instance_var_helper.instance; } -void CodegenLLVMVisitor::run_llvm_opt_passes() { +void CodegenLLVMVisitor::run_ir_opt_passes() { /// run some common optimisation passes that are commonly suggested - fpm.add(llvm::createInstructionCombiningPass()); - fpm.add(llvm::createReassociatePass()); - fpm.add(llvm::createGVNPass()); - fpm.add(llvm::createCFGSimplificationPass()); + opt_pm.add(llvm::createInstructionCombiningPass()); + opt_pm.add(llvm::createReassociatePass()); + opt_pm.add(llvm::createGVNPass()); + opt_pm.add(llvm::createCFGSimplificationPass()); /// initialize pass manager - fpm.doInitialization(); + opt_pm.doInitialization(); /// iterate over all functions and run the optimisation passes auto& functions = module->getFunctionList(); for (auto& function: functions) { llvm::verifyFunction(function); - fpm.run(function); + opt_pm.run(function); } } @@ -892,7 +898,37 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { if (opt_passes) { logger->info("Running LLVM optimisation passes"); - run_llvm_opt_passes(); + run_ir_opt_passes(); + } + + // Optionally, replace LLVM's maths intrinsics with vector library calls. 
+ if (vector_width > 1 && vector_library != llvm::TargetLibraryInfoImpl::NoLibrary) { +#ifdef LLVM_VERSION_LESS_THAN_13 + logger->warn( + "This version of LLVM does not support replacement of LLVM intrinsics with vector " + "library calls"); +#else + // First, get the target library information. + llvm::Triple triple(llvm::sys::getDefaultTargetTriple()); + llvm::TargetLibraryInfoImpl target_lib_info = llvm::TargetLibraryInfoImpl(triple); + + // Populate target library information with vectorisable functions. Since libmvec is + // supported for x86_64 only, have a check to catch other architectures. + if (vector_library != llvm::TargetLibraryInfoImpl::LIBMVEC_X86 || + (triple.isX86() && triple.isArch64Bit())) { + target_lib_info.addVectorizableFunctionsFromVecLib(vector_library); + } + + // Run the codegen optimisation passes that replace maths intrinsics. + codegen_pm.add(new llvm::TargetLibraryInfoWrapperPass(target_lib_info)); + codegen_pm.add(new llvm::ReplaceWithVeclibLegacy); + codegen_pm.doInitialization(); + for (auto& function: module->getFunctionList()) { + if (!function.isDeclaration()) + codegen_pm.run(function); + } + codegen_pm.doFinalization(); +#endif } // If the output directory is specified, save the IR to .ll file. diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 1007258010..099613f8d4 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -23,6 +23,7 @@ #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" @@ -45,6 +46,16 @@ namespace codegen { * @{ */ +/// A map to query vector library by its string value. 
+static const std::map veclib_map = { + {"Accelerate", llvm::TargetLibraryInfoImpl::Accelerate}, +#ifndef LLVM_VERSION_LESS_THAN_13 + {"libmvec", llvm::TargetLibraryInfoImpl::LIBMVEC_X86}, +#endif + {"MASSV", llvm::TargetLibraryInfoImpl::MASSV}, + {"SVML", llvm::TargetLibraryInfoImpl::SVML}, + {"none", llvm::TargetLibraryInfoImpl::NoLibrary}}; + /** * \class CodegenLLVMVisitor * \brief %Visitor for transforming NMODL AST to LLVM IR @@ -65,7 +76,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { llvm::IRBuilder<> builder; - llvm::legacy::FunctionPassManager fpm; + // Pass manager for optimisation passes that are used for target code generation. + llvm::legacy::FunctionPassManager codegen_pm; + + // Vector library used for maths functions. + llvm::TargetLibraryInfoImpl::VectorLibrary vector_library; + + // Pass manager for optimisation passes that are run on IR and are not related to target. + llvm::legacy::FunctionPassManager opt_pm; // Stack to hold visited values std::vector values; @@ -97,7 +115,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { * LLVM provides number of optimisation passes that can be run on the generated IR. * Here we run common optimisation LLVM passes that benefits code optimisation. 
*/ - void run_llvm_opt_passes(); + void run_ir_opt_passes(); public: /** @@ -110,14 +128,17 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { const std::string& output_dir, bool opt_passes, bool use_single_precision = false, - int vector_width = 1) + int vector_width = 1, + std::string vec_lib = "none") : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) , use_single_precision(use_single_precision) , vector_width(vector_width) + , vector_library(veclib_map.at(vec_lib)) , builder(*context) - , fpm(module.get()) {} + , codegen_pm(module.get()) + , opt_pm(module.get()) {} /** diff --git a/src/main.cpp b/src/main.cpp index 8d316763ea..c5e42673ef 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -174,11 +174,14 @@ int main(int argc, const char* argv[]) { bool llvm_float_type(false); /// run llvm optimisation passes - bool llvm_opt_passes(false); + bool llvm_ir_opt_passes(false); /// llvm vector width int llvm_vec_width = 1; + /// vector library + std::string vec_lib("none"); + /// run llvm benchmark bool run_benchmark(false); @@ -318,14 +321,17 @@ int main(int argc, const char* argv[]) { llvm_ir, fmt::format("Generate LLVM IR ({})", llvm_ir))->ignore_case(); llvm_opt->add_flag("--opt", - llvm_opt_passes, - fmt::format("Run LLVM optimisation passes ({})", llvm_opt_passes))->ignore_case(); + llvm_ir_opt_passes, + fmt::format("Run LLVM optimisation passes ({})", llvm_ir_opt_passes))->ignore_case(); llvm_opt->add_flag("--single-precision", llvm_float_type, fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); llvm_opt->add_option("--vector-width", llvm_vec_width, fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); + llvm_opt->add_option("--veclib", + vec_lib, + fmt::format("Vector library for maths functions ({})", vec_lib))->check(CLI::IsMember({"Accelerate", "libmvec", "MASSV", "SVML", "none"})); // LLVM IR benchmark options. 
auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); @@ -340,7 +346,11 @@ int main(int argc, const char* argv[]) { fmt::format("Number of experiments for benchmarking ({})", repeat))->ignore_case(); benchmark_opt->add_option("--backend", backend, +<<<<<<< HEAD fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"}));; +======= + "Target's backend ({})"_format(backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"})); +>>>>>>> 88db707d (Integrating vector maths library into LLVM codegen (#604)) #endif // clang-format on @@ -655,7 +665,7 @@ int main(int argc, const char* argv[]) { if (run_benchmark) { logger->info("Running LLVM benchmark"); - benchmark::LLVMBuildInfo info{llvm_vec_width, llvm_opt_passes, llvm_float_type}; + benchmark::LLVMBuildInfo info{llvm_vec_width, llvm_ir_opt_passes, llvm_float_type}; benchmark::LLVMBenchmark bench( modfile, output_dir, info, repeat, instance_size, backend); bench.benchmark(ast); @@ -663,8 +673,12 @@ int main(int argc, const char* argv[]) { else if (llvm_ir) { logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor( - modfile, output_dir, llvm_opt_passes, llvm_float_type, llvm_vec_width); + CodegenLLVMVisitor visitor(modfile, + output_dir, + llvm_ir_opt_passes, + llvm_float_type, + llvm_vec_width, + vec_lib); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index ec7319bfa6..1b0f236e3d 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -36,7 +36,8 @@ using nmodl::parser::NmodlDriver; std::string run_llvm_visitor(const std::string& text, bool opt = false, bool use_single_precision = false, - int vector_width = 1) { + int vector_width = 1, + std::string vec_lib = "none") { NmodlDriver 
driver; const auto& ast = driver.parse_string(text); @@ -48,7 +49,8 @@ std::string run_llvm_visitor(const std::string& text, /*output_dir=*/".", opt, use_single_precision, - vector_width); + vector_width, + vec_lib); llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); } @@ -1056,6 +1058,98 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { } } +//============================================================================= +// Vector library calls. +//============================================================================= + +SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { + GIVEN("A vector LLVM intrinsic") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + } + STATE { + m + } + ASSIGNED { + v (mV) + } + BREAKPOINT { + SOLVE states METHOD cnexp + il = 2 + } + DERIVATIVE states { + m = exp(m) + } + )"; + + THEN("it is replaced with an appropriate vector library call") { + std::smatch m; + + // Check exponential intrinsic is created. + std::string no_library_module_str = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2); + std::regex exp_decl(R"(declare <2 x double> @llvm\.exp\.v2f64\(<2 x double>\))"); + std::regex exp_call(R"(call <2 x double> @llvm\.exp\.v2f64\(<2 x double> .*\))"); + REQUIRE(std::regex_search(no_library_module_str, m, exp_decl)); + REQUIRE(std::regex_search(no_library_module_str, m, exp_call)); + +#ifndef LLVM_VERSION_LESS_THAN_13 + // Check exponential calls are replaced with calls to SVML library. 
+ std::string svml_library_module_str = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"SVML"); + std::regex svml_exp_decl(R"(declare <2 x double> @__svml_exp2\(<2 x double>\))"); + std::regex svml_exp_call(R"(call <2 x double> @__svml_exp2\(<2 x double> .*\))"); + REQUIRE(std::regex_search(svml_library_module_str, m, svml_exp_decl)); + REQUIRE(std::regex_search(svml_library_module_str, m, svml_exp_call)); + REQUIRE(!std::regex_search(svml_library_module_str, m, exp_call)); + + // Check that supported exponential calls are replaced with calls to MASSV library (i.e. + // operating on vector of width 2). + std::string massv2_library_module_str = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"MASSV"); + std::regex massv2_exp_decl(R"(declare <2 x double> @__expd2_P8\(<2 x double>\))"); + std::regex massv2_exp_call(R"(call <2 x double> @__expd2_P8\(<2 x double> .*\))"); + REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_decl)); + REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_call)); + REQUIRE(!std::regex_search(massv2_library_module_str, m, exp_call)); + + // Check no replacement for MASSV happens for non-supported vector widths. + std::string massv4_library_module_str = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/4, + /*vec_lib=*/"MASSV"); + std::regex exp4_call(R"(call <4 x double> @llvm\.exp\.v4f64\(<4 x double> .*\))"); + REQUIRE(std::regex_search(massv4_library_module_str, m, exp4_call)); + + // Check correct replacement of @llvm.exp.v4f32 into @vexpf when using Accelerate. 
+ std::string accelerate_library_module_str = + run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/true, + /*vector_width=*/4, + /*vec_lib=*/"Accelerate"); + std::regex accelerate_exp_decl(R"(declare <4 x float> @vexpf\(<4 x float>\))"); + std::regex accelerate_exp_call(R"(call <4 x float> @vexpf\(<4 x float> .*\))"); + std::regex fexp_call(R"(call <4 x float> @llvm\.exp\.v4f32\(<4 x float> .*\))"); + REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_decl)); + REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_call)); + REQUIRE(!std::regex_search(accelerate_library_module_str, m, fexp_call)); +#endif + } + } +} + //============================================================================= // Optimization : dead code removal //============================================================================= From b50027a54afcd71ab9b040bcc288492989cdd9d4 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 22 Apr 2021 16:25:35 -0700 Subject: [PATCH 046/105] Using shared libraries in LLVM JIT (#609) * Integrated veclibs in benchmark and added shared libs support for JIT * Tested on BBP Ubuntu Linux box * Make sure to set LD_LIBRARY_PATH for Intel library dir --- cmake/LLVMHelper.cmake | 1 + src/codegen/llvm/jit_driver.cpp | 68 +++++++++++++++++++++++++---- src/codegen/llvm/jit_driver.hpp | 11 +++-- src/codegen/llvm/llvm_benchmark.cpp | 5 ++- src/codegen/llvm/llvm_benchmark.hpp | 5 +++ src/main.cpp | 21 +++++---- 6 files changed, 89 insertions(+), 22 deletions(-) diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index f81a5a62e8..2b7db94a85 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -10,6 +10,7 @@ llvm_map_components_to_libnames( analysis codegen core + executionengine instcombine mc native diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index 842c500810..ec08e8856d 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ 
b/src/codegen/llvm/jit_driver.cpp @@ -11,9 +11,11 @@ #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" @@ -22,27 +24,55 @@ namespace nmodl { namespace runner { -void JITDriver::init(std::string features) { +void JITDriver::init(std::string features, std::vector& lib_paths) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + // Set the target triple and the data layout for the module. + set_triple_and_data_layout(features); + auto data_layout = module->getDataLayout(); + + // Create object linking function callback. + auto object_linking_layer_creator = [&](llvm::orc::ExecutionSession& session, + const llvm::Triple& triple) { + // Create linking layer. + auto layer = std::make_unique(session, []() { + return std::make_unique(); + }); + for (const auto& lib_path: lib_paths) { + // For every library path, create a corresponding memory buffer. + auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path); + if (!memory_buffer) + throw std::runtime_error("Unable to create memory buffer for " + lib_path); + + // Create a new JIT library instance for this session and resolve symbols. 
+ auto& jd = session.createBareJITDylib(std::string(lib_path)); + auto loaded = + llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(), + data_layout.getGlobalPrefix()); + + if (!loaded) + throw std::runtime_error("Unable to load " + lib_path); + jd.addGenerator(std::move(*loaded)); + cantFail(layer->add(jd, std::move(*memory_buffer))); + } + + return layer; + }; + // Create IR compile function callback. auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) -> llvm::Expected> { // Create target machine with some features possibly turned off. auto tm = create_target(&tm_builder, features); - - // Set the target triple and the data layout for the module. - module->setDataLayout(tm->createDataLayout()); - module->setTargetTriple(tm->getTargetTriple().getTriple()); - return std::make_unique(std::move(tm)); }; // Set JIT instance and extract the data layout from the module. - auto jit_instance = cantFail( - llvm::orc::LLJITBuilder().setCompileFunctionCreator(compile_function_creator).create()); - auto data_layout = module->getDataLayout(); + auto jit_instance = cantFail(llvm::orc::LLJITBuilder() + .setCompileFunctionCreator(compile_function_creator) + .setObjectLinkingLayerCreator(object_linking_layer_creator) + .create()); // Add a ThreadSafeModule to the driver. llvm::orc::ThreadSafeModule tsm(std::move(module), std::make_unique()); @@ -80,5 +110,25 @@ std::unique_ptr JITDriver::create_target( return std::unique_ptr(tm); } +void JITDriver::set_triple_and_data_layout(const std::string& features) { + // Get the default target triple for the host. + auto target_triple = llvm::sys::getDefaultTargetTriple(); + std::string error_msg; + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); + if (!target) + throw std::runtime_error("Error " + error_msg + "\n"); + + // Get the CPU information and set a target machine to create the data layout. 
+ std::string cpu(llvm::sys::getHostCPUName()); + + std::unique_ptr tm( + target->createTargetMachine(target_triple, cpu, features, {}, {})); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); + + // Set data layout and the target triple to the module. + module->setDataLayout(tm->createDataLayout()); + module->setTargetTriple(target_triple); +} } // namespace runner } // namespace nmodl diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index f994a57303..d46e605054 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -37,7 +37,7 @@ class JITDriver { : module(std::move(m)) {} /// Initialize the JIT. - void init(std::string features); + void init(std::string features, std::vector& lib_paths); /// Lookup the entry-point without arguments in the JIT and execute it, returning the result. template @@ -66,6 +66,9 @@ class JITDriver { /// A wrapper around llvm::createTargetMachine to turn on/off certain CPU features. std::unique_ptr create_target(llvm::orc::JITTargetMachineBuilder* builder, const std::string& features); + + /// Sets the triple and the data layout for the module. + void set_triple_and_data_layout(const std::string& features); }; /** @@ -79,9 +82,11 @@ class Runner { std::unique_ptr driver = std::make_unique(std::move(module)); public: - Runner(std::unique_ptr m, std::string features = "") + Runner(std::unique_ptr m, + std::string features = "", + std::vector lib_paths = {}) : module(std::move(m)) { - driver->init(features); + driver->init(features, lib_paths); } /// Run the entry-point function without arguments. 
diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index 6ab9ff4982..4c49ce30df 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -43,7 +43,8 @@ void LLVMBenchmark::benchmark(const std::shared_ptr& node) { output_dir, llvm_build_info.opt_passes, llvm_build_info.use_single_precision, - llvm_build_info.vector_width); + llvm_build_info.vector_width, + llvm_build_info.vec_lib); generate_llvm(visitor, node); // Finally, run the benchmark and log the measurements. @@ -103,7 +104,7 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, std::string features_str = llvm::join(features.begin(), features.end(), ","); std::unique_ptr m = visitor.get_module(); - runner::Runner runner(std::move(m), features_str); + runner::Runner runner(std::move(m), features_str, shared_libs); // Benchmark every kernel. for (const auto& kernel_name: kernel_names) { diff --git a/src/codegen/llvm/llvm_benchmark.hpp b/src/codegen/llvm/llvm_benchmark.hpp index 30ebf182e8..d23567d79d 100644 --- a/src/codegen/llvm/llvm_benchmark.hpp +++ b/src/codegen/llvm/llvm_benchmark.hpp @@ -20,6 +20,7 @@ struct LLVMBuildInfo { int vector_width; bool opt_passes; bool use_single_precision; + std::string vec_lib; }; /** @@ -33,6 +34,8 @@ class LLVMBenchmark { std::string output_dir; + std::vector shared_libs; + int num_experiments; int instance_size; @@ -65,12 +68,14 @@ class LLVMBenchmark { public: LLVMBenchmark(const std::string& mod_filename, const std::string& output_dir, + std::vector shared_libs, LLVMBuildInfo info, int num_experiments, int instance_size, const std::string& backend) : mod_filename(mod_filename) , output_dir(output_dir) + , shared_libs(shared_libs) , num_experiments(num_experiments) , instance_size(instance_size) , backend(backend) diff --git a/src/main.cpp b/src/main.cpp index c5e42673ef..5934735309 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -179,9 +179,12 @@ int main(int argc, const char* 
argv[]) { /// llvm vector width int llvm_vec_width = 1; - /// vector library + /// vector library name std::string vec_lib("none"); + /// list of shared libraries to link + std::vector libs; + /// run llvm benchmark bool run_benchmark(false); @@ -338,6 +341,9 @@ int main(int argc, const char* argv[]) { benchmark_opt->add_flag("--run", run_benchmark, fmt::format("Run LLVM benchmark ({})", run_benchmark))->ignore_case(); + benchmark_opt->add_option("--libs", libs, "Shared libraries to link IR against") + ->ignore_case() + ->check(CLI::ExistingFile); benchmark_opt->add_option("--instance-size", instance_size, fmt::format("Instance struct size ({})", instance_size))->ignore_case(); @@ -346,11 +352,7 @@ int main(int argc, const char* argv[]) { fmt::format("Number of experiments for benchmarking ({})", repeat))->ignore_case(); benchmark_opt->add_option("--backend", backend, -<<<<<<< HEAD - fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"}));; -======= - "Target's backend ({})"_format(backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"})); ->>>>>>> 88db707d (Integrating vector maths library into LLVM codegen (#604)) + fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"})); #endif // clang-format on @@ -665,9 +667,12 @@ int main(int argc, const char* argv[]) { if (run_benchmark) { logger->info("Running LLVM benchmark"); - benchmark::LLVMBuildInfo info{llvm_vec_width, llvm_ir_opt_passes, llvm_float_type}; + benchmark::LLVMBuildInfo info{llvm_vec_width, + llvm_ir_opt_passes, + llvm_float_type, + vec_lib}; benchmark::LLVMBenchmark bench( - modfile, output_dir, info, repeat, instance_size, backend); + modfile, output_dir, libs, info, repeat, instance_size, backend); bench.benchmark(ast); } From 4c9e1e16136d6c01caa27c5a076801aa3bdb1ba7 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sat, 24 Apr 2021 21:43:05 +0200 Subject: [PATCH 
047/105] Avoid local std::ofstream object causing segfault (#614) - std::ofstream().rdbuf() was used but as it was a local object, it becomes invalid at the end of function scope - make std::ofstream as member variable --- src/codegen/llvm/llvm_benchmark.cpp | 2 -- src/codegen/llvm/llvm_benchmark.hpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index 4c49ce30df..c93b723cb0 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -145,8 +145,6 @@ void LLVMBenchmark::set_log_output() { // Otherwise, dump logs to the specified file. std::string filename = output_dir + "/" + mod_filename + ".log"; - std::ofstream ofs; - ofs.open(filename.c_str()); if (ofs.fail()) diff --git a/src/codegen/llvm/llvm_benchmark.hpp b/src/codegen/llvm/llvm_benchmark.hpp index d23567d79d..646912c253 100644 --- a/src/codegen/llvm/llvm_benchmark.hpp +++ b/src/codegen/llvm/llvm_benchmark.hpp @@ -46,6 +46,8 @@ class LLVMBenchmark { std::shared_ptr log_stream; + std::ofstream ofs; + /// Disable the specified feature. void disable(const std::string& feature, std::vector& host_features); From be984b85cfc3b78399383ab32f829eebf4362097 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 30 Apr 2021 15:29:31 -0700 Subject: [PATCH 048/105] Refactoring of runners' infrastructure and dumping object files (#620) The following is added: 1. Dumping object files in JIT. A functionality to dump (enabled by default) the generated from LLVM IR file binary to `.o` has been added to benchmarking. Now, in addition to logs, a `v_.o` is generated. The reasons it is an object file and not an assembly (hence not included in logs) are the following: - LLVM does not have library functions that take the object and turn back into assembly, but rather `object -> file -> assembly` path. 
It also has a `llvm-objdump` tool, but it is intended as a command-line utility and does not have a well-defined API. - Writing custom functions to produce a readable assembly is not a priority. Also, mimicking `objdump` functionality would be difficult. - Both `objdump` and `llvm-objdump` can be used to isnpect the `.o` file manually. 2. Refactoring of `Runner` class. In addition to the support of dumping the binary, `Runner`and `JITDriver` classes were refactored to have a nicer OOP-style. fixes #611 Co-authored-by: Pramod S Kumbhar --- src/codegen/llvm/jit_driver.cpp | 11 ++- src/codegen/llvm/jit_driver.hpp | 98 +++++++++++++++----- src/codegen/llvm/llvm_benchmark.cpp | 6 +- src/codegen/llvm/main.cpp | 3 +- test/unit/codegen/codegen_llvm_execution.cpp | 12 ++- 5 files changed, 102 insertions(+), 28 deletions(-) diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index ec08e8856d..7910036848 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/src/codegen/llvm/jit_driver.cpp @@ -15,6 +15,7 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Support/Host.h" @@ -24,7 +25,9 @@ namespace nmodl { namespace runner { -void JITDriver::init(std::string features, std::vector& lib_paths) { +void JITDriver::init(std::string features, + std::vector lib_paths, + ObjDumpInfo* dump_info) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); @@ -83,6 +86,12 @@ void JITDriver::init(std::string features, std::vector& lib_paths) llvm::orc::JITDylib& sym_tab = jit->getMainJITDylib(); sym_tab.addGenerator(cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( data_layout.getGlobalPrefix()))); + + // Optionally, dump the 
binary to the object file. + if (dump_info) { + jit->getObjTransformLayer().setTransform( + llvm::orc::DumpObjects(dump_info->output_dir, dump_info->filename)); + } } std::unique_ptr JITDriver::create_target( diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index d46e605054..dfd06ca7ee 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -20,9 +20,18 @@ namespace nmodl { namespace runner { +/// A struct to hold the information for dumping object file. +struct ObjDumpInfo { + /// Object file name. + std::string filename; + + /// Object file output directory. + std::string output_dir; +}; + /** * \class JITDriver - * \brief Driver to execute MOD file function via LLVM IR backend + * \brief Driver to execute a MOD file function via LLVM IR backend. */ class JITDriver { private: @@ -33,13 +42,15 @@ class JITDriver { std::unique_ptr module; public: - JITDriver(std::unique_ptr m) + explicit JITDriver(std::unique_ptr m) : module(std::move(m)) {} - /// Initialize the JIT. - void init(std::string features, std::vector& lib_paths); + /// Initializes the JIT. + void init(std::string features = "", + std::vector lib_paths = {}, + ObjDumpInfo* dump_info = nullptr); - /// Lookup the entry-point without arguments in the JIT and execute it, returning the result. + /// Lookups the entry-point without arguments in the JIT and executes it, returning the result. template ReturnType execute_without_arguments(const std::string& entry_point) { auto expected_symbol = jit->lookup(entry_point); @@ -51,7 +62,7 @@ class JITDriver { return result; } - /// Lookup the entry-point with an argument in the JIT and execute it, returning the result. + /// Lookups the entry-point with an argument in the JIT and executes it, returning the result. 
template ReturnType execute_with_arguments(const std::string& entry_point, ArgType arg) { auto expected_symbol = jit->lookup(entry_point); @@ -63,7 +74,8 @@ class JITDriver { return result; } - /// A wrapper around llvm::createTargetMachine to turn on/off certain CPU features. + private: + /// Creates llvm::TargetMachine with certain CPU features turned on/off. std::unique_ptr create_target(llvm::orc::JITTargetMachineBuilder* builder, const std::string& features); @@ -72,35 +84,79 @@ class JITDriver { }; /** - * \class Runner - * \brief A wrapper around JITDriver to execute an entry point in the LLVM IR module. + * \class BaseRunner + * \brief A base runner class that provides functionality to execute an + * entry point in the LLVM IR module. */ -class Runner { - private: - std::unique_ptr module; +class BaseRunner { + protected: + std::unique_ptr driver; - std::unique_ptr driver = std::make_unique(std::move(module)); + explicit BaseRunner(std::unique_ptr m) + : driver(std::make_unique(std::move(m))) {} public: - Runner(std::unique_ptr m, - std::string features = "", - std::vector lib_paths = {}) - : module(std::move(m)) { - driver->init(features, lib_paths); - } + /// Sets up the JIT driver. + virtual void initialize_driver() = 0; - /// Run the entry-point function without arguments. + /// Runs the entry-point function without arguments. template ReturnType run_without_arguments(const std::string& entry_point) { return driver->template execute_without_arguments(entry_point); } - /// Run the entry-point function with a pointer to the data as an argument. + /// Runs the entry-point function with a pointer to the data as an argument. template ReturnType run_with_argument(const std::string& entry_point, ArgType arg) { return driver->template execute_with_arguments(entry_point, arg); } }; +/** + * \class TestRunner + * \brief A simple runner for testing purposes. 
+ */ +class TestRunner: public BaseRunner { + public: + explicit TestRunner(std::unique_ptr m) + : BaseRunner(std::move(m)) {} + + virtual void initialize_driver() { + driver->init(); + } +}; + +/** + * \class BenchmarkRunner + * \brief A runner with benchmarking functionality. It takes user-specified CPU + * features into account, as well as it can link against shared libraries. + */ +class BenchmarkRunner: public BaseRunner { + private: + /// Information on dumping object file generated from LLVM IR. + ObjDumpInfo dump_info; + + /// CPU features specified by the user. + std::string features; + + /// Shared libraries' paths to link against. + std::vector shared_lib_paths; + + public: + BenchmarkRunner(std::unique_ptr m, + std::string filename, + std::string output_dir, + std::string features = "", + std::vector lib_paths = {}) + : BaseRunner(std::move(m)) + , dump_info{filename, output_dir} + , features(features) + , shared_lib_paths(lib_paths) {} + + virtual void initialize_driver() { + driver->init(features, shared_lib_paths, &dump_info); + } +}; + } // namespace runner } // namespace nmodl diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index c93b723cb0..87e36ec822 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -104,7 +104,11 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, std::string features_str = llvm::join(features.begin(), features.end(), ","); std::unique_ptr m = visitor.get_module(); - runner::Runner runner(std::move(m), features_str, shared_libs); + + // Create the benchmark runner and intialize it. + std::string filename = "v" + std::to_string(llvm_build_info.vector_width) + "_" + mod_filename; + runner::BenchmarkRunner runner(std::move(m), filename, output_dir, features_str, shared_libs); + runner.initialize_driver(); // Benchmark every kernel. 
for (const auto& kernel_name: kernel_names) { diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index acbdc37f19..b700f5ad59 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -64,7 +64,8 @@ int main(int argc, const char* argv[]) { throw std::runtime_error( "Error: entry-point functions with non-double return type are not supported\n"); - Runner runner(std::move(module)); + TestRunner runner(std::move(module)); + runner.initialize_driver(); // Since only double type is supported, provide explicit double type to the running function. auto r = runner.run_without_arguments(entry_point_name); diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index d0d0408853..db6cd08f51 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -131,7 +131,8 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { llvm_visitor.visit_program(*ast); std::unique_ptr m = llvm_visitor.get_module(); - Runner runner(std::move(m)); + TestRunner runner(std::move(m)); + runner.initialize_driver(); THEN("functions are evaluated correctly") { auto exp_result = runner.run_without_arguments("exponential"); @@ -232,7 +233,8 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { llvm_visitor.visit_program(*ast); std::unique_ptr m = llvm_visitor.get_module(); - Runner runner(std::move(m)); + TestRunner runner(std::move(m)); + runner.initialize_driver(); THEN("optimizations preserve function results") { // Check exponential is turned into a constant. @@ -326,7 +328,8 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { // Set up the JIT runner. 
std::unique_ptr module = llvm_visitor.get_module(); - Runner runner(std::move(module)); + TestRunner runner(std::move(module)); + runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { runner.run_with_argument("__nrn_state_test_wrapper", @@ -413,7 +416,8 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { // Set up the JIT runner. std::unique_ptr module = llvm_visitor.get_module(); - Runner runner(std::move(module)); + TestRunner runner(std::move(module)); + runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { runner.run_with_argument("__nrn_state_test_wrapper", From 3cf65cfcbf587c9981d5b6d62bd45e7726c0800e Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 7 May 2021 15:13:53 -0700 Subject: [PATCH 049/105] Optimisation levels for benchmarking (#623) This PR adds two flags to the benchmarking pipeline: * `--opt-level-ir`: This flag is used to run `-On` passes on the generated LLVM IR module. * `--opt-level-codegen`: This flag is used for setting optimisation level for machine code generation inside the JIT target machine. 
* As an example: ```bash $ ./nmodl file.mod \ llvm --ir --vector-width 1 \ benchmark --run --instance-size 10000000 --repeat 20 --opt-level-ir 2 --opt-level-codegen 2 ``` fixes #616 --- cmake/LLVMHelper.cmake | 3 + src/codegen/llvm/jit_driver.cpp | 197 ++++++++++++++++++++-------- src/codegen/llvm/jit_driver.hpp | 36 ++--- src/codegen/llvm/llvm_benchmark.cpp | 40 +++--- src/codegen/llvm/llvm_benchmark.hpp | 59 +++++---- src/main.cpp | 59 ++++++--- 6 files changed, 266 insertions(+), 128 deletions(-) diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index 2b7db94a85..b0c8b2a48b 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -12,9 +12,12 @@ llvm_map_components_to_libnames( core executionengine instcombine + ipo mc native orcjit + target + transformutils scalaropts support) set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index 7910036848..1e8eb4bfd0 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/src/codegen/llvm/jit_driver.cpp @@ -8,6 +8,7 @@ #include "jit_driver.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" @@ -18,21 +19,139 @@ #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" namespace nmodl { namespace runner { +/****************************************************************************************/ +/* Utilities for JIT driver */ 
+/****************************************************************************************/ + +/// Initialises some LLVM optimisation passes. +static void initialise_optimisation_passes() { + auto& registry = *llvm::PassRegistry::getPassRegistry(); + llvm::initializeCore(registry); + llvm::initializeTransformUtils(registry); + llvm::initializeScalarOpts(registry); + llvm::initializeInstCombine(registry); + llvm::initializeAnalysis(registry); +} + +/// Populates pass managers with passes for the given optimisation levels. +static void populate_pms(llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm, + int opt_level, + int size_level, + llvm::TargetMachine* tm) { + // First, set the pass manager builder with some basic optimisation information. + llvm::PassManagerBuilder pm_builder; + pm_builder.OptLevel = opt_level; + pm_builder.SizeLevel = size_level; + pm_builder.DisableUnrollLoops = opt_level == 0; + + // If target machine is defined, then initialise the TargetTransformInfo for the target. + if (tm) { + module_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + func_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + } + + // Populate pass managers. + pm_builder.populateModulePassManager(module_pm); + pm_builder.populateFunctionPassManager(func_pm); +} + +/// Runs the function and module passes on the provided module. +static void run_optimisation_passes(llvm::Module& module, + llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm) { + func_pm.doInitialization(); + auto& functions = module.getFunctionList(); + for (auto& function: functions) { + llvm::verifyFunction(function); + func_pm.run(function); + } + func_pm.doFinalization(); + module_pm.run(module); +} + +/// Optimises the given LLVM IR module. 
+static void optimise_module(llvm::Module& module, + int opt_level, + llvm::TargetMachine* tm = nullptr) { + llvm::legacy::FunctionPassManager func_pm(&module); + llvm::legacy::PassManager module_pm; + populate_pms(func_pm, module_pm, opt_level, /*size_level=*/0, tm); + run_optimisation_passes(module, func_pm, module_pm); +} + +/// Sets the target triple and the data layout of the module. +static void set_triple_and_data_layout(llvm::Module& module, const std::string& features) { + // Get the default target triple for the host. + auto target_triple = llvm::sys::getDefaultTargetTriple(); + std::string error_msg; + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); + if (!target) + throw std::runtime_error("Error " + error_msg + "\n"); + + // Get the CPU information and set a target machine to create the data layout. + std::string cpu(llvm::sys::getHostCPUName()); + std::unique_ptr tm( + target->createTargetMachine(target_triple, cpu, features, {}, {})); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); + + // Set data layout and the target triple to the module. + module.setDataLayout(tm->createDataLayout()); + module.setTargetTriple(target_triple); +} + +/// Creates llvm::TargetMachine with certain CPU features turned on/off. +static std::unique_ptr create_target( + llvm::orc::JITTargetMachineBuilder* tm_builder, + const std::string& features, + int opt_level) { + // First, look up the target. + std::string error_msg; + auto target_triple = tm_builder->getTargetTriple().getTriple(); + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); + if (!target) + throw std::runtime_error("Error " + error_msg + "\n"); + + // Create default target machine with provided features. 
+ auto tm = target->createTargetMachine(target_triple, + llvm::sys::getHostCPUName().str(), + features, + tm_builder->getOptions(), + tm_builder->getRelocationModel(), + tm_builder->getCodeModel(), + static_cast(opt_level), + /*JIT=*/true); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); + + return std::unique_ptr(tm); +} + +/****************************************************************************************/ +/* JIT driver */ +/****************************************************************************************/ + void JITDriver::init(std::string features, std::vector lib_paths, - ObjDumpInfo* dump_info) { + BenchmarkInfo* benchmark_info) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); + initialise_optimisation_passes(); // Set the target triple and the data layout for the module. - set_triple_and_data_layout(features); + set_triple_and_data_layout(*module, features); auto data_layout = module->getDataLayout(); // Create object linking function callback. @@ -67,11 +186,31 @@ void JITDriver::init(std::string features, auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) -> llvm::Expected> { // Create target machine with some features possibly turned off. - auto tm = create_target(&tm_builder, features); + auto tm = create_target(&tm_builder, features, benchmark_info->opt_level_codegen); + + // Optimise the LLVM IR module. + optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); + + // Save optimised module to .ll file if benchmarking. 
+ if (benchmark_info) { + std::error_code error_code; + std::unique_ptr out = + std::make_unique(benchmark_info->output_dir + "/" + + benchmark_info->filename + "_opt.ll", + error_code, + llvm::sys::fs::OF_Text); + if (error_code) + throw std::runtime_error("Error: " + error_code.message()); + + std::unique_ptr annotator; + module->print(out->os(), annotator.get()); + out->keep(); + } + return std::make_unique(std::move(tm)); }; - // Set JIT instance and extract the data layout from the module. + // Set the JIT instance. auto jit_instance = cantFail(llvm::orc::LLJITBuilder() .setCompileFunctionCreator(compile_function_creator) .setObjectLinkingLayerCreator(object_linking_layer_creator) @@ -88,56 +227,10 @@ void JITDriver::init(std::string features, data_layout.getGlobalPrefix()))); // Optionally, dump the binary to the object file. - if (dump_info) { + if (benchmark_info) { jit->getObjTransformLayer().setTransform( - llvm::orc::DumpObjects(dump_info->output_dir, dump_info->filename)); + llvm::orc::DumpObjects(benchmark_info->output_dir, benchmark_info->filename)); } } - -std::unique_ptr JITDriver::create_target( - llvm::orc::JITTargetMachineBuilder* builder, - const std::string& features) { - // First, look up the target. - std::string error_msg; - auto target_triple = builder->getTargetTriple().getTriple(); - auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); - if (!target) - throw std::runtime_error("Error " + error_msg + "\n"); - - // Create default target machine with provided features. 
- auto tm = target->createTargetMachine(target_triple, - llvm::sys::getHostCPUName().str(), - features, - builder->getOptions(), - builder->getRelocationModel(), - builder->getCodeModel(), - /*OL=*/llvm::CodeGenOpt::Default, - /*JIT=*/true); - if (!tm) - throw std::runtime_error("Error: could not create the target machine\n"); - - return std::unique_ptr(tm); -} - -void JITDriver::set_triple_and_data_layout(const std::string& features) { - // Get the default target triple for the host. - auto target_triple = llvm::sys::getDefaultTargetTriple(); - std::string error_msg; - auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); - if (!target) - throw std::runtime_error("Error " + error_msg + "\n"); - - // Get the CPU information and set a target machine to create the data layout. - std::string cpu(llvm::sys::getHostCPUName()); - - std::unique_ptr tm( - target->createTargetMachine(target_triple, cpu, features, {}, {})); - if (!tm) - throw std::runtime_error("Error: could not create the target machine\n"); - - // Set data layout and the target triple to the module. - module->setDataLayout(tm->createDataLayout()); - module->setTargetTriple(target_triple); -} } // namespace runner } // namespace nmodl diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index dfd06ca7ee..151ec177d8 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -20,13 +20,19 @@ namespace nmodl { namespace runner { -/// A struct to hold the information for dumping object file. -struct ObjDumpInfo { - /// Object file name. +/// A struct to hold the information for benchmarking. +struct BenchmarkInfo { + /// Object filename to dump. std::string filename; /// Object file output directory. std::string output_dir; + + /// Optimisation level for generated IR. + int opt_level_ir; + + /// Optimisation level for machine code generation. 
+ int opt_level_codegen; }; /** @@ -45,10 +51,10 @@ class JITDriver { explicit JITDriver(std::unique_ptr m) : module(std::move(m)) {} - /// Initializes the JIT. + /// Initializes the JIT driver. void init(std::string features = "", std::vector lib_paths = {}, - ObjDumpInfo* dump_info = nullptr); + BenchmarkInfo* benchmark_info = nullptr); /// Lookups the entry-point without arguments in the JIT and executes it, returning the result. template @@ -73,14 +79,6 @@ class JITDriver { ReturnType result = res(arg); return result; } - - private: - /// Creates llvm::TargetMachine with certain CPU features turned on/off. - std::unique_ptr create_target(llvm::orc::JITTargetMachineBuilder* builder, - const std::string& features); - - /// Sets the triple and the data layout for the module. - void set_triple_and_data_layout(const std::string& features); }; /** @@ -133,8 +131,8 @@ class TestRunner: public BaseRunner { */ class BenchmarkRunner: public BaseRunner { private: - /// Information on dumping object file generated from LLVM IR. - ObjDumpInfo dump_info; + /// Benchmarking information passed to JIT driver. + BenchmarkInfo benchmark_info; /// CPU features specified by the user. 
std::string features; @@ -147,14 +145,16 @@ class BenchmarkRunner: public BaseRunner { std::string filename, std::string output_dir, std::string features = "", - std::vector lib_paths = {}) + std::vector lib_paths = {}, + int opt_level_ir = 0, + int opt_level_codegen = 0) : BaseRunner(std::move(m)) - , dump_info{filename, output_dir} + , benchmark_info{filename, output_dir, opt_level_ir, opt_level_codegen} , features(features) , shared_lib_paths(lib_paths) {} virtual void initialize_driver() { - driver->init(features, shared_lib_paths, &dump_info); + driver->init(features, shared_lib_paths, &benchmark_info); } }; diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index 87e36ec822..df0c54517d 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -19,10 +19,24 @@ namespace nmodl { namespace benchmark { - /// Precision for the timing measurements. static constexpr int PRECISION = 9; +/// Get the host CPU features in the format: +/// +feature,+feature,-feature,+feature,... +/// where `+` indicates that the feature is enabled. +static std::vector get_cpu_features() { + std::string cpu(llvm::sys::getHostCPUName()); + + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f: host_features) + features.AddFeature(f.first(), f.second); + } + return features.getFeatures(); +} + void LLVMBenchmark::disable(const std::string& feature, std::vector& host_features) { for (auto& host_feature: host_features) { @@ -34,7 +48,7 @@ void LLVMBenchmark::disable(const std::string& feature, std::vector } } -void LLVMBenchmark::benchmark(const std::shared_ptr& node) { +void LLVMBenchmark::run(const std::shared_ptr& node) { // First, set the output stream for the logs. 
set_log_output(); @@ -65,18 +79,6 @@ void LLVMBenchmark::generate_llvm(codegen::CodegenLLVMVisitor& visitor, << diff.count() << "\n\n"; } -std::vector LLVMBenchmark::get_cpu_features() { - std::string cpu(llvm::sys::getHostCPUName()); - - llvm::SubtargetFeatures features; - llvm::StringMap host_features; - if (llvm::sys::getHostCPUFeatures(host_features)) { - for (auto& f: host_features) - features.AddFeature(f.first(), f.second); - } - return features.getFeatures(); -} - void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, const std::shared_ptr& node) { // Set the codegen data helper and find the kernels. @@ -105,9 +107,15 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, std::string features_str = llvm::join(features.begin(), features.end(), ","); std::unique_ptr m = visitor.get_module(); - // Create the benchmark runner and intialize it. + // Create the benchmark runner and initialize it. std::string filename = "v" + std::to_string(llvm_build_info.vector_width) + "_" + mod_filename; - runner::BenchmarkRunner runner(std::move(m), filename, output_dir, features_str, shared_libs); + runner::BenchmarkRunner runner(std::move(m), + filename, + output_dir, + features_str, + shared_libs, + opt_level_ir, + opt_level_codegen); runner.initialize_driver(); // Benchmark every kernel. diff --git a/src/codegen/llvm/llvm_benchmark.hpp b/src/codegen/llvm/llvm_benchmark.hpp index 646912c253..c2c781d7f0 100644 --- a/src/codegen/llvm/llvm_benchmark.hpp +++ b/src/codegen/llvm/llvm_benchmark.hpp @@ -30,43 +30,39 @@ struct LLVMBuildInfo { */ class LLVMBenchmark { private: + /// Source MOD file name. std::string mod_filename; + /// The output directory for logs and other files. std::string output_dir; + /// Paths to shared libraries. std::vector shared_libs; + /// The number of experiments to repeat. int num_experiments; + /// The size of the instance struct for benchmarking. 
int instance_size; + /// Benchmarking backend std::string backend; + /// Optimisation level for LLVM IR transformations. + int opt_level_ir; + + /// Optimisation level for machine code generation. + int opt_level_codegen; + + /// LLVM visitor information. LLVMBuildInfo llvm_build_info; + /// The log output stream (file or stdout). std::shared_ptr log_stream; + /// Filestream for dumping logs to the file. std::ofstream ofs; - /// Disable the specified feature. - void disable(const std::string& feature, std::vector& host_features); - - /// Visits the AST to construct the LLVM IR module. - void generate_llvm(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node); - - /// Get the host CPU features in the format: - /// +feature,+feature,-feature,+feature,... - /// where `+` indicates that the feature is enabled. - std::vector get_cpu_features(); - - /// Runs the main body of the benchmark, executing the compute kernels. - void run_benchmark(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node); - - /// Sets the log output stream (file or console). - void set_log_output(); - public: LLVMBenchmark(const std::string& mod_filename, const std::string& output_dir, @@ -74,17 +70,36 @@ class LLVMBenchmark { LLVMBuildInfo info, int num_experiments, int instance_size, - const std::string& backend) + const std::string& backend, + int opt_level_ir, + int opt_level_codegen) : mod_filename(mod_filename) , output_dir(output_dir) , shared_libs(shared_libs) , num_experiments(num_experiments) , instance_size(instance_size) , backend(backend) - , llvm_build_info(info) {} + , llvm_build_info(info) + , opt_level_ir(opt_level_ir) + , opt_level_codegen(opt_level_codegen) {} /// Runs the benchmark. - void benchmark(const std::shared_ptr& node); + void run(const std::shared_ptr& node); + + private: + /// Disables the specified feature in the target. 
+ void disable(const std::string& feature, std::vector& host_features); + + /// Visits the AST to construct the LLVM IR module. + void generate_llvm(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node); + + /// Runs the main body of the benchmark, executing the compute kernels. + void run_benchmark(codegen::CodegenLLVMVisitor& visitor, + const std::shared_ptr& node); + + /// Sets the log output stream (file or console). + void set_log_output(); }; diff --git a/src/main.cpp b/src/main.cpp index 5934735309..4604a8df50 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -180,19 +180,25 @@ int main(int argc, const char* argv[]) { int llvm_vec_width = 1; /// vector library name - std::string vec_lib("none"); - - /// list of shared libraries to link - std::vector libs; + std::string vector_library("none"); /// run llvm benchmark - bool run_benchmark(false); + bool run_llvm_benchmark(false); + + /// optimisation level for IR generation + int llvm_opt_level_ir = 0; + + /// optimisation level for machine code generation + int llvm_opt_level_codegen = 0; + + /// list of shared libraries to link against in JIT + std::vector shared_lib_paths; /// the size of the instance struct for the benchmark int instance_size = 10000; - /// the number of experiments to run for the benchmarking - int repeat = 100; + /// the number of repeated experiments for the benchmarking + int num_experiments = 100; /// specify the backend for LLVM IR to target std::string backend = "default"; @@ -333,23 +339,29 @@ int main(int argc, const char* argv[]) { llvm_vec_width, fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); llvm_opt->add_option("--veclib", - vec_lib, - fmt::format("Vector library for maths functions ({})", vec_lib))->check(CLI::IsMember({"Accelerate", "libmvec", "MASSV", "SVML", "none"})); + vector_library, + fmt::format("Vector library for maths functions ({})", vector_library))->check(CLI::IsMember({"Accelerate", "libmvec", "MASSV", "SVML", 
"none"})); // LLVM IR benchmark options. auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); benchmark_opt->add_flag("--run", - run_benchmark, - fmt::format("Run LLVM benchmark ({})", run_benchmark))->ignore_case(); - benchmark_opt->add_option("--libs", libs, "Shared libraries to link IR against") + run_llvm_benchmark, + fmt::format("Run LLVM benchmark ({})", run_llvm_benchmark))->ignore_case(); + benchmark_opt->add_option("--opt-level-ir", + llvm_opt_level_ir, + fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + benchmark_opt->add_option("--opt-level-codegen", + llvm_opt_level_codegen, + fmt::format("Machine code optimisation level (O{})", llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + benchmark_opt->add_option("--libs", shared_lib_paths, "Shared libraries to link IR against") ->ignore_case() ->check(CLI::ExistingFile); benchmark_opt->add_option("--instance-size", instance_size, fmt::format("Instance struct size ({})", instance_size))->ignore_case(); benchmark_opt->add_option("--repeat", - repeat, - fmt::format("Number of experiments for benchmarking ({})", repeat))->ignore_case(); + num_experiments, + fmt::format("Number of experiments for benchmarking ({})", num_experiments))->ignore_case(); benchmark_opt->add_option("--backend", backend, fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"})); @@ -665,15 +677,22 @@ int main(int argc, const char* argv[]) { #ifdef NMODL_LLVM_BACKEND - if (run_benchmark) { + if (run_llvm_benchmark) { logger->info("Running LLVM benchmark"); benchmark::LLVMBuildInfo info{llvm_vec_width, llvm_ir_opt_passes, llvm_float_type, - vec_lib}; - benchmark::LLVMBenchmark bench( - modfile, output_dir, libs, info, repeat, instance_size, backend); - bench.benchmark(ast); + vector_library}; + benchmark::LLVMBenchmark 
benchmark(modfile, + output_dir, + shared_lib_paths, + info, + num_experiments, + instance_size, + backend, + llvm_opt_level_ir, + llvm_opt_level_codegen); + benchmark.run(ast); } else if (llvm_ir) { @@ -683,7 +702,7 @@ int main(int argc, const char* argv[]) { llvm_ir_opt_passes, llvm_float_type, llvm_vec_width, - vec_lib); + vector_library); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); From 2356c192d1600a5ab1e5745a96c40d7e2e991e99 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sat, 8 May 2021 02:44:01 -0700 Subject: [PATCH 050/105] Adding function debug information (#628) Added debug support to LLVM code generation pipeline. Currently, only basic support was added: 1. Debug information about functions (name) 2. Debug information about module **What has been changed and added** 1. A new class `DebugBuilder` was created. It is used as a wrapper around LLVM's `DIBuilder` and holds important information such as `LLVMContext`, debug file and compile unit. It also wraps `DIBuilder`'s functionality into a more suitable API. 2. A temporary `Location` struct has been added. It encapsulates the location of the source AST construct and reflects `ModToken` on LLVM code generation level. It is only used if the location of the source NMODL function is known. 3. LLVM visitor now takes an extra `add_debug_information` flag and handles debug information creation. For readability, `IRBuilder` was renamed to `ir_builder`. 4. JIT runner is now able to listen for GDB, perf (build LLVM with `-DLLVM_USE_PERF=ON`) and VTune (build LLVM with `-DLLVM_USE_INTEL_JITEVENTS=ON`) events. 5. Necessary cmake changes were added to optionally support JIT event listeners (`-DNMODL_HAVE_JIT_EVENT_LISTENERS`). **How to generate debug information** Debug information is attached to every function, procedure or artificially created kernel (and corresponding wrappers). 
Debug information is enabled by default, so to turn it off use the `--disable-debug-info` flag. For example, the given NMODL ```nmodl 1 FUNCTION func(x) { 2 func = x 3 } 4 5 PROCEDURE proc() {} ``` is transformed (running `./bin/nmodl .mod llvm --ir`) into ```llvm define double @func(double %x1) !dbg !4 { ; ... } define i32 @proc() !dbg !6 { ; ... } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "NMODL-LLVM", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) !1 = !DIFile(filename: "foo", directory: ".") !2 = !{} !3 = !{i32 2, !"Debug Version", i32 3} !4 = distinct !DISubprogram(name: "func", linkageName: "func", scope: null, file: !1, line: 1, type: !5, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !5 = !DISubroutineType(types: !2) !6 = distinct !DISubprogram(name: "proc", linkageName: "proc", scope: null, file: !1, line: 5, type: !5, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ``` fixes #592 #612 Co-authored-by: Pramod Kumbhar --- CMakeLists.txt | 2 + cmake/LLVMHelper.cmake | 38 ++-- src/codegen/llvm/CMakeLists.txt | 4 +- .../llvm/codegen_llvm_helper_visitor.cpp | 3 + src/codegen/llvm/codegen_llvm_visitor.cpp | 201 ++++++++++-------- src/codegen/llvm/codegen_llvm_visitor.hpp | 18 +- src/codegen/llvm/jit_driver.cpp | 19 ++ src/codegen/llvm/jit_driver.hpp | 11 + src/codegen/llvm/llvm_benchmark.cpp | 3 +- src/codegen/llvm/llvm_debug_builder.cpp | 63 ++++++ src/codegen/llvm/llvm_debug_builder.hpp | 70 ++++++ src/main.cpp | 9 +- 12 files changed, 333 insertions(+), 108 deletions(-) create mode 100644 src/codegen/llvm/llvm_debug_builder.cpp create mode 100644 src/codegen/llvm/llvm_debug_builder.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e5a346f96a..a16a090f9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/bin) option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" OFF) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. instead of 2019 nist constants" OFF) option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON) +option(NMODL_ENABLE_JIT_EVENT_LISTENERS "Enable JITEventListener for Perf and Vtune" OFF) if(NMODL_ENABLE_LEGACY_UNITS) add_definitions(-DUSE_LEGACY_UNITS) @@ -290,6 +291,7 @@ if(NMODL_ENABLE_LLVM) message(STATUS " VERSION | ${LLVM_PACKAGE_VERSION}") message(STATUS " INCLUDE | ${LLVM_INCLUDE_DIRS}") message(STATUS " CMAKE | ${LLVM_CMAKE_DIR}") + message(STATUS " JIT LISTENERS | ${NMODL_ENABLE_JIT_EVENT_LISTENERS}") endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://github.com/BlueBrain/nmodl/") diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index b0c8b2a48b..780ae29cfa 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -4,22 +4,28 @@ find_package(LLVM REQUIRED CONFIG) -# include LLVM header and core library -llvm_map_components_to_libnames( - LLVM_LIBS_TO_LINK - analysis - codegen - core - executionengine - instcombine - ipo - mc - native - orcjit - target - transformutils - scalaropts - support) +# include LLVM libraries +set(NMODL_LLVM_COMPONENTS + analysis + codegen + core + executionengine + instcombine + ipo + mc + native + orcjit + target + transformutils + scalaropts + support) + +if(NMODL_ENABLE_JIT_EVENT_LISTENERS) + list(APPEND NMODL_LLVM_COMPONENTS inteljitevents perfjitevents) +endif() + +llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK ${NMODL_LLVM_COMPONENTS}) + set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 09e37d3896..4e0f4d90d0 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt 
@@ -9,7 +9,9 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp) # ============================================================================= # LLVM codegen library and executable diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 420a08283b..a6af725eb7 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -193,6 +193,9 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// we have all information for code generation function, create a new node /// which will be inserted later into AST auto function = std::make_shared(fun_ret_type, name, arguments, block); + if (node.get_token()) { + function->set_token(*node.get_token()->clone()); + } codegen_functions.push_back(function); } /** diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 1738d4139e..830814286e 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/FileSystem.h" @@ -69,7 +68,7 @@ llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, llvm::Value indices.push_back(llvm::ConstantInt::get(index_type, 0)); indices.push_back(index); - return builder.CreateInBoundsGEP(lookup(name), indices); + return ir_builder.CreateInBoundsGEP(lookup(name), indices); } llvm::Value* 
CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { @@ -86,7 +85,7 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance throw std::runtime_error("Error: " + member_name + " is not a member of the instance!"); // Load the instance struct given its name from the ValueSymbolTable. - llvm::Value* instance_ptr = builder.CreateLoad(lookup(instance_name)); + llvm::Value* instance_ptr = ir_builder.CreateLoad(lookup(instance_name)); // Create a GEP instruction to get a pointer to the member. int member_index = instance_var_helper.get_variable_index(member_name); @@ -95,7 +94,7 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance std::vector indices; indices.push_back(llvm::ConstantInt::get(index_type, 0)); indices.push_back(llvm::ConstantInt::get(index_type, member_index)); - llvm::Value* member_ptr = builder.CreateInBoundsGEP(instance_ptr, indices); + llvm::Value* member_ptr = ir_builder.CreateInBoundsGEP(instance_ptr, indices); // Get the member AST node from the instance AST node, for which we proceed with the code // generation. If the member is scalar, return the pointer to it straight away. @@ -122,25 +121,25 @@ llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstance // load the member which would be indexed later. llvm::Type* type = get_codegen_var_type(*codegen_var_with_type->get_type()); llvm::Value* instance_member = - builder.CreateLoad(llvm::PointerType::get(type, /*AddressSpace=*/0), member_ptr); + ir_builder.CreateLoad(llvm::PointerType::get(type, /*AddressSpace=*/0), member_ptr); // Check if the code is vectorised and the index is indirect. std::string id = member_indexed_name->get_length()->get_node_name(); if (id != kernel_id && is_kernel_code && vector_width > 1) { // Calculate a vector of addresses via GEP instruction, and then created a gather to load // indirectly. 
- llvm::Value* addresses = builder.CreateInBoundsGEP(instance_member, {i64_index}); - return builder.CreateMaskedGather(addresses, llvm::Align()); + llvm::Value* addresses = ir_builder.CreateInBoundsGEP(instance_member, {i64_index}); + return ir_builder.CreateMaskedGather(addresses, llvm::Align()); } - llvm::Value* member_addr = builder.CreateInBoundsGEP(instance_member, {i64_index}); + llvm::Value* member_addr = ir_builder.CreateInBoundsGEP(instance_member, {i64_index}); // If the code is vectorised, then bitcast to a vector pointer. if (is_kernel_code && vector_width > 1) { llvm::Type* vector_type = llvm::PointerType::get(llvm::FixedVectorType::get(type, vector_width), /*AddressSpace=*/0); - return builder.CreateBitCast(member_addr, vector_type); + return ir_builder.CreateBitCast(member_addr, vector_type); } return member_addr; } @@ -152,7 +151,7 @@ llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { llvm::Value* index_value; if (node.get_length()->is_name()) { llvm::Value* ptr = lookup(node.get_length()->get_node_name()); - index_value = builder.CreateLoad(ptr); + index_value = ir_builder.CreateLoad(ptr); } else { node.get_length()->accept(*this); index_value = values.back(); @@ -169,15 +168,15 @@ llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { if (auto index_type = llvm::dyn_cast(index_value->getType())) { if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) return index_value; - return builder.CreateSExtOrTrunc(index_value, i64_type); + return ir_builder.CreateSExtOrTrunc(index_value, i64_type); } auto vector_type = llvm::cast(index_value->getType()); auto element_type = llvm::cast(vector_type->getElementType()); if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) return index_value; - return builder.CreateSExtOrTrunc(index_value, - llvm::FixedVectorType::get(i64_type, vector_width)); + return ir_builder.CreateSExtOrTrunc(index_value, + llvm::FixedVectorType::get(i64_type, 
vector_width)); } int CodegenLLVMVisitor::get_array_length(const ast::IndexedName& node) { @@ -334,11 +333,12 @@ void CodegenLLVMVisitor::create_external_method_call(const std::string& name, argument_values.push_back(value); } -#define DISPATCH(method_name, intrinsic) \ - if (name == (method_name)) { \ - llvm::Value* result = builder.CreateIntrinsic(intrinsic, argument_types, argument_values); \ - values.push_back(result); \ - return; \ +#define DISPATCH(method_name, intrinsic) \ + if (name == (method_name)) { \ + llvm::Value* result = \ + ir_builder.CreateIntrinsic(intrinsic, argument_types, argument_values); \ + values.push_back(result); \ + return; \ } DISPATCH("exp", llvm::Intrinsic::exp); @@ -360,7 +360,7 @@ void CodegenLLVMVisitor::create_function_call(llvm::Function* func, std::vector argument_values; argument_values.reserve(arguments.size()); pack_function_call_arguments(arguments, argument_values); - llvm::Value* call = builder.CreateCall(func, argument_values); + llvm::Value* call = ir_builder.CreateCall(func, argument_values); values.push_back(call); } @@ -382,7 +382,7 @@ void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& argumen std::vector argument_values; argument_values.reserve(arguments.size()); pack_function_call_arguments(arguments, argument_values); - builder.CreateCall(printf, argument_values); + ir_builder.CreateCall(printf, argument_values); } void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::CodegenFunction& node) { @@ -397,10 +397,21 @@ void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::Codeg llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); // Create a function that is automatically inserted into module's symbol table. 
- llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - name, - *module); + auto func = + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); + + // Add function debug information, with location information if it exists. + if (add_debug_information) { + if (node.get_token()) { + Location loc{node.get_token()->start_line(), node.get_token()->start_column()}; + debug_builder.add_function_debug_info(func, &loc); + } else { + debug_builder.add_function_debug_info(func); + } + } } llvm::Value* CodegenLLVMVisitor::lookup(const std::string& name) { @@ -416,7 +427,7 @@ void CodegenLLVMVisitor::pack_function_call_arguments(const ast::ExpressionVecto if (arg->is_string()) { // If the argument is a string, create a global i8* variable with it. auto string_arg = std::dynamic_pointer_cast(arg); - llvm::Value* str = builder.CreateGlobalStringPtr(string_arg->get_value()); + llvm::Value* str = ir_builder.CreateGlobalStringPtr(string_arg->get_value()); arg_values.push_back(str); } else { arg->accept(*this); @@ -443,10 +454,10 @@ llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, result = llvm_fp_op(lhs, rhs); \ return result; - DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd, builder.CreateAdd); - DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv, builder.CreateSDiv); - DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul, builder.CreateMul); - DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub, builder.CreateSub); + DISPATCH(ast::BinaryOp::BOP_ADDITION, ir_builder.CreateFAdd, ir_builder.CreateAdd); + DISPATCH(ast::BinaryOp::BOP_DIVISION, ir_builder.CreateFDiv, ir_builder.CreateSDiv); + DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, ir_builder.CreateFMul, ir_builder.CreateMul); + DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, ir_builder.CreateFSub, 
ir_builder.CreateSub); #undef DISPATCH @@ -461,15 +472,15 @@ void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm throw std::runtime_error("Error: only VarName assignment is supported!"); llvm::Value* ptr = get_variable_ptr(*var); - builder.CreateStore(rhs, ptr); + ir_builder.CreateStore(rhs, ptr); } llvm::Value* CodegenLLVMVisitor::visit_logical_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op) { const auto& bin_op = static_cast(op); - return bin_op == ast::BinaryOp::BOP_AND ? builder.CreateAnd(lhs, rhs) - : builder.CreateOr(lhs, rhs); + return bin_op == ast::BinaryOp::BOP_AND ? ir_builder.CreateAnd(lhs, rhs) + : ir_builder.CreateOr(lhs, rhs); } llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, @@ -488,12 +499,14 @@ llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, result = i_llvm_op(lhs, rhs); \ return result; - DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, builder.CreateICmpEQ, builder.CreateFCmpOEQ); - DISPATCH(ast::BinaryOp::BOP_GREATER, builder.CreateICmpSGT, builder.CreateFCmpOGT); - DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, builder.CreateICmpSGE, builder.CreateFCmpOGE); - DISPATCH(ast::BinaryOp::BOP_LESS, builder.CreateICmpSLT, builder.CreateFCmpOLT); - DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, builder.CreateICmpSLE, builder.CreateFCmpOLE); - DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, builder.CreateICmpNE, builder.CreateFCmpONE); + DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, ir_builder.CreateICmpEQ, ir_builder.CreateFCmpOEQ); + DISPATCH(ast::BinaryOp::BOP_GREATER, ir_builder.CreateICmpSGT, ir_builder.CreateFCmpOGT); + DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, + ir_builder.CreateICmpSGE, + ir_builder.CreateFCmpOGE); + DISPATCH(ast::BinaryOp::BOP_LESS, ir_builder.CreateICmpSLT, ir_builder.CreateFCmpOLT); + DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, ir_builder.CreateICmpSLE, ir_builder.CreateFCmpOLE); + DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, ir_builder.CreateICmpNE, ir_builder.CreateFCmpONE); 
#undef DISPATCH @@ -602,7 +615,7 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem is_kernel_code = false; // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -633,31 +646,31 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem } // Branch to condition basic block and insert condition code there. - builder.CreateBr(for_cond); - builder.SetInsertPoint(for_cond); + ir_builder.CreateBr(for_cond); + ir_builder.SetInsertPoint(for_cond); node.get_condition()->accept(*this); // Extract the condition to decide whether to branch to the loop body or loop exit. llvm::Value* cond = values.back(); values.pop_back(); - builder.CreateCondBr(cond, for_body, exit); + ir_builder.CreateCondBr(cond, for_body, exit); // Generate code for the loop body and create the basic block for the increment. - builder.SetInsertPoint(for_body); + ir_builder.SetInsertPoint(for_body); is_kernel_code = true; const auto& statement_block = node.get_statement_block(); statement_block->accept(*this); is_kernel_code = false; - builder.CreateBr(for_inc); + ir_builder.CreateBr(for_inc); // Process increment. - builder.SetInsertPoint(for_inc); + ir_builder.SetInsertPoint(for_inc); node.get_increment()->accept(*this); // Create a branch to condition block, then generate exit code out of the loop. Restore the // vector width. 
- builder.CreateBr(for_cond); - builder.SetInsertPoint(exit); + ir_builder.CreateBr(for_cond); + ir_builder.SetInsertPoint(exit); vector_width = tmp_vector_width; is_kernel_code = true; } @@ -672,7 +685,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Create the entry basic block of the function/procedure and point the local named values table // to the symbol table. llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func); - builder.SetInsertPoint(body); + ir_builder.SetInsertPoint(body); // When processing a function, it returns a value named in NMODL. Therefore, we // first run RenameVisitor to rename it into ret_. This will aid in avoiding @@ -687,9 +700,10 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node unsigned i = 0; for (auto& arg: func->args()) { std::string arg_name = arguments[i++].get()->get_node_name(); - llvm::Value* alloca = builder.CreateAlloca(arg.getType(), /*ArraySize=*/nullptr, arg_name); + llvm::Type* arg_type = arg.getType(); + llvm::Value* alloca = ir_builder.CreateAlloca(arg_type, /*ArraySize=*/nullptr, arg_name); arg.setName(arg_name); - builder.CreateStore(&arg, alloca); + ir_builder.CreateStore(&arg, alloca); } // Process function or procedure body. If the function is a compute kernel, then set the @@ -705,7 +719,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // If function has a void return type, add a terminator not handled by CodegenReturnVar. if (has_void_ret_type) - builder.CreateRetVoid(); + ir_builder.CreateRetVoid(); // Clear local values stack and remove the pointer to the local symbol table. 
values.clear(); @@ -717,8 +731,8 @@ void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturn throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); std::string ret = "ret_" + current_func->getName().str(); - llvm::Value* ret_value = builder.CreateLoad(lookup(ret)); - builder.CreateRet(ret_value); + llvm::Value* ret_value = ir_builder.CreateLoad(lookup(ret)); + ir_builder.CreateRet(ret_value); } void CodegenLLVMVisitor::visit_codegen_var_list_statement( @@ -750,7 +764,7 @@ void CodegenLLVMVisitor::visit_codegen_var_list_statement( } else { throw std::runtime_error("Error: Unsupported local variable type"); } - builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); + ir_builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); } } @@ -785,7 +799,7 @@ void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -799,9 +813,9 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { values.pop_back(); // Process the true block. - builder.SetInsertPoint(true_block); + ir_builder.SetInsertPoint(true_block); node.get_statement_block()->accept(*this); - builder.CreateBr(merge_block); + ir_builder.CreateBr(merge_block); // Save the merge block and proceed with codegen for `else if` statements. llvm::BasicBlock* exit = merge_block; @@ -809,11 +823,11 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Link the current block to the true and else blocks. 
llvm::BasicBlock* else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - builder.SetInsertPoint(curr_block); - builder.CreateCondBr(cond, true_block, else_block); + ir_builder.SetInsertPoint(curr_block); + ir_builder.CreateCondBr(cond, true_block, else_block); // Process else block. - builder.SetInsertPoint(else_block); + ir_builder.SetInsertPoint(else_block); else_if->get_condition()->accept(*this); cond = values.back(); values.pop_back(); @@ -823,13 +837,13 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); llvm::BasicBlock* tmp = merge_block; merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - builder.SetInsertPoint(merge_block); - builder.CreateBr(tmp); + ir_builder.SetInsertPoint(merge_block); + ir_builder.CreateBr(tmp); // Process true block. - builder.SetInsertPoint(true_block); + ir_builder.SetInsertPoint(true_block); else_if->get_statement_block()->accept(*this); - builder.CreateBr(merge_block); + ir_builder.CreateBr(merge_block); curr_block = else_block; } @@ -838,15 +852,15 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { llvm::BasicBlock* else_block; if (elses) { else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - builder.SetInsertPoint(else_block); + ir_builder.SetInsertPoint(else_block); elses->get_statement_block()->accept(*this); - builder.CreateBr(merge_block); + ir_builder.CreateBr(merge_block); } else { else_block = merge_block; } - builder.SetInsertPoint(curr_block); - builder.CreateCondBr(cond, true_block, else_block); - builder.SetInsertPoint(exit); + ir_builder.SetInsertPoint(curr_block); + ir_builder.CreateCondBr(cond, true_block, else_block); + ir_builder.SetInsertPoint(exit); } void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { @@ -867,9 +881,13 @@ void CodegenLLVMVisitor::visit_program(const 
ast::Program& node) { CodegenLLVMHelperVisitor v{vector_width}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); - kernel_id = v.get_kernel_id(); + // Create compile unit if adding debug information to the module. + if (add_debug_information) { + debug_builder.create_compile_unit(*module, module->getModuleIdentifier(), output_dir); + } + // For every function, generate its declaration. Thus, we can look up // `llvm::Function` in the symbol table in the module. for (const auto& func: functions) { @@ -889,6 +907,11 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { visit_codegen_function(*func); } + // Finalize the debug information. + if (add_debug_information) { + debug_builder.finalize(); + } + // Verify the generated LLVM IR module. std::string error; llvm::raw_string_ostream ostream(error); @@ -958,9 +981,9 @@ void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node llvm::Value* value = values.back(); values.pop_back(); if (op == ast::UOP_NEGATION) { - values.push_back(builder.CreateFNeg(value)); + values.push_back(ir_builder.CreateFNeg(value)); } else if (op == ast::UOP_NOT) { - values.push_back(builder.CreateNot(value)); + values.push_back(ir_builder.CreateNot(value)); } else { throw std::runtime_error("Error: unsupported unary operator\n"); } @@ -971,7 +994,7 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { // Finally, load the variable from the pointer value unless it has already been loaded (e.g. via // gather instruction). - llvm::Value* var = ptr->getType()->isPointerTy() ? builder.CreateLoad(ptr) : ptr; + llvm::Value* var = ptr->getType()->isPointerTy() ? ir_builder.CreateLoad(ptr) : ptr; // If the value should not be vectorised, or it is already a vector, add it to the stack. 
if (!is_kernel_code || vector_width <= 1 || var->getType()->isVectorTy()) { @@ -981,13 +1004,13 @@ void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { // Otherwise, if we are generating vectorised inside the loop, replicate the value to form a // vector of `vector_width`. - llvm::Value* vector_var = builder.CreateVectorSplat(vector_width, var); + llvm::Value* vector_var = ir_builder.CreateVectorSplat(vector_width, var); values.push_back(vector_var); } void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -996,20 +1019,20 @@ void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); - builder.CreateBr(header); - builder.SetInsertPoint(header); + ir_builder.CreateBr(header); + ir_builder.SetInsertPoint(header); // Generate code for condition and create branch to the body block. 
node.get_condition()->accept(*this); llvm::Value* condition = values.back(); values.pop_back(); - builder.CreateCondBr(condition, body, exit); + ir_builder.CreateCondBr(condition, body, exit); - builder.SetInsertPoint(body); + ir_builder.SetInsertPoint(body); node.get_statement_block()->accept(*this); - builder.CreateBr(header); + ir_builder.CreateBr(header); - builder.SetInsertPoint(exit); + ir_builder.SetInsertPoint(exit); } void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { @@ -1050,17 +1073,23 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { llvm::Function::ExternalLinkage, "__" + kernel_name + "_wrapper", *module); + + // Optionally, add debug information for the wrapper function. + if (add_debug_information) { + debug_builder.add_function_debug_info(wrapper_func); + } + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", wrapper_func); - builder.SetInsertPoint(body); + ir_builder.SetInsertPoint(body); // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel // and adding a terminator. 
- llvm::Value* bitcasted = builder.CreateBitCast(wrapper_func->getArg(0), - instance_struct_ptr_type); + llvm::Value* bitcasted = ir_builder.CreateBitCast(wrapper_func->getArg(0), + instance_struct_ptr_type); std::vector args; args.push_back(bitcasted); - builder.CreateCall(kernel, args); - builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); + ir_builder.CreateCall(kernel, args); + ir_builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); } } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 099613f8d4..450e1872a4 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -19,11 +19,13 @@ #include #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "codegen/llvm/llvm_debug_builder.hpp" #include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" @@ -74,7 +76,14 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { std::unique_ptr module = std::make_unique(mod_filename, *context); - llvm::IRBuilder<> builder; + // LLVM IR builder. + llvm::IRBuilder<> ir_builder; + + // Debug information builder. + DebugBuilder debug_builder; + + // Add debug information to the module. + bool add_debug_information; // Pass manager for optimisation passes that are used for target code generation. 
llvm::legacy::FunctionPassManager codegen_pm; @@ -129,14 +138,17 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { bool opt_passes, bool use_single_precision = false, int vector_width = 1, - std::string vec_lib = "none") + std::string vec_lib = "none", + bool add_debug_information = false) : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) , use_single_precision(use_single_precision) , vector_width(vector_width) , vector_library(veclib_map.at(vec_lib)) - , builder(*context) + , add_debug_information(add_debug_information) + , ir_builder(*context) + , debug_builder(*module) , codegen_pm(module.get()) , opt_pm(module.get()) {} diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index 1e8eb4bfd0..532cd20b8f 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/src/codegen/llvm/jit_driver.cpp @@ -154,6 +154,16 @@ void JITDriver::init(std::string features, set_triple_and_data_layout(*module, features); auto data_layout = module->getDataLayout(); + // If benchmarking, enable listeners to use GDB, perf or VTune. Note that LLVM should be built + // with listeners on (e.g. -DLLVM_USE_PERF=ON). + if (benchmark_info) { + gdb_event_listener = llvm::JITEventListener::createGDBRegistrationListener(); +#if defined(NMODL_HAVE_JIT_EVENT_LISTENERS) + perf_event_listener = llvm::JITEventListener::createPerfJITEventListener(); + intel_event_listener = llvm::JITEventListener::createIntelJITEventListener(); +#endif + } + // Create object linking function callback. auto object_linking_layer_creator = [&](llvm::orc::ExecutionSession& session, const llvm::Triple& triple) { @@ -161,6 +171,15 @@ void JITDriver::init(std::string features, auto layer = std::make_unique(session, []() { return std::make_unique(); }); + + // Register event listeners if they exist. 
+ if (gdb_event_listener) + layer->registerJITEventListener(*gdb_event_listener); + if (perf_event_listener) + layer->registerJITEventListener(*perf_event_listener); + if (intel_event_listener) + layer->registerJITEventListener(*intel_event_listener); + for (const auto& lib_path: lib_paths) { // For every library path, create a corresponding memory buffer. auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path); diff --git a/src/codegen/llvm/jit_driver.hpp b/src/codegen/llvm/jit_driver.hpp index 151ec177d8..afb1317cd8 100644 --- a/src/codegen/llvm/jit_driver.hpp +++ b/src/codegen/llvm/jit_driver.hpp @@ -15,6 +15,7 @@ * \brief \copybrief nmodl::runner::JITDriver */ +#include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" namespace nmodl { @@ -45,8 +46,18 @@ class JITDriver { std::unique_ptr jit; + /// LLVM IR module to execute. std::unique_ptr module; + /// GDB event listener. + llvm::JITEventListener* gdb_event_listener = nullptr; + + /// perf event listener. + llvm::JITEventListener* perf_event_listener = nullptr; + + /// Intel event listener. + llvm::JITEventListener* intel_event_listener = nullptr; + public: explicit JITDriver(std::unique_ptr m) : module(std::move(m)) {} diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/src/codegen/llvm/llvm_benchmark.cpp index df0c54517d..adbe653f1e 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/src/codegen/llvm/llvm_benchmark.cpp @@ -58,7 +58,8 @@ void LLVMBenchmark::run(const std::shared_ptr& node) { llvm_build_info.opt_passes, llvm_build_info.use_single_precision, llvm_build_info.vector_width, - llvm_build_info.vec_lib); + llvm_build_info.vec_lib, + /*add_debug_information=*/true); generate_llvm(visitor, node); // Finally, run the benchmark and log the measurements. 
diff --git a/src/codegen/llvm/llvm_debug_builder.cpp b/src/codegen/llvm/llvm_debug_builder.cpp new file mode 100644 index 0000000000..5682a6e904 --- /dev/null +++ b/src/codegen/llvm/llvm_debug_builder.cpp @@ -0,0 +1,63 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/llvm_debug_builder.hpp" + +namespace nmodl { +namespace codegen { + + +static constexpr const char debug_version_key[] = "Debug Version"; + + +void DebugBuilder::add_function_debug_info(llvm::Function* function, Location* loc) { + // Create the function debug type (subroutine type). We are not interested in parameters and + // types, and therefore passing llvm::None as argument suffices for now. + llvm::DISubroutineType* subroutine_type = di_builder.createSubroutineType( + di_builder.getOrCreateTypeArray(llvm::None)); + llvm::DISubprogram::DISPFlags sp_flags = llvm::DISubprogram::SPFlagDefinition | + llvm::DISubprogram::SPFlagOptimized; + // If there is no location associated with the function, just use 0. + int line = loc ? loc->line : 0; + llvm::DISubprogram* program = di_builder.createFunction(compile_unit, + function->getName(), + function->getName(), + file, + line, + subroutine_type, + line, + llvm::DINode::FlagZero, + sp_flags); + function->setSubprogram(program); + di_builder.finalizeSubprogram(program); +} + +void DebugBuilder::create_compile_unit(llvm::Module& module, + const std::string& debug_filename, + const std::string& debug_output_dir) { + // Create the debug file and compile unit for the module. 
+ file = di_builder.createFile(debug_filename, debug_output_dir); + compile_unit = di_builder.createCompileUnit(llvm::dwarf::DW_LANG_C, + file, + /*Producer=*/"NMODL-LLVM", + /*isOptimized=*/false, + /*Flags=*/"", + /*RV=*/0); + + // Add a flag to the module to specify that it has debug information. + if (!module.getModuleFlag(debug_version_key)) { + module.addModuleFlag(llvm::Module::Warning, + debug_version_key, + llvm::DEBUG_METADATA_VERSION); + } +} + +void DebugBuilder::finalize() { + di_builder.finalize(); +} +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_debug_builder.hpp b/src/codegen/llvm/llvm_debug_builder.hpp new file mode 100644 index 0000000000..9322cd461a --- /dev/null +++ b/src/codegen/llvm/llvm_debug_builder.hpp @@ -0,0 +1,70 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + +namespace nmodl { +namespace codegen { + +/// A struct to store AST location information. +/// \todo Currently, not all AST nodes have location information. Moreover, +/// some may not have it as they were artificially introduced (e.g. +/// CodegenForStatement). This simple wrapper suffices for now, but in future +/// we may want to handle this properly. +struct Location { + /// Line in the file. + int line; + + /// Column in the file. + int column; +}; + + +/** + * \class DebugBuilder + * \brief A helper class to create debug information for LLVM IR module. + * \todo Only function debug information is supported. + */ +class DebugBuilder { + private: + /// Debug information builder. 
+ llvm::DIBuilder di_builder; + + /// LLVM context. + llvm::LLVMContext& context; + + /// Debug compile unit for the module. + llvm::DICompileUnit* compile_unit = nullptr; + + /// Debug file pointer. + llvm::DIFile* file = nullptr; + + public: + DebugBuilder(llvm::Module& module) + : di_builder(module) + , context(module.getContext()) {} + + /// Adds function debug information with an optional location. + void add_function_debug_info(llvm::Function* function, Location* loc = nullptr); + + /// Creates the compile unit for and sets debug flags for the module. + void create_compile_unit(llvm::Module& module, + const std::string& debug_filename, + const std::string& debug_output_dir); + + /// Finalizes the debug information. + void finalize(); +}; +} // namespace codegen +} // namespace nmodl diff --git a/src/main.cpp b/src/main.cpp index 4604a8df50..d2e73d37da 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -182,6 +182,9 @@ int main(int argc, const char* argv[]) { /// vector library name std::string vector_library("none"); + /// disable debug information generation for the IR + bool disable_debug_information(false); + /// run llvm benchmark bool run_llvm_benchmark(false); @@ -329,6 +332,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_flag("--ir", llvm_ir, fmt::format("Generate LLVM IR ({})", llvm_ir))->ignore_case(); + llvm_opt->add_flag("--disable-debug-info", + disable_debug_information, + fmt::format("Disable debug information ({})", disable_debug_information))->ignore_case(); llvm_opt->add_flag("--opt", llvm_ir_opt_passes, fmt::format("Run LLVM optimisation passes ({})", llvm_ir_opt_passes))->ignore_case(); @@ -702,7 +708,8 @@ int main(int argc, const char* argv[]) { llvm_ir_opt_passes, llvm_float_type, llvm_vec_width, - vector_library); + vector_library, + !disable_debug_information); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); From 92eadeb4f4e81777c69540cbb4f2c5eba60e164c Mon 
Sep 17 00:00:00 2001 From: George Mitenkov Date: Sat, 8 May 2021 03:22:06 -0700 Subject: [PATCH 051/105] Fixed using benchmarking_info in TestRunner (#631) --- src/codegen/llvm/jit_driver.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/codegen/llvm/jit_driver.cpp b/src/codegen/llvm/jit_driver.cpp index 532cd20b8f..2a6842d0fb 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/src/codegen/llvm/jit_driver.cpp @@ -205,13 +205,13 @@ void JITDriver::init(std::string features, auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) -> llvm::Expected> { // Create target machine with some features possibly turned off. - auto tm = create_target(&tm_builder, features, benchmark_info->opt_level_codegen); + int opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0; + auto tm = create_target(&tm_builder, features, opt_level_codegen); - // Optimise the LLVM IR module. - optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); - - // Save optimised module to .ll file if benchmarking. + // Optimise the LLVM IR module and save it to .ll file if benchmarking. if (benchmark_info) { + optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); + std::error_code error_code; std::unique_ptr out = std::make_unique(benchmark_info->output_dir + "/" + From dbccdaada872de15b7fe158f9a2964dd9054a782 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 11 May 2021 16:14:34 -0700 Subject: [PATCH 052/105] Fixed addition of SOLVE block to kernel's FOR loop (#636) * Fix `append_statements_from_block` function in LLVM helper visitor. * Before, if nonspecific current was not specified, the whole `BREAKPOINT` block would be added to the kernel body. 
* This led to cases when `SOLVE` block was together with the actual solution to `DERIVATIVE` --- src/codegen/llvm/codegen_llvm_helper_visitor.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index a6af725eb7..bc8317fecb 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -248,7 +248,12 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s static void append_statements_from_block(ast::StatementVector& statements, const std::shared_ptr& block) { const auto& block_statements = block->get_statements(); - statements.insert(statements.end(), block_statements.begin(), block_statements.end()); + for (const auto& statement: block_statements) { + const auto& expression_statement = std::dynamic_pointer_cast( + statement); + if (!expression_statement->get_expression()->is_solve_block()) + statements.push_back(statement); + } } static std::shared_ptr create_atomic_statement(std::string& lhs_str, @@ -638,7 +643,6 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// add breakpoint block if no current if (info.currents.empty() && info.breakpoint_node != nullptr) { auto block = info.breakpoint_node->get_statement_block(); - // \todo this automatically adds `SOLVE states METHOD ...` append_statements_from_block(loop_body_statements, block); } From e299af160b415076d9ca5d057b2a2971287cc9e2 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 13 May 2021 00:49:23 -0700 Subject: [PATCH 053/105] IR builder redesign for LLVM IR code generation pipeline (#634) Improves the code structure for the LLVM code generation pipeline The following changes were added: 1. New IR builder class. Before, LLVM visitor just simply used `llvm::IRBuilder<>` class to generate instructions. 
Recently, this (as well as adding the functionality to the visitor on the go) had led to code duplication, and it became hard to introduce new features cleanly. Hence, a special `IRBuilder` class is now used. This class is a wrapper around `llvm::IRBuilder<>` that keeps track of certain IR-generation-specific fields (that are unrelated to the visitor) and defines an API that the visitor can use to generate LLVM IR. Also, this IR builder has been designed to be nearly fully independent of NMODL AST nodes. This allows it to be more generic and more extensible. 2. Visitor clean-up The LLVM visitor has been refactored to take the new IR builder class into account. Also, the functions were reordered, refactored and renamed to better reflect the intended use and provide encapsulation. 3. Scatter preparation The functionality of generating code for the `CodegenInstanceVar` node has been extended with a `read_from_or_write_to_instance(...)` function. Now, an optional `value_to_store` is passed to indicate whether the code needs to be generated for reading the instance variable or writing to it. 
fixes #538 --- src/codegen/llvm/CMakeLists.txt | 4 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 926 ++++++++-------------- src/codegen/llvm/codegen_llvm_visitor.hpp | 342 +++----- src/codegen/llvm/llvm_ir_builder.cpp | 427 ++++++++++ src/codegen/llvm/llvm_ir_builder.hpp | 272 +++++++ 5 files changed, 1134 insertions(+), 837 deletions(-) create mode 100644 src/codegen/llvm/llvm_ir_builder.cpp create mode 100644 src/codegen/llvm/llvm_ir_builder.hpp diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 4e0f4d90d0..78380485fa 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -11,7 +11,9 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp) # ============================================================================= # LLVM codegen library and executable diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 830814286e..a86a5cd8b5 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -14,10 +14,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" -#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" #include "llvm/Support/ToolOutputFile.h" @@ -34,7 +32,7 @@ static constexpr const char instance_struct_type_name[] = "__instance_var__type" /****************************************************************************************/ -/* Helper routines */ +/* Helper 
routines */ /****************************************************************************************/ /// A utility to check for supported Statement AST nodes. @@ -44,8 +42,8 @@ static bool is_supported_statement(const ast::Statement& statement) { statement.is_if_statement() || statement.is_while_statement(); } -/// A utility to check of the kernel body can be vectorised. -static bool can_vectorise(const ast::CodegenForStatement& statement, symtab::SymbolTable* sym_tab) { +/// A utility to check that the kernel body can be vectorised. +static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::SymbolTable* sym_tab) { // Check that function calls are made to external methods only. const auto& function_calls = collect_nodes(statement, {ast::AstNodeType::FUNCTION_CALL}); for (const auto& call: function_calls) { @@ -62,458 +60,352 @@ static bool can_vectorise(const ast::CodegenForStatement& statement, symtab::Sym return collected.empty(); } -llvm::Value* CodegenLLVMVisitor::create_gep(const std::string& name, llvm::Value* index) { - llvm::Type* index_type = llvm::Type::getInt64Ty(*context); - std::vector indices; - indices.push_back(llvm::ConstantInt::get(index_type, 0)); - indices.push_back(index); - - return ir_builder.CreateInBoundsGEP(lookup(name), indices); -} - -llvm::Value* CodegenLLVMVisitor::codegen_indexed_name(const ast::IndexedName& node) { - llvm::Value* index = get_array_index(node); - return create_gep(node.get_node_name(), index); +llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { + node->accept(*this); + return ir_builder.pop_last_value(); } -llvm::Value* CodegenLLVMVisitor::codegen_instance_var(const ast::CodegenInstanceVar& node) { - const auto& member_node = node.get_member_var(); - const auto& instance_name = node.get_instance_var()->get_node_name(); - const auto& member_name = member_node->get_node_name(); - - if (!instance_var_helper.is_an_instance_variable(member_name)) - throw 
std::runtime_error("Error: " + member_name + " is not a member of the instance!"); +void CodegenLLVMVisitor::create_external_function_call(const std::string& name, + const ast::ExpressionVector& arguments) { + if (name == "printf") { + create_printf_call(arguments); + return; + } - // Load the instance struct given its name from the ValueSymbolTable. - llvm::Value* instance_ptr = ir_builder.CreateLoad(lookup(instance_name)); + ValueVector argument_values; + TypeVector argument_types; + for (const auto& arg: arguments) { + llvm::Value* value = accept_and_get(arg); + llvm::Type* type = value->getType(); + argument_types.push_back(type); + argument_values.push_back(value); + } + ir_builder.create_intrinsic(name, argument_values, argument_types); +} - // Create a GEP instruction to get a pointer to the member. - int member_index = instance_var_helper.get_variable_index(member_name); - llvm::Type* index_type = llvm::Type::getInt32Ty(*context); +void CodegenLLVMVisitor::create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments) { + // Check that function is called with the expected number of arguments. + if (!func->isVarArg() && arguments.size() != func->arg_size()) { + throw std::runtime_error("Error: Incorrect number of arguments passed"); + } - std::vector indices; - indices.push_back(llvm::ConstantInt::get(index_type, 0)); - indices.push_back(llvm::ConstantInt::get(index_type, member_index)); - llvm::Value* member_ptr = ir_builder.CreateInBoundsGEP(instance_ptr, indices); + // Pack function call arguments to vector and create a call instruction. + ValueVector argument_values; + argument_values.reserve(arguments.size()); + create_function_call_arguments(arguments, argument_values); + ir_builder.create_function_call(func, argument_values); +} - // Get the member AST node from the instance AST node, for which we proceed with the code - // generation. If the member is scalar, return the pointer to it straight away. 
- auto codegen_var_with_type = instance_var_helper.get_variable(member_name); - if (!codegen_var_with_type->get_is_pointer()) { - return member_ptr; +void CodegenLLVMVisitor::create_function_call_arguments(const ast::ExpressionVector& arguments, + ValueVector& arg_values) { + for (const auto& arg: arguments) { + if (arg->is_string()) { + // If the argument is a string, create a global i8* variable with it. + auto string_arg = std::dynamic_pointer_cast(arg); + arg_values.push_back(ir_builder.create_global_string(*string_arg)); + } else { + llvm::Value* value = accept_and_get(arg); + arg_values.push_back(value); + } } +} - // Otherwise, the codegen variable is a pointer, and the member AST node must be an IndexedName. - auto member_var_name = std::dynamic_pointer_cast(member_node); - if (!member_var_name->get_name()->is_indexed_name()) - throw std::runtime_error("Error: " + member_name + " is not an IndexedName!"); - - // Proceed to creating a GEP instruction to get the pointer to the member's element. - auto member_indexed_name = std::dynamic_pointer_cast( - member_var_name->get_name()); +void CodegenLLVMVisitor::create_function_declaration(const ast::CodegenFunction& node) { + const auto& name = node.get_node_name(); + const auto& arguments = node.get_arguments(); - if (!member_indexed_name->get_length()->is_name()) - throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + // Procedure or function parameters are doubles by default. + TypeVector arg_types; + for (size_t i = 0; i < arguments.size(); ++i) + arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); - llvm::Value* i64_index = get_array_index(*member_indexed_name); - - // The codegen variable type is always a scalar, so we need to transform it to a pointer. Then - // load the member which would be indexed later. 
- llvm::Type* type = get_codegen_var_type(*codegen_var_with_type->get_type()); - llvm::Value* instance_member = - ir_builder.CreateLoad(llvm::PointerType::get(type, /*AddressSpace=*/0), member_ptr); - - // Check if the code is vectorised and the index is indirect. - std::string id = member_indexed_name->get_length()->get_node_name(); - if (id != kernel_id && is_kernel_code && vector_width > 1) { - // Calculate a vector of addresses via GEP instruction, and then created a gather to load - // indirectly. - llvm::Value* addresses = ir_builder.CreateInBoundsGEP(instance_member, {i64_index}); - return ir_builder.CreateMaskedGather(addresses, llvm::Align()); - } + llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); - llvm::Value* member_addr = ir_builder.CreateInBoundsGEP(instance_member, {i64_index}); + // Create a function that is automatically inserted into module's symbol table. + auto func = + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); - // If the code is vectorised, then bitcast to a vector pointer. - if (is_kernel_code && vector_width > 1) { - llvm::Type* vector_type = - llvm::PointerType::get(llvm::FixedVectorType::get(type, vector_width), - /*AddressSpace=*/0); - return ir_builder.CreateBitCast(member_addr, vector_type); + // Add function debug information, with location information if it exists. + if (add_debug_information) { + if (node.get_token()) { + Location loc{node.get_token()->start_line(), node.get_token()->start_column()}; + debug_builder.add_function_debug_info(func, &loc); + } else { + debug_builder.add_function_debug_info(func); + } } - return member_addr; } -llvm::Value* CodegenLLVMVisitor::get_array_index(const ast::IndexedName& node) { - // Process the index expression. It can either be a Name node: - // k[id] // id is an integer - // or an integer expression. 
- llvm::Value* index_value; - if (node.get_length()->is_name()) { - llvm::Value* ptr = lookup(node.get_length()->get_node_name()); - index_value = ir_builder.CreateLoad(ptr); - } else { - node.get_length()->accept(*this); - index_value = values.back(); - values.pop_back(); - } +void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& arguments) { + // First, create printf declaration or insert it if it does not exit. + std::string name = "printf"; + llvm::Function* printf = module->getFunction(name); + if (!printf) { + llvm::FunctionType* printf_type = llvm::FunctionType::get(ir_builder.get_i32_type(), + ir_builder.get_i8_ptr_type(), + /*isVarArg=*/true); - // Check if index is a double. While it is possible to use casting from double to integer - // values, we choose not to support these cases. - if (!index_value->getType()->isIntOrIntVectorTy()) - throw std::runtime_error("Error: only integer indexing is supported!"); - - // Conventionally, in LLVM array indices are 64 bit. - llvm::Type* i64_type = llvm::Type::getInt64Ty(*context); - if (auto index_type = llvm::dyn_cast(index_value->getType())) { - if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) - return index_value; - return ir_builder.CreateSExtOrTrunc(index_value, i64_type); + printf = + llvm::Function::Create(printf_type, llvm::Function::ExternalLinkage, name, *module); } - auto vector_type = llvm::cast(index_value->getType()); - auto element_type = llvm::cast(vector_type->getElementType()); - if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) - return index_value; - return ir_builder.CreateSExtOrTrunc(index_value, - llvm::FixedVectorType::get(i64_type, vector_width)); + // Create a call instruction. 
+ ValueVector argument_values; + argument_values.reserve(arguments.size()); + create_function_call_arguments(arguments, argument_values); + ir_builder.create_function_call(printf, argument_values, /*use_result=*/false); } -int CodegenLLVMVisitor::get_array_length(const ast::IndexedName& node) { - auto integer = std::dynamic_pointer_cast(node.get_length()); - if (!integer) - throw std::runtime_error("Error: only integer length is supported!"); - - // Check if integer value is taken from a macro. - if (!integer->get_macro()) - return integer->get_value(); - const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name()); - return static_cast(*macro->get_value()); +void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { + // By convention, only kernel functions have a return type of void. + const auto& functions = module->getFunctionList(); + for (const auto& func: functions) { + if (func.getReturnType()->isVoidTy()) { + container.push_back(func.getName().str()); + } + } } llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& node) { switch (node.get_type()) { case ast::AstNodeType::BOOLEAN: - return llvm::Type::getInt1Ty(*context); + return ir_builder.get_boolean_type(); case ast::AstNodeType::DOUBLE: - return get_default_fp_type(); + return ir_builder.get_fp_type(); case ast::AstNodeType::INSTANCE_STRUCT: return get_instance_struct_type(); case ast::AstNodeType::INTEGER: - return llvm::Type::getInt32Ty(*context); + return ir_builder.get_i32_type(); case ast::AstNodeType::VOID: - return llvm::Type::getVoidTy(*context); + return ir_builder.get_void_type(); default: throw std::runtime_error("Error: expecting a type in CodegenVarType node\n"); } } -llvm::Value* CodegenLLVMVisitor::get_constant_int_vector(int value) { - llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - std::vector constants; - for (unsigned i = 0; i < vector_width; ++i) { - const auto& element = llvm::ConstantInt::get(i32_type, value); - 
constants.push_back(element); - } - return llvm::ConstantVector::get(constants); +llvm::Value* CodegenLLVMVisitor::get_index(const ast::IndexedName& node) { + // In NMODL, the index is either an integer expression or a named constant, such as "id". + llvm::Value* index_value = node.get_length()->is_name() + ? ir_builder.create_load(node.get_length()->get_node_name()) + : accept_and_get(node.get_length()); + return ir_builder.create_index(index_value); } -llvm::Value* CodegenLLVMVisitor::get_constant_fp_vector(const std::string& value) { - llvm::Type* fp_type = get_default_fp_type(); - std::vector constants; - for (unsigned i = 0; i < vector_width; ++i) { - const auto& element = llvm::ConstantFP::get(fp_type, value); - constants.push_back(element); +llvm::Type* CodegenLLVMVisitor::get_instance_struct_type() { + TypeVector member_types; + for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { + // Get the type information of the codegen variable. + const auto& is_pointer = variable->get_is_pointer(); + const auto& nmodl_type = variable->get_type()->get_type(); + + // Create the corresponding LLVM type. + switch (nmodl_type) { + case ast::AstNodeType::DOUBLE: + member_types.push_back(is_pointer ? ir_builder.get_fp_ptr_type() + : ir_builder.get_fp_type()); + break; + case ast::AstNodeType::INTEGER: + member_types.push_back(is_pointer ? 
ir_builder.get_i32_ptr_type() + : ir_builder.get_i32_type()); + break; + default: + throw std::runtime_error("Error: unsupported type found in instance struct\n"); + } } - return llvm::ConstantVector::get(constants); -} -llvm::Type* CodegenLLVMVisitor::get_default_fp_type() { - if (use_single_precision) - return llvm::Type::getFloatTy(*context); - return llvm::Type::getDoubleTy(*context); + return ir_builder.get_struct_ptr_type(mod_filename + instance_struct_type_name, member_types); } -llvm::Type* CodegenLLVMVisitor::get_default_fp_ptr_type() { - if (use_single_precision) - return llvm::Type::getFloatPtrTy(*context); - return llvm::Type::getDoublePtrTy(*context); +int CodegenLLVMVisitor::get_num_elements(const ast::IndexedName& node) { + // First, verify if the length is an integer value. + const auto& integer = std::dynamic_pointer_cast(node.get_length()); + if (!integer) + throw std::runtime_error("Error: only integer length is supported\n"); + + // Check if the length value is a constant. + if (!integer->get_macro()) + return integer->get_value(); + + // Otherwise, the length is taken from the macro. 
+ const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name()); + return static_cast(*macro->get_value()); } -llvm::Type* CodegenLLVMVisitor::get_instance_struct_type() { - std::vector members; - for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { - auto is_pointer = variable->get_is_pointer(); - auto nmodl_type = variable->get_type()->get_type(); +llvm::Value* CodegenLLVMVisitor::read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, + llvm::Value* maybe_value_to_store) { + const auto& instance_name = node.get_instance_var()->get_node_name(); + const auto& member_node = node.get_member_var(); + const auto& member_name = member_node->get_node_name(); - llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::Type* i32ptr_type = llvm::Type::getInt32PtrTy(*context); + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); - switch (nmodl_type) { -#define DISPATCH(type, llvm_ptr_type, llvm_type) \ - case type: \ - members.push_back(is_pointer ? (llvm_ptr_type) : (llvm_type)); \ - break; + // Load the instance struct by its name. + llvm::Value* instance_ptr = ir_builder.create_load(instance_name); - DISPATCH(ast::AstNodeType::DOUBLE, get_default_fp_ptr_type(), get_default_fp_type()); - DISPATCH(ast::AstNodeType::INTEGER, i32ptr_type, i32_type); + // Get the pointer to the specified member. + int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); -#undef DISPATCH - default: - throw std::runtime_error("Error: unsupported type found in instance struct"); + // Check if the member is scalar. Load the value or store to it straight away. Otherwise, we + // need some extra handling. 
+ auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) { + if (maybe_value_to_store) { + ir_builder.create_store(member_ptr, maybe_value_to_store); + return nullptr; + } else { + return ir_builder.create_load(member_ptr); } } - llvm::StructType* llvm_struct_type = - llvm::StructType::create(*context, mod_filename + instance_struct_type_name); - llvm_struct_type->setBody(members); - return llvm::PointerType::get(llvm_struct_type, /*AddressSpace=*/0); + // Check that the member is an indexed name indeed, and that it is indexed by a named constant + // (e.g. "id"). + const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + // Get the index to the member and the id used to index it. + llvm::Value* i64_index = get_index(*member_indexed_name); + const std::string id = member_indexed_name->get_length()->get_node_name(); + + // Load the member of the instance struct. + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + + // Create a pointer to the specified element of the struct member. 
+ return ir_builder.load_to_or_store_from_array(id, + i64_index, + instance_member, + maybe_value_to_store); } -llvm::Value* CodegenLLVMVisitor::get_variable_ptr(const ast::VarName& node) { +llvm::Value* CodegenLLVMVisitor::read_variable(const ast::VarName& node) { const auto& identifier = node.get_name(); - if (!identifier->is_name() && !identifier->is_indexed_name() && - !identifier->is_codegen_instance_var()) { - throw std::runtime_error("Error: Unsupported variable type - " + node.get_node_name()); - } - llvm::Value* ptr; - if (identifier->is_name()) - ptr = lookup(node.get_node_name()); + if (identifier->is_name()) { + return ir_builder.create_load(node.get_node_name()); + } if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - ptr = codegen_indexed_name(*indexed_name); + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + llvm::Value* index = get_index(*indexed_name); + return ir_builder.create_load_from_array(node.get_node_name(), index); } if (identifier->is_codegen_instance_var()) { - auto instance_var = std::dynamic_pointer_cast(identifier); - ptr = codegen_instance_var(*instance_var); + const auto& instance_var = std::dynamic_pointer_cast(identifier); + return read_from_or_write_to_instance(*instance_var); } - return ptr; -} -std::shared_ptr CodegenLLVMVisitor::get_instance_struct_ptr() { - return instance_var_helper.instance; + throw std::runtime_error("Error: the type of '" + node.get_node_name() + + "' is not supported\n"); } void CodegenLLVMVisitor::run_ir_opt_passes() { - /// run some common optimisation passes that are commonly suggested + // Run some common optimisation passes that are commonly suggested. opt_pm.add(llvm::createInstructionCombiningPass()); opt_pm.add(llvm::createReassociatePass()); opt_pm.add(llvm::createGVNPass()); opt_pm.add(llvm::createCFGSimplificationPass()); - /// initialize pass manager + // Initialize pass manager. 
opt_pm.doInitialization(); - /// iterate over all functions and run the optimisation passes + // Iterate over all functions and run the optimisation passes. auto& functions = module->getFunctionList(); for (auto& function: functions) { llvm::verifyFunction(function); opt_pm.run(function); } + opt_pm.doFinalization(); } -void CodegenLLVMVisitor::create_external_method_call(const std::string& name, - const ast::ExpressionVector& arguments) { - if (name == "printf") { - create_printf_call(arguments); - return; +void CodegenLLVMVisitor::write_to_variable(const ast::VarName& node, llvm::Value* value) { + const auto& identifier = node.get_name(); + if (!identifier->is_name() && !identifier->is_indexed_name() && + !identifier->is_codegen_instance_var()) { + throw std::runtime_error("Error: the type of '" + node.get_node_name() + + "' is not supported\n"); } - std::vector argument_values; - std::vector argument_types; - for (const auto& arg: arguments) { - arg->accept(*this); - llvm::Value* value = values.back(); - llvm::Type* type = value->getType(); - values.pop_back(); - argument_types.push_back(type); - argument_values.push_back(value); + if (identifier->is_name()) { + ir_builder.create_store(node.get_node_name(), value); } -#define DISPATCH(method_name, intrinsic) \ - if (name == (method_name)) { \ - llvm::Value* result = \ - ir_builder.CreateIntrinsic(intrinsic, argument_types, argument_values); \ - values.push_back(result); \ - return; \ + if (identifier->is_indexed_name()) { + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + llvm::Value* index = get_index(*indexed_name); + ir_builder.create_store_to_array(node.get_node_name(), index, value); } - DISPATCH("exp", llvm::Intrinsic::exp); - DISPATCH("pow", llvm::Intrinsic::pow); -#undef DISPATCH - - throw std::runtime_error("Error: External method" + name + " is not currently supported"); -} - -void CodegenLLVMVisitor::create_function_call(llvm::Function* func, - const std::string& name, - const 
ast::ExpressionVector& arguments) { - // Check that function is called with the expected number of arguments. - if (!func->isVarArg() && arguments.size() != func->arg_size()) { - throw std::runtime_error("Error: Incorrect number of arguments passed"); + if (identifier->is_codegen_instance_var()) { + const auto& instance_var = std::dynamic_pointer_cast(identifier); + read_from_or_write_to_instance(*instance_var, value); } - - // Pack function call arguments to vector and create a call instruction. - std::vector argument_values; - argument_values.reserve(arguments.size()); - pack_function_call_arguments(arguments, argument_values); - llvm::Value* call = ir_builder.CreateCall(func, argument_values); - values.push_back(call); } -void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& arguments) { - // First, create printf declaration or insert it if it does not exit. - std::string name = "printf"; - llvm::Function* printf = module->getFunction(name); - if (!printf) { - llvm::Type* ptr_type = llvm::Type::getInt8PtrTy(*context); - llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::FunctionType* printf_type = - llvm::FunctionType::get(i32_type, ptr_type, /*isVarArg=*/true); - - printf = - llvm::Function::Create(printf_type, llvm::Function::ExternalLinkage, name, *module); - } - - // Create a call instruction. - std::vector argument_values; - argument_values.reserve(arguments.size()); - pack_function_call_arguments(arguments, argument_values); - ir_builder.CreateCall(printf, argument_values); -} +void CodegenLLVMVisitor::wrap_kernel_functions() { + // First, identify all kernels. + std::vector kernel_names; + find_kernel_names(kernel_names); -void CodegenLLVMVisitor::emit_procedure_or_function_declaration(const ast::CodegenFunction& node) { - const auto& name = node.get_node_name(); - const auto& arguments = node.get_arguments(); + for (const auto& kernel_name: kernel_names) { + // Get the kernel function and the instance struct type. 
+ auto kernel = module->getFunction(kernel_name); + if (!kernel) + throw std::runtime_error("Error: kernel " + kernel_name + " is not found\n"); - // Procedure or function parameters are doubles by default. - std::vector arg_types; - for (size_t i = 0; i < arguments.size(); ++i) - arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); + if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) + throw std::runtime_error("Error: kernel " + kernel_name + + " must have a single argument\n"); - llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); + auto instance_struct_ptr_type = llvm::dyn_cast( + kernel->getArg(0)->getType()); + if (!instance_struct_ptr_type) + throw std::runtime_error("Error: kernel " + kernel_name + + " does not have an instance struct pointer as an argument\n"); - // Create a function that is automatically inserted into module's symbol table. - auto func = - llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - name, - *module); + // Create a wrapper void function that takes a void pointer as a single argument. + llvm::Type* i32_type = ir_builder.get_i32_type(); + llvm::Type* void_ptr_type = ir_builder.get_i8_ptr_type(); + llvm::Function* wrapper_func = llvm::Function::Create( + llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + "__" + kernel_name + "_wrapper", + *module); - // Add function debug information, with location information if it exists. - if (add_debug_information) { - if (node.get_token()) { - Location loc{node.get_token()->start_line(), node.get_token()->start_column()}; - debug_builder.add_function_debug_info(func, &loc); - } else { - debug_builder.add_function_debug_info(func); + // Optionally, add debug information for the wrapper function. 
+ if (add_debug_information) { + debug_builder.add_function_debug_info(wrapper_func); } - } -} -llvm::Value* CodegenLLVMVisitor::lookup(const std::string& name) { - auto val = current_func->getValueSymbolTable()->lookup(name); - if (!val) - throw std::runtime_error("Error: variable " + name + " is not in scope\n"); - return val; -} + ir_builder.create_block_and_set_insertion_point(wrapper_func); -void CodegenLLVMVisitor::pack_function_call_arguments(const ast::ExpressionVector& arguments, - std::vector& arg_values) { - for (const auto& arg: arguments) { - if (arg->is_string()) { - // If the argument is a string, create a global i8* variable with it. - auto string_arg = std::dynamic_pointer_cast(arg); - llvm::Value* str = ir_builder.CreateGlobalStringPtr(string_arg->get_value()); - arg_values.push_back(str); - } else { - arg->accept(*this); - llvm::Value* value = values.back(); - values.pop_back(); - arg_values.push_back(value); - } - } -} - -llvm::Value* CodegenLLVMVisitor::visit_arithmetic_bin_op(llvm::Value* lhs, - llvm::Value* rhs, - unsigned op) { - const auto& bin_op = static_cast(op); - llvm::Type* lhs_type = lhs->getType(); - llvm::Value* result; - - switch (bin_op) { -#define DISPATCH(binary_op, llvm_fp_op, llvm_int_op) \ - case binary_op: \ - if (lhs_type->isIntOrIntVectorTy()) \ - result = llvm_int_op(lhs, rhs); \ - else \ - result = llvm_fp_op(lhs, rhs); \ - return result; - - DISPATCH(ast::BinaryOp::BOP_ADDITION, ir_builder.CreateFAdd, ir_builder.CreateAdd); - DISPATCH(ast::BinaryOp::BOP_DIVISION, ir_builder.CreateFDiv, ir_builder.CreateSDiv); - DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, ir_builder.CreateFMul, ir_builder.CreateMul); - DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, ir_builder.CreateFSub, ir_builder.CreateSub); - -#undef DISPATCH + // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel + // and adding a terminator. 
+ llvm::Value* bitcasted = ir_builder.create_bitcast(wrapper_func->getArg(0), + instance_struct_ptr_type); + ValueVector args; + args.push_back(bitcasted); + ir_builder.create_function_call(kernel, args, /*use_result=*/false); - default: - return nullptr; + // Create a 0 return value and a return instruction. + ir_builder.create_i32_constant(0); + ir_builder.create_return(ir_builder.pop_last_value()); } } -void CodegenLLVMVisitor::visit_assign_op(const ast::BinaryExpression& node, llvm::Value* rhs) { - auto var = dynamic_cast(node.get_lhs().get()); - if (!var) - throw std::runtime_error("Error: only VarName assignment is supported!"); - - llvm::Value* ptr = get_variable_ptr(*var); - ir_builder.CreateStore(rhs, ptr); -} - -llvm::Value* CodegenLLVMVisitor::visit_logical_bin_op(llvm::Value* lhs, - llvm::Value* rhs, - unsigned op) { - const auto& bin_op = static_cast(op); - return bin_op == ast::BinaryOp::BOP_AND ? ir_builder.CreateAnd(lhs, rhs) - : ir_builder.CreateOr(lhs, rhs); -} - -llvm::Value* CodegenLLVMVisitor::visit_comparison_bin_op(llvm::Value* lhs, - llvm::Value* rhs, - unsigned op) { - const auto& bin_op = static_cast(op); - llvm::Type* lhs_type = lhs->getType(); - llvm::Value* result; - - switch (bin_op) { -#define DISPATCH(binary_op, i_llvm_op, f_llvm_op) \ - case binary_op: \ - if (lhs_type->isDoubleTy() || lhs_type->isFloatTy()) \ - result = f_llvm_op(lhs, rhs); \ - else \ - result = i_llvm_op(lhs, rhs); \ - return result; - - DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, ir_builder.CreateICmpEQ, ir_builder.CreateFCmpOEQ); - DISPATCH(ast::BinaryOp::BOP_GREATER, ir_builder.CreateICmpSGT, ir_builder.CreateFCmpOGT); - DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, - ir_builder.CreateICmpSGE, - ir_builder.CreateFCmpOGE); - DISPATCH(ast::BinaryOp::BOP_LESS, ir_builder.CreateICmpSLT, ir_builder.CreateFCmpOLT); - DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, ir_builder.CreateICmpSLE, ir_builder.CreateFCmpOLE); - DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, ir_builder.CreateICmpNE, 
ir_builder.CreateFCmpONE); - -#undef DISPATCH - - default: - return nullptr; - } -} /****************************************************************************************/ /* Overloaded visitor routines */ @@ -525,43 +417,18 @@ void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& no // Process rhs first, since lhs is handled differently for assignment and binary // operators. - node.get_rhs()->accept(*this); - llvm::Value* rhs = values.back(); - values.pop_back(); + llvm::Value* rhs = accept_and_get(node.get_rhs()); if (op == ast::BinaryOp::BOP_ASSIGN) { - visit_assign_op(node, rhs); - return; - } + auto var = dynamic_cast(node.get_lhs().get()); + if (!var) + throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); - node.get_lhs()->accept(*this); - llvm::Value* lhs = values.back(); - values.pop_back(); - - llvm::Value* result; - switch (op) { - case ast::BOP_ADDITION: - case ast::BOP_DIVISION: - case ast::BOP_MULTIPLICATION: - case ast::BOP_SUBTRACTION: - result = visit_arithmetic_bin_op(lhs, rhs, op); - break; - case ast::BOP_AND: - case ast::BOP_OR: - result = visit_logical_bin_op(lhs, rhs, op); - break; - case ast::BOP_EXACT_EQUAL: - case ast::BOP_GREATER: - case ast::BOP_GREATER_EQUAL: - case ast::BOP_LESS: - case ast::BOP_LESS_EQUAL: - case ast::BOP_NOT_EQUAL: - result = visit_comparison_bin_op(lhs, rhs, op); - break; - default: - throw std::runtime_error("Error: binary operator is not supported\n"); + write_to_variable(*var, rhs); + return; } - values.push_back(result); + llvm::Value* lhs = accept_and_get(node.get_lhs()); + ir_builder.create_binary_op(lhs, rhs, op); } void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) { @@ -573,9 +440,7 @@ void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) } void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { - const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*context), - 
node.get_value()); - values.push_back(constant); + ir_builder.create_boolean_constant(node.get_value()); } // Generating FOR loop in LLVM IR creates the following structure: @@ -612,10 +477,10 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { // +---------------------------+ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatement& node) { // Disable vector code generation for condition and increment blocks. - is_kernel_code = false; + ir_builder.stop_vectorization(); // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.get_current_block(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -631,10 +496,12 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem int tmp_vector_width = vector_width; // Check if the kernel can be vectorised. If not, generate scalar code. - if (!can_vectorise(node, sym_tab)) { - logger->info("Cannot vectorise the for loop in '" + current_func->getName().str() + "'"); + if (!can_vectorize(node, sym_tab)) { + logger->info("Cannot vectorise the for loop in '" + ir_builder.get_current_function_name() + + "'"); logger->info("Generating scalar code..."); vector_width = 1; + ir_builder.generate_scalar_code(); } // First, initialise the loop in the same basic block. This block is optional. Also, reset @@ -643,36 +510,33 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem node.get_initialization()->accept(*this); } else { vector_width = 1; + ir_builder.generate_scalar_code(); } // Branch to condition basic block and insert condition code there. 
- ir_builder.CreateBr(for_cond); - ir_builder.SetInsertPoint(for_cond); - node.get_condition()->accept(*this); + ir_builder.create_br_and_set_insertion_point(for_cond); // Extract the condition to decide whether to branch to the loop body or loop exit. - llvm::Value* cond = values.back(); - values.pop_back(); - ir_builder.CreateCondBr(cond, for_body, exit); + llvm::Value* cond = accept_and_get(node.get_condition()); + ir_builder.create_cond_br(cond, for_body, exit); // Generate code for the loop body and create the basic block for the increment. - ir_builder.SetInsertPoint(for_body); - is_kernel_code = true; + ir_builder.set_insertion_point(for_body); + ir_builder.start_vectorization(); const auto& statement_block = node.get_statement_block(); statement_block->accept(*this); - is_kernel_code = false; - ir_builder.CreateBr(for_inc); - + ir_builder.stop_vectorization(); + ir_builder.create_br_and_set_insertion_point(for_inc); // Process increment. - ir_builder.SetInsertPoint(for_inc); node.get_increment()->accept(*this); // Create a branch to condition block, then generate exit code out of the loop. Restore the // vector width. - ir_builder.CreateBr(for_cond); - ir_builder.SetInsertPoint(exit); + ir_builder.create_br(for_cond); + ir_builder.set_insertion_point(exit); vector_width = tmp_vector_width; - is_kernel_code = true; + ir_builder.generate_vectorized_code(); + ir_builder.start_vectorization(); } @@ -680,12 +544,11 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node const auto& name = node.get_node_name(); const auto& arguments = node.get_arguments(); llvm::Function* func = module->getFunction(name); - current_func = func; + ir_builder.set_function(func); // Create the entry basic block of the function/procedure and point the local named values table // to the symbol table. 
- llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func); - ir_builder.SetInsertPoint(body); + llvm::BasicBlock* body = ir_builder.create_block_and_set_insertion_point(func); // When processing a function, it returns a value named in NMODL. Therefore, we // first run RenameVisitor to rename it into ret_. This will aid in avoiding @@ -697,84 +560,59 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Allocate parameters on the stack and add them to the symbol table. - unsigned i = 0; - for (auto& arg: func->args()) { - std::string arg_name = arguments[i++].get()->get_node_name(); - llvm::Type* arg_type = arg.getType(); - llvm::Value* alloca = ir_builder.CreateAlloca(arg_type, /*ArraySize=*/nullptr, arg_name); - arg.setName(arg_name); - ir_builder.CreateStore(&arg, alloca); - } + ir_builder.allocate_function_arguments(func, arguments); // Process function or procedure body. If the function is a compute kernel, then set the // corresponding flags. The return statement is handled in a separate visitor. bool has_void_ret_type = node.get_return_type()->get_type() == ast::AstNodeType::VOID; if (has_void_ret_type) { - is_kernel_code = true; + ir_builder.start_vectorization(); block->accept(*this); - is_kernel_code = false; + ir_builder.stop_vectorization(); } else { block->accept(*this); } // If function has a void return type, add a terminator not handled by CodegenReturnVar. if (has_void_ret_type) - ir_builder.CreateRetVoid(); + ir_builder.create_return(); // Clear local values stack and remove the pointer to the local symbol table. 
- values.clear(); - current_func = nullptr; + ir_builder.clear_function(); } void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturnStatement& node) { if (!node.get_statement()->is_name()) throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); - std::string ret = "ret_" + current_func->getName().str(); - llvm::Value* ret_value = ir_builder.CreateLoad(lookup(ret)); - ir_builder.CreateRet(ret_value); + std::string ret = "ret_" + ir_builder.get_current_function_name(); + llvm::Value* ret_value = ir_builder.create_load(ret); + ir_builder.create_return(ret_value); } void CodegenLLVMVisitor::visit_codegen_var_list_statement( const ast::CodegenVarListStatement& node) { - llvm::Type* scalar_var_type = get_codegen_var_type(*node.get_var_type()); + llvm::Type* scalar_type = get_codegen_var_type(*node.get_var_type()); for (const auto& variable: node.get_variables()) { - std::string name = variable->get_node_name(); const auto& identifier = variable->get_name(); + std::string name = variable->get_node_name(); + // Local variable can be a scalar (Node AST class) or an array (IndexedName AST class). For - // each case, create memory allocations with the corresponding LLVM type. - llvm::Type* var_type; + // each case, create memory allocations. if (identifier->is_indexed_name()) { - auto indexed_name = std::dynamic_pointer_cast(identifier); - int length = get_array_length(*indexed_name); - var_type = llvm::ArrayType::get(scalar_var_type, length); + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + int length = get_num_elements(*indexed_name); + ir_builder.create_array_alloca(name, scalar_type, length); } else if (identifier->is_name()) { - // This case corresponds to a scalar or vector local variable. - const auto& identifier_name = identifier->get_node_name(); - - // Even if generating vectorised code, some variables still need to be scalar. 
- // Particularly, the induction variable "id" and remainder loop variables (that start - // with "epilogue"). - if (is_kernel_code && vector_width > 1 && identifier_name != kernel_id && - identifier_name.rfind("epilogue", 0)) { - var_type = llvm::FixedVectorType::get(scalar_var_type, vector_width); - } else { - var_type = scalar_var_type; - } + ir_builder.create_scalar_or_vector_alloca(name, scalar_type); } else { - throw std::runtime_error("Error: Unsupported local variable type"); + throw std::runtime_error("Error: unsupported local variable type\n"); } - ir_builder.CreateAlloca(var_type, /*ArraySize=*/nullptr, name); } } void CodegenLLVMVisitor::visit_double(const ast::Double& node) { - if (is_kernel_code && vector_width > 1) { - values.push_back(get_constant_fp_vector(node.get_value())); - return; - } - const auto& constant = llvm::ConstantFP::get(get_default_fp_type(), node.get_value()); - values.push_back(constant); + ir_builder.create_fp_constant(node.get_value()); } void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { @@ -783,23 +621,22 @@ void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { const auto& name = node.get_node_name(); - auto func = module->getFunction(name); + llvm::Function* func = module->getFunction(name); if (func) { create_function_call(func, name, node.get_arguments()); } else { auto symbol = sym_tab->lookup(name); if (symbol && symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) { - create_external_method_call(name, node.get_arguments()); + create_external_function_call(name, node.get_arguments()); } else { - throw std::runtime_error("Error: Unknown function name: " + name + - ". 
(External functions references are not supported)"); + throw std::runtime_error("Error: unknown function name: " + name + "\n"); } } } void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.get_current_block(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -808,14 +645,12 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { llvm::BasicBlock* merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); // Add condition to the current block. - node.get_condition()->accept(*this); - llvm::Value* cond = values.back(); - values.pop_back(); + llvm::Value* cond = accept_and_get(node.get_condition()); // Process the true block. - ir_builder.SetInsertPoint(true_block); + ir_builder.set_insertion_point(true_block); node.get_statement_block()->accept(*this); - ir_builder.CreateBr(merge_block); + ir_builder.create_br(merge_block); // Save the merge block and proceed with codegen for `else if` statements. llvm::BasicBlock* exit = merge_block; @@ -823,27 +658,25 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // Link the current block to the true and else blocks. llvm::BasicBlock* else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - ir_builder.SetInsertPoint(curr_block); - ir_builder.CreateCondBr(cond, true_block, else_block); + ir_builder.set_insertion_point(curr_block); + ir_builder.create_cond_br(cond, true_block, else_block); // Process else block. - ir_builder.SetInsertPoint(else_block); - else_if->get_condition()->accept(*this); - cond = values.back(); - values.pop_back(); + ir_builder.set_insertion_point(else_block); + cond = accept_and_get(else_if->get_condition()); // Reassign true and merge blocks respectively. 
Note that the new merge block has to be // connected to the old merge block (tmp). true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); llvm::BasicBlock* tmp = merge_block; merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - ir_builder.SetInsertPoint(merge_block); - ir_builder.CreateBr(tmp); + ir_builder.set_insertion_point(merge_block); + ir_builder.create_br(tmp); // Process true block. - ir_builder.SetInsertPoint(true_block); + ir_builder.set_insertion_point(true_block); else_if->get_statement_block()->accept(*this); - ir_builder.CreateBr(merge_block); + ir_builder.create_br(merge_block); curr_block = else_block; } @@ -852,25 +685,19 @@ void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { llvm::BasicBlock* else_block; if (elses) { else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); - ir_builder.SetInsertPoint(else_block); + ir_builder.set_insertion_point(else_block); elses->get_statement_block()->accept(*this); - ir_builder.CreateBr(merge_block); + ir_builder.create_br(merge_block); } else { else_block = merge_block; } - ir_builder.SetInsertPoint(curr_block); - ir_builder.CreateCondBr(cond, true_block, else_block); - ir_builder.SetInsertPoint(exit); + ir_builder.set_insertion_point(curr_block); + ir_builder.create_cond_br(cond, true_block, else_block); + ir_builder.set_insertion_point(exit); } void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { - if (is_kernel_code && vector_width > 1) { - values.push_back(get_constant_int_vector(node.get_value())); - return; - } - const auto& constant = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), - node.get_value()); - values.push_back(constant); + ir_builder.create_i32_constant(node.get_value()); } void CodegenLLVMVisitor::visit_program(const ast::Program& node) { @@ -881,7 +708,11 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { CodegenLLVMHelperVisitor 
v{vector_width}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); - kernel_id = v.get_kernel_id(); + sym_tab = node.get_symbol_table(); + std::string kernel_id = v.get_kernel_id(); + + // Initialize the builder for this NMODL program. + ir_builder.initialize(*sym_tab, kernel_id); // Create compile unit if adding debug information to the module. if (add_debug_information) { @@ -891,12 +722,9 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // For every function, generate its declaration. Thus, we can look up // `llvm::Function` in the symbol table in the module. for (const auto& func: functions) { - emit_procedure_or_function_declaration(*func); + create_function_declaration(*func); } - // Set the AST symbol table. - sym_tab = node.get_symbol_table(); - // Proceed with code generation. Right now, we do not do // node.visit_children(*this); // The reason is that the node may contain AST nodes for which the visitor functions have been @@ -977,40 +805,18 @@ void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node) { ast::UnaryOp op = node.get_op().get_value(); - node.get_expression()->accept(*this); - llvm::Value* value = values.back(); - values.pop_back(); - if (op == ast::UOP_NEGATION) { - values.push_back(ir_builder.CreateFNeg(value)); - } else if (op == ast::UOP_NOT) { - values.push_back(ir_builder.CreateNot(value)); - } else { - throw std::runtime_error("Error: unsupported unary operator\n"); - } + llvm::Value* value = accept_and_get(node.get_expression()); + ir_builder.create_unary_op(value, op); } void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { - llvm::Value* ptr = get_variable_ptr(node); - - // Finally, load the variable from the pointer value unless it has already been loaded (e.g. via - // gather instruction). 
- llvm::Value* var = ptr->getType()->isPointerTy() ? ir_builder.CreateLoad(ptr) : ptr; - - // If the value should not be vectorised, or it is already a vector, add it to the stack. - if (!is_kernel_code || vector_width <= 1 || var->getType()->isVectorTy()) { - values.push_back(var); - return; - } - - // Otherwise, if we are generating vectorised inside the loop, replicate the value to form a - // vector of `vector_width`. - llvm::Value* vector_var = ir_builder.CreateVectorSplat(vector_width, var); - values.push_back(vector_var); + llvm::Value* value = read_variable(node); + ir_builder.maybe_replicate_value(value); } void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { // Get the current and the next blocks within the function. - llvm::BasicBlock* curr_block = ir_builder.GetInsertBlock(); + llvm::BasicBlock* curr_block = ir_builder.get_current_block(); llvm::BasicBlock* next = curr_block->getNextNode(); llvm::Function* func = curr_block->getParent(); @@ -1019,78 +825,18 @@ void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); - ir_builder.CreateBr(header); - ir_builder.SetInsertPoint(header); + ir_builder.create_br_and_set_insertion_point(header); + // Generate code for condition and create branch to the body block. 
- node.get_condition()->accept(*this); - llvm::Value* condition = values.back(); - values.pop_back(); - ir_builder.CreateCondBr(condition, body, exit); + llvm::Value* condition = accept_and_get(node.get_condition()); + ir_builder.create_cond_br(condition, body, exit); - ir_builder.SetInsertPoint(body); + ir_builder.set_insertion_point(body); node.get_statement_block()->accept(*this); - ir_builder.CreateBr(header); + ir_builder.create_br(header); - ir_builder.SetInsertPoint(exit); -} - -void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { - // By convention, only the kernel functions return void type. - const auto& functions = module->getFunctionList(); - for (const auto& func: functions) { - if (func.getReturnType()->isVoidTy()) { - container.push_back(func.getName().str()); - } - } -} - -void CodegenLLVMVisitor::wrap_kernel_functions() { - // First, identify all kernels. - std::vector kernel_names; - find_kernel_names(kernel_names); - - for (const auto& kernel_name: kernel_names) { - // Get the kernel function and the instance struct type. - auto kernel = module->getFunction(kernel_name); - if (!kernel) - throw std::runtime_error("Kernel " + kernel_name + " is not found!"); - - if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) - throw std::runtime_error("Kernel " + kernel_name + " must have a single argument!"); - - auto instance_struct_ptr_type = llvm::dyn_cast( - kernel->getArg(0)->getType()); - if (!instance_struct_ptr_type) - throw std::runtime_error("Kernel " + kernel_name + - " does not have an instance struct pointer argument!"); - - // Create a wrapper void function that takes a void pointer as a single argument. 
- llvm::Type* i32_type = llvm::Type::getInt32Ty(*context); - llvm::Type* void_ptr_type = llvm::Type::getInt8PtrTy(*context); - llvm::Function* wrapper_func = llvm::Function::Create( - llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - "__" + kernel_name + "_wrapper", - *module); - - // Optionally, add debug information for the wrapper function. - if (add_debug_information) { - debug_builder.add_function_debug_info(wrapper_func); - } - - llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", wrapper_func); - ir_builder.SetInsertPoint(body); - - // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel - // and adding a terminator. - llvm::Value* bitcasted = ir_builder.CreateBitCast(wrapper_func->getArg(0), - instance_struct_ptr_type); - std::vector args; - args.push_back(bitcasted); - ir_builder.CreateCall(kernel, args); - ir_builder.CreateRet(llvm::ConstantInt::get(i32_type, 0)); - } + ir_builder.set_insertion_point(exit); } } // namespace codegen diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 450e1872a4..0ada7b8097 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -20,13 +20,13 @@ #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "codegen/llvm/llvm_debug_builder.hpp" +#include "codegen/llvm/llvm_ir_builder.hpp" #include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -63,76 +63,50 @@ static const std::map v * \brief %Visitor for transforming NMODL AST to LLVM IR */ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { - // Name of mod file (without .mod suffix) + /// Name of 
mod file (without .mod suffix). std::string mod_filename; - // Output directory for code generation + /// Output directory for code generation. std::string output_dir; private: - InstanceVarHelper instance_var_helper; - + /// Underlying LLVM context. std::unique_ptr context = std::make_unique(); + /// Underlying LLVM module. std::unique_ptr module = std::make_unique(mod_filename, *context); - // LLVM IR builder. - llvm::IRBuilder<> ir_builder; + /// LLVM IR builder. + IRBuilder ir_builder; - // Debug information builder. + /// Debug information builder. DebugBuilder debug_builder; - // Add debug information to the module. + /// Add debug information to the module. bool add_debug_information; - // Pass manager for optimisation passes that are used for target code generation. - llvm::legacy::FunctionPassManager codegen_pm; - - // Vector library used for maths functions. - llvm::TargetLibraryInfoImpl::VectorLibrary vector_library; - - // Pass manager for optimisation passes that are run on IR and are not related to target. - llvm::legacy::FunctionPassManager opt_pm; - - // Stack to hold visited values - std::vector values; - - // Pointer to the current function. - llvm::Function* current_func = nullptr; - - // Pointer to AST symbol table. + /// Pointer to AST symbol table. symtab::SymbolTable* sym_tab; - // Run optimisation passes if true. - bool opt_passes; + /// Instance variable helper. + InstanceVarHelper instance_var_helper; - // Use 32-bit floating-point type if true. Otherwise, use deafult 64-bit. - bool use_single_precision; + /// Run optimisation passes if true. + bool opt_passes; - // Explicit vectorisation width. - int vector_width; + /// Pass manager for optimisation passes that are run on IR and are not related to target. + llvm::legacy::FunctionPassManager opt_pm; - // The name of induction variable used in the kernel functions. - std::string kernel_id; + /// Pass manager for optimisation passes that are used for target code generation. 
+ llvm::legacy::FunctionPassManager codegen_pm; - // A flag to indicate that the code is generated for the kernel. - bool is_kernel_code = false; + /// Vector library used for maths functions. + llvm::TargetLibraryInfoImpl::VectorLibrary vector_library; - /** - *\brief Run LLVM optimisation passes on generated IR - * - * LLVM provides number of optimisation passes that can be run on the generated IR. - * Here we run common optimisation LLVM passes that benefits code optimisation. - */ - void run_ir_opt_passes(); + /// Explicit vectorisation width. + int vector_width; public: - /** - * \brief Constructs the LLVM code generator visitor - * - * This constructor instantiates an NMODL LLVM code generator. This is - * just template to work with initial implementation. - */ CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, bool opt_passes, @@ -143,202 +117,44 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) - , use_single_precision(use_single_precision) , vector_width(vector_width) , vector_library(veclib_map.at(vec_lib)) , add_debug_information(add_debug_information) - , ir_builder(*context) + , ir_builder(*context, use_single_precision, vector_width) , debug_builder(*module) , codegen_pm(module.get()) , opt_pm(module.get()) {} + /// Dumps the generated LLVM IR module to string. 
+ std::string dump_module() const { + std::string str; + llvm::raw_string_ostream os(str); + os << *module; + os.flush(); + return str; + } - /** - * Generates LLVM code for the given IndexedName - * \param node IndexedName NMODL AST node - * \return LLVM code generated for this AST node - */ - llvm::Value* codegen_indexed_name(const ast::IndexedName& node); - - /** - * Generates LLVM code for the given Instance variable - * \param node CodegenInstanceVar NMODL AST node - * \return LLVM code generated for this AST node - */ - llvm::Value* codegen_instance_var(const ast::CodegenInstanceVar& node); - - /** - * Returns GEP instruction to 1D array - * \param name 1D array name - * \param index element index - * \return GEP instruction value - */ - llvm::Value* create_gep(const std::string& name, llvm::Value* index); - - /** - * Returns array index from given IndexedName - * \param node IndexedName representing array - * \return array index - */ - llvm::Value* get_array_index(const ast::IndexedName& node); - - /** - * Returns array length from given IndexedName - * \param node IndexedName representing array - * \return array length - */ - int get_array_length(const ast::IndexedName& node); - - /** - * Returns LLVM type for the given CodegenVarType node - * \param node CodegenVarType - * \return LLVM type - */ - llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); + /// Fills the container with the names of kernel functions from the MOD file. + void find_kernel_names(std::vector& container); - /** - * Returns LLVM vector with `vector_width` int values. - * \param int value to replicate - * \return LLVM value - */ - llvm::Value* get_constant_int_vector(int value); - - /** - * Returns LLVM vector with `vector_width` double values. 
- * \param string a double value to replicate - * \return LLVM value - */ - llvm::Value* get_constant_fp_vector(const std::string& value); - - /** - * Returns 64-bit or 32-bit LLVM floating type - * \return \c LLVM floating point type according to `use_single_precision` flag - */ - llvm::Type* get_default_fp_type(); - - /** - * Returns pointer to 64-bit or 32-bit LLVM floating type - * \return \c LLVM pointer to floating point type according to `use_single_precision` flag - */ - llvm::Type* get_default_fp_ptr_type(); - - /** - * Returns a pointer to LLVM struct type - * \return LLVM pointer type - */ - llvm::Type* get_instance_struct_type(); + /// Returns underlying module. + std::unique_ptr get_module() { + return std::move(module); + } - /** - * Returns a LLVM value corresponding to the VarName node - * \return LLVM value - */ - llvm::Value* get_variable_ptr(const ast::VarName& node); - - /** - * Returns shared_ptr to generated ast::InstanceStruct - * \return std::shared_ptr - */ - std::shared_ptr get_instance_struct_ptr(); - - /** - * Create a function call to an external method - * \param name external method name - * \param arguments expressions passed as arguments to the given external method - */ - void create_external_method_call(const std::string& name, - const ast::ExpressionVector& arguments); - - /** - * Create a function call to NMODL function or procedure in the same mod file - * \param func LLVM function corresponding ti this call - * \param name function name - * \param arguments expressions passed as arguments to the function call - */ - void create_function_call(llvm::Function* func, - const std::string& name, - const ast::ExpressionVector& arguments); - /** - * Create a function call to printf function - * \param arguments expressions passed as arguments to the printf call - */ - void create_printf_call(const ast::ExpressionVector& arguments); + /// Returns shared_ptr to generated ast::InstanceStruct. 
+ std::shared_ptr get_instance_struct_ptr() { + return instance_var_helper.instance; + } - /** - * Emit function or procedure declaration in LLVM given the node - * - * \param node the AST node representing the function or procedure in NMODL - */ - void emit_procedure_or_function_declaration(const ast::CodegenFunction& node); - - /** - * Return InstanceVarHelper - * \return InstanceVarHelper - */ + /// Returns InstanceVarHelper for the given MOD file. InstanceVarHelper get_instance_var_helper() { return instance_var_helper; } - /** - * Return module pointer - * \return LLVM IR module pointer - */ - std::unique_ptr get_module() { - return std::move(module); - } - - /** - * Lookup the given name in the current function's symbol table - * \return LLVM value - */ - llvm::Value* lookup(const std::string& name); - - /** - * Fills values vector with processed NMODL function call arguments - * \param arguments expression vector - * \param arg_values vector of LLVM IR values to fill - */ - void pack_function_call_arguments(const ast::ExpressionVector& arguments, - std::vector& arg_values); - - /** - * Visit nmodl arithmetic binary operator - * \param lhs LLVM value of evaluated lhs expression - * \param rhs LLVM value of evaluated rhs expression - * \param op the AST binary operator (ADD, DIV, MUL, SUB) - * \return LLVM IR value result - */ - llvm::Value* visit_arithmetic_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); - - /** - * Visit nmodl assignment operator (ASSIGN) - * \param node the AST node representing the binary expression in NMODL - * \param rhs LLVM value of evaluated rhs expression - */ - void visit_assign_op(const ast::BinaryExpression& node, llvm::Value* rhs); - - /** - * Visit nmodl logical binary operator - * \param lhs LLVM value of evaluated lhs expression - * \param rhs LLVM value of evaluated rhs expression - * \param op the AST binary operator (AND, OR) - * \return LLVM IR value result - */ - llvm::Value* visit_logical_bin_op(llvm::Value* 
lhs, llvm::Value* rhs, unsigned op); - - /** - * Visit nmodl comparison binary operator - * \param lhs LLVM value of evaluated lhs expression - * \param rhs LLVM value of evaluated rhs expression - * \param op the AST binary operator (EXACT_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, - * NOT_EQUAL) \return LLVM IR value result - */ - llvm::Value* visit_comparison_bin_op(llvm::Value* lhs, llvm::Value* rhs, unsigned op); - - - // Visitors + // Visitors. void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; - void visit_statement_block(const ast::StatementBlock& node) override; void visit_codegen_for_statement(const ast::CodegenForStatement& node) override; void visit_codegen_function(const ast::CodegenFunction& node) override; void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; @@ -350,31 +166,65 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_integer(const ast::Integer& node) override; void visit_procedure_block(const ast::ProcedureBlock& node) override; void visit_program(const ast::Program& node) override; + void visit_statement_block(const ast::StatementBlock& node) override; void visit_unary_expression(const ast::UnaryExpression& node) override; void visit_var_name(const ast::VarName& node) override; void visit_while_statement(const ast::WhileStatement& node) override; - /** - * Dumps the generated LLVM IR module to string. - */ - std::string dump_module() const { - std::string str; - llvm::raw_string_ostream os(str); - os << *module; - os.flush(); - return str; - } + /// Wraps all kernel function calls into wrapper functions that use `void*` to pass the data to + /// the kernel. + void wrap_kernel_functions(); - /** - * Fills the container with the names of kernel functions from the MOD file. - */ - void find_kernel_names(std::vector& container); + private: + /// Accepts the given AST node and returns the processed value. 
+ llvm::Value* accept_and_get(const std::shared_ptr& node); - /** - * Wraps all kernel function calls into wrapper functions that use void* to pass the data to the - * kernel. - */ - void wrap_kernel_functions(); + /// Creates a call to an external function (e.g pow, exp, etc.) + void create_external_function_call(const std::string& name, + const ast::ExpressionVector& arguments); + + /// Creates a call to NMODL function or procedure in the same MOD file. + void create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments); + + /// Fills values vector with processed NMODL function call arguments. + void create_function_call_arguments(const ast::ExpressionVector& arguments, + ValueVector& arg_values); + + /// Creates the function declaration for the given AST node. + void create_function_declaration(const ast::CodegenFunction& node); + + /// Creates a call to `printf` function. + void create_printf_call(const ast::ExpressionVector& arguments); + + /// Returns LLVM type for the given CodegenVarType AST node. + llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); + + /// Returns the index value from the IndexedName AST node. + llvm::Value* get_index(const ast::IndexedName& node); + + /// Returns an instance struct type. + llvm::Type* get_instance_struct_type(); + + /// Returns the number of elements in the array specified by the IndexedName AST node. + int get_num_elements(const ast::IndexedName& node); + + /// If the value to store is specified, writes it to the instance. Otherwise, returns the + /// instance variable. + llvm::Value* read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, + llvm::Value* maybe_value_to_store = nullptr); + + /// Reads the given variable and returns the processed value. + llvm::Value* read_variable(const ast::VarName& node); + + + /// Run multiple LLVM optimisation passes on generated IR. + /// TODO: this can be moved to a dedicated file or deprecated. 
+ void run_ir_opt_passes(); + + //// Writes the value to the given variable. + void write_to_variable(const ast::VarName& node, llvm::Value* value); }; /** \} */ // end of llvm_backends diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp new file mode 100644 index 0000000000..2773e6929b --- /dev/null +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -0,0 +1,427 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/llvm_ir_builder.hpp" +#include "ast/all.hpp" + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/ValueSymbolTable.h" + +namespace nmodl { +namespace codegen { + + +/****************************************************************************************/ +/* LLVM type utilities */ +/****************************************************************************************/ + +llvm::Type* IRBuilder::get_boolean_type() { + return llvm::Type::getInt1Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i8_ptr_type() { + return llvm::Type::getInt8PtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i32_type() { + return llvm::Type::getInt32Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i32_ptr_type() { + return llvm::Type::getInt32PtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i64_type() { + return llvm::Type::getInt64Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_fp_type() { + if (fp_precision == single_precision) + return llvm::Type::getFloatTy(builder.getContext()); + return llvm::Type::getDoubleTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_fp_ptr_type() { + if (fp_precision == single_precision) + return 
llvm::Type::getFloatPtrTy(builder.getContext()); + return llvm::Type::getDoublePtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_void_type() { + return llvm::Type::getVoidTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_struct_ptr_type(const std::string& struct_type_name, + TypeVector& member_types) { + llvm::StructType* llvm_struct_type = llvm::StructType::create(builder.getContext(), + struct_type_name); + llvm_struct_type->setBody(member_types); + return llvm::PointerType::get(llvm_struct_type, /*AddressSpace=*/0); +} + + +/****************************************************************************************/ +/* LLVM value utilities */ +/****************************************************************************************/ + +llvm::Value* IRBuilder::lookup_value(const std::string& value_name) { + auto value = current_function->getValueSymbolTable()->lookup(value_name); + if (!value) + throw std::runtime_error("Error: variable " + value_name + " is not in the scope\n"); + return value; +} + +llvm::Value* IRBuilder::pop_last_value() { + // Check if the stack is empty. + if (value_stack.empty()) + throw std::runtime_error("Error: popping a value from the empty stack\n"); + + // Return the last added value and delete it from the stack. 
+ llvm::Value* last = value_stack.back(); + value_stack.pop_back(); + return last; +} + +/****************************************************************************************/ +/* LLVM constants utilities */ +/****************************************************************************************/ + +void IRBuilder::create_boolean_constant(int value) { + value_stack.push_back(get_vector_constant(get_boolean_type(), value)); +} + +void IRBuilder::create_fp_constant(const std::string& value) { + if (instruction_width > 1 && vectorize) { + value_stack.push_back(get_vector_constant(get_fp_type(), value)); + } else { + value_stack.push_back(get_scalar_constant(get_fp_type(), value)); + } +} + +llvm::Value* IRBuilder::create_global_string(const ast::String& node) { + return builder.CreateGlobalStringPtr(node.get_value()); +} + +void IRBuilder::create_i32_constant(int value) { + if (instruction_width > 1 && vectorize) { + value_stack.push_back(get_vector_constant(get_i32_type(), value)); + } else { + value_stack.push_back(get_scalar_constant(get_i32_type(), value)); + } +} + +template +llvm::Value* IRBuilder::get_scalar_constant(llvm::Type* type, V value) { + return C::get(type, value); +} + +template +llvm::Value* IRBuilder::get_vector_constant(llvm::Type* type, V value) { + ConstantVector constants; + for (unsigned i = 0; i < instruction_width; ++i) { + const auto& element = C::get(type, value); + constants.push_back(element); + } + return llvm::ConstantVector::get(constants); +} + +/****************************************************************************************/ +/* LLVM function utilities */ +/****************************************************************************************/ + +void IRBuilder::allocate_function_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments) { + unsigned i = 0; + for (auto& arg: function->args()) { + std::string arg_name = nmodl_arguments[i++].get()->get_node_name(); + llvm::Type* 
arg_type = arg.getType(); + llvm::Value* alloca = builder.CreateAlloca(arg_type, /*ArraySize=*/nullptr, arg_name); + arg.setName(arg_name); + builder.CreateStore(&arg, alloca); + } +} + +std::string IRBuilder::get_current_function_name() { + return current_function->getName().str(); +} + +void IRBuilder::create_function_call(llvm::Function* callee, + ValueVector& arguments, + bool use_result) { + llvm::Value* call_instruction = builder.CreateCall(callee, arguments); + if (use_result) + value_stack.push_back(call_instruction); +} + +void IRBuilder::create_intrinsic(const std::string& name, + ValueVector& argument_values, + TypeVector& argument_types) { + unsigned intrinsic_id = llvm::StringSwitch(name) + .Case("exp", llvm::Intrinsic::exp) + .Case("pow", llvm::Intrinsic::pow) + .Default(llvm::Intrinsic::not_intrinsic); + if (intrinsic_id) { + llvm::Value* intrinsic = + builder.CreateIntrinsic(intrinsic_id, argument_types, argument_values); + value_stack.push_back(intrinsic); + } else { + throw std::runtime_error("Error: calls to " + name + " are not valid or not supported\n"); + } +} + +/****************************************************************************************/ +/* LLVM instruction utilities */ +/****************************************************************************************/ + +void IRBuilder::create_array_alloca(const std::string& name, + llvm::Type* element_type, + int num_elements) { + llvm::Type* array_type = llvm::ArrayType::get(element_type, num_elements); + builder.CreateAlloca(array_type, /*ArraySize=*/nullptr, name); +} + +void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) { + // Check that both lhs and rhs have the same types. 
+ if (lhs->getType() != rhs->getType()) + throw std::runtime_error( + "Error: lhs and rhs of the binary operator have different types\n"); + + llvm::Value* result; + switch (op) { +#define DISPATCH(binary_op, fp_instruction, integer_instruction) \ + case binary_op: \ + if (lhs->getType()->isIntOrIntVectorTy()) \ + result = integer_instruction(lhs, rhs); \ + else \ + result = fp_instruction(lhs, rhs); \ + break; + + // Arithmetic instructions. + DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd, builder.CreateAdd); + DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv, builder.CreateSDiv); + DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul, builder.CreateMul); + DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub, builder.CreateSub); + + // Comparison instructions. + DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, builder.CreateFCmpOEQ, builder.CreateICmpEQ); + DISPATCH(ast::BinaryOp::BOP_GREATER, builder.CreateFCmpOGT, builder.CreateICmpSGT); + DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, builder.CreateFCmpOGE, builder.CreateICmpSGE); + DISPATCH(ast::BinaryOp::BOP_LESS, builder.CreateFCmpOLT, builder.CreateICmpSLT); + DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, builder.CreateFCmpOLE, builder.CreateICmpSLE); + DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, builder.CreateFCmpONE, builder.CreateICmpNE); + +#undef DISPATCH + + // Logical instructions. 
+ case ast::BinaryOp::BOP_AND: + result = builder.CreateAnd(lhs, rhs); + break; + case ast::BinaryOp::BOP_OR: + result = builder.CreateOr(lhs, rhs); + break; + + default: + throw std::runtime_error("Error: unsupported binary operator\n"); + } + value_stack.push_back(result); +} + +llvm::Value* IRBuilder::create_bitcast(llvm::Value* value, llvm::Type* dst_type) { + return builder.CreateBitCast(value, dst_type); +} + +llvm::Value* IRBuilder::create_inbounds_gep(const std::string& var_name, llvm::Value* index) { + llvm::Value* variable_ptr = lookup_value(var_name); + + // Since we index through the pointer, we need an extra 0 index in the indices list for GEP. + ValueVector indices{llvm::ConstantInt::get(get_i64_type(), 0), index}; + return builder.CreateInBoundsGEP(variable_ptr, indices); +} + +llvm::Value* IRBuilder::create_inbounds_gep(llvm::Value* variable, llvm::Value* index) { + return builder.CreateInBoundsGEP(variable, {index}); +} + +llvm::Value* IRBuilder::create_index(llvm::Value* value) { + // Check if index is a double. While it is possible to use casting from double to integer + // values, we choose not to support these cases. + llvm::Type* value_type = value->getType(); + if (!value_type->isIntOrIntVectorTy()) + throw std::runtime_error("Error: only integer indexing is supported\n"); + + // Conventionally, in LLVM array indices are 64 bit. 
+ llvm::Type* i64_type = get_i64_type(); + if (auto index_type = llvm::dyn_cast(value_type)) { + if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return value; + return builder.CreateSExtOrTrunc(value, i64_type); + } + + const auto& vector_type = llvm::cast(value_type); + const auto& element_type = llvm::cast(vector_type->getElementType()); + if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return value; + return builder.CreateSExtOrTrunc(value, + llvm::FixedVectorType::get(i64_type, instruction_width)); +} + +llvm::Value* IRBuilder::create_load(const std::string& name) { + llvm::Value* ptr = lookup_value(name); + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + return builder.CreateLoad(loaded_type, ptr); +} + +llvm::Value* IRBuilder::create_load(llvm::Value* ptr) { + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + return builder.CreateLoad(loaded_type, ptr); +} + +llvm::Value* IRBuilder::create_load_from_array(const std::string& name, llvm::Value* index) { + llvm::Value* element_ptr = create_inbounds_gep(name, index); + return create_load(element_ptr); +} + +void IRBuilder::create_store(const std::string& name, llvm::Value* value) { + llvm::Value* ptr = lookup_value(name); + builder.CreateStore(value, ptr); +} + +void IRBuilder::create_store(llvm::Value* ptr, llvm::Value* value) { + builder.CreateStore(value, ptr); +} + +void IRBuilder::create_store_to_array(const std::string& name, + llvm::Value* index, + llvm::Value* value) { + llvm::Value* element_ptr = create_inbounds_gep(name, index); + create_store(element_ptr, value); +} + +void IRBuilder::create_return(llvm::Value* return_value) { + if (return_value) + builder.CreateRet(return_value); + else + builder.CreateRetVoid(); +} + +void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, + llvm::Type* element_or_scalar_type) { + // Even if generating vectorised code, some variables still need to be scalar. 
Particularly, the + // induction variable "id" and remainder loop variables (that start with "epilogue" prefix). + llvm::Type* type; + if (instruction_width > 1 && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { + type = llvm::FixedVectorType::get(element_or_scalar_type, instruction_width); + } else { + type = element_or_scalar_type; + } + builder.CreateAlloca(type, /*ArraySize=*/nullptr, name); +} + +void IRBuilder::create_unary_op(llvm::Value* value, ast::UnaryOp op) { + if (op == ast::UOP_NEGATION) { + value_stack.push_back(builder.CreateFNeg(value)); + } else if (op == ast::UOP_NOT) { + value_stack.push_back(builder.CreateNot(value)); + } else { + throw std::runtime_error("Error: unsupported unary operator\n"); + } +} + +llvm::Value* IRBuilder::get_struct_member_ptr(llvm::Value* struct_variable, int member_index) { + ValueVector indices; + indices.push_back(llvm::ConstantInt::get(get_i32_type(), 0)); + indices.push_back(llvm::ConstantInt::get(get_i32_type(), member_index)); + return builder.CreateInBoundsGEP(struct_variable, indices); +} + +llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, + llvm::Value* id_value, + llvm::Value* array, + llvm::Value* maybe_value_to_store) { + // First, calculate the address of the element in the array. + llvm::Value* element_ptr = create_inbounds_gep(array, id_value); + + // If the vector code is generated, we need to distinguish between two cases. If the array is + // indexed indirectly (i.e. not by an induction variable `kernel_id`), create a gather + // instruction. 
+ if (id_name != kernel_id && vectorize && instruction_width > 1) + return builder.CreateMaskedGather(element_ptr, llvm::Align()); + + llvm::Value* ptr; + if (vectorize && instruction_width > 1) { + // If direct indexing is used during the vectorization, we simply bitcast the scalar pointer + // to a vector pointer + llvm::Type* vector_type = llvm::PointerType::get( + llvm::FixedVectorType::get(element_ptr->getType()->getPointerElementType(), + instruction_width), + /*AddressSpace=*/0); + ptr = builder.CreateBitCast(element_ptr, vector_type); + } else { + // Otherwise, scalar code is generated and hence return the element pointer. + ptr = element_ptr; + } + + if (maybe_value_to_store) { + create_store(ptr, maybe_value_to_store); + return nullptr; + } else { + return create_load(ptr); + } +} + +void IRBuilder::maybe_replicate_value(llvm::Value* value) { + // If the value should not be vectorised, or it is already a vector, add it to the stack. + if (!vectorize || instruction_width == 1 || value->getType()->isVectorTy()) { + value_stack.push_back(value); + } else { + // Otherwise, we generate vectorized code inside the loop, so replicate the value to form a + // vector. 
+ llvm::Value* vector_value = builder.CreateVectorSplat(instruction_width, value); + value_stack.push_back(vector_value); + } +} + + +/****************************************************************************************/ +/* LLVM block utilities */ +/****************************************************************************************/ + +llvm::BasicBlock* IRBuilder::create_block_and_set_insertion_point(llvm::Function* function, + llvm::BasicBlock* insert_before, + std::string name) { + llvm::BasicBlock* block = + llvm::BasicBlock::Create(builder.getContext(), name, function, insert_before); + builder.SetInsertPoint(block); + return block; +} + +void IRBuilder::create_br(llvm::BasicBlock* block) { + builder.CreateBr(block); +} + +void IRBuilder::create_br_and_set_insertion_point(llvm::BasicBlock* block) { + builder.CreateBr(block); + builder.SetInsertPoint(block); +} + +void IRBuilder::create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block) { + builder.CreateCondBr(condition, true_block, false_block); +} + +llvm::BasicBlock* IRBuilder::get_current_block() { + return builder.GetInsertBlock(); +} + +void IRBuilder::set_insertion_point(llvm::BasicBlock* block) { + builder.SetInsertPoint(block); +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp new file mode 100644 index 0000000000..b1b23ff0cf --- /dev/null +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -0,0 +1,272 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +#include + +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "symtab/symbol_table.hpp" + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" + +namespace nmodl { +namespace codegen { + +/// Floating point bit widths. +static constexpr const unsigned single_precision = 32; +static constexpr const unsigned double_precision = 64; + +/// Some typedefs. +using ConstantVector = std::vector; +using TypeVector = std::vector; +using ValueVector = std::vector; + +/** + * \class IRBuilder + * \brief A helper class to generate LLVM IR for NMODL AST. + */ +class IRBuilder { + private: + /// Underlying LLVM IR builder. + llvm::IRBuilder<> builder; + + /// Stack to hold visited and processed values. + ValueVector value_stack; + + /// Pointer to the current function for which the code is generated. + llvm::Function* current_function; + + /// Symbol table of the NMODL AST. + symtab::SymbolTable* symbol_table; + + /// Flag to indicate that the generated IR should be vectorized. + bool vectorize; + + /// Precision of the floating-point numbers (32 or 64 bit). + unsigned fp_precision; + + /// If 1, indicates that the scalar code is generated. Otherwise, the current vectorization + /// width. + unsigned instruction_width; + + /// The vector width used for the vectorized code. + unsigned vector_width; + + /// The name of induction variable used in kernel loops. + std::string kernel_id; + + public: + IRBuilder(llvm::LLVMContext& context, + bool use_single_precision = false, + unsigned vector_width = 1) + : builder(context) + , symbol_table(nullptr) + , current_function(nullptr) + , vectorize(false) + , fp_precision(use_single_precision ? single_precision : double_precision) + , vector_width(vector_width) + , instruction_width(vector_width) + , kernel_id("") {} + + /// Initializes the builder with the symbol table and the kernel induction variable id. 
+ void initialize(symtab::SymbolTable& symbol_table, std::string& kernel_id) { + this->symbol_table = &symbol_table; + this->kernel_id = kernel_id; + } + + /// Explicitly sets the builder to produce scalar code (even during vectorization). + void generate_scalar_code() { + instruction_width = 1; + } + + /// Explicitly sets the builder to produce vectorized code. + void generate_vectorized_code() { + instruction_width = vector_width; + } + + /// Turns on vectorization mode. + void start_vectorization() { + vectorize = true; + } + + /// Turns off vectorization mode. + void stop_vectorization() { + vectorize = false; + } + + /// Sets the current function for which LLVM IR is generated. + void set_function(llvm::Function* function) { + current_function = function; + } + + /// Clears the stack of the values and unsets the current function. + void clear_function() { + value_stack.clear(); + current_function = nullptr; + } + + /// Generates LLVM IR to allocate the arguments of the function on the stack. + void allocate_function_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments); + + /// Generates IR for allocating an array. + void create_array_alloca(const std::string& name, llvm::Type* element_type, int num_elements); + + /// Generates LLVM IR for the given binary operator. + void create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op); + + /// Generates LLVM IR for the bitcast instruction. + llvm::Value* create_bitcast(llvm::Value* value, llvm::Type* dst_type); + + /// Create a basic block and set the builder's insertion point to it. + llvm::BasicBlock* create_block_and_set_insertion_point( + llvm::Function* function, + llvm::BasicBlock* insert_before = nullptr, + std::string name = ""); + + /// Generates LLVM IR for unconditional branch. + void create_br(llvm::BasicBlock* block); + + /// Generates LLVM IR for unconditional branch and sets the insertion point to this block. 
+ void create_br_and_set_insertion_point(llvm::BasicBlock* block); + + /// Generates LLVM IR for conditional branch. + void create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block); + + /// Generates LLVM IR for the boolean constant. + void create_boolean_constant(int value); + + /// Generates LLVM IR for the floating-point constant. + void create_fp_constant(const std::string& value); + + /// Generates LLVM IR for a call to the function. + void create_function_call(llvm::Function* callee, + ValueVector& arguments, + bool use_result = true); + + /// Generates LLVM IR for the string value. + llvm::Value* create_global_string(const ast::String& node); + + /// Generates LLVM IR to transform the value into an index by possibly sign-extending it. + llvm::Value* create_index(llvm::Value* value); + + /// Generates an intrinsic that corresponds to the given name. + void create_intrinsic(const std::string& name, + ValueVector& argument_values, + TypeVector& argument_types); + + /// Generates LLVM IR for the integer constant. + void create_i32_constant(int value); + + /// Generates LLVM IR to load the value specified by its name and returns it. + llvm::Value* create_load(const std::string& name); + + /// Generates LLVM IR to load the value from the pointer and returns it. + llvm::Value* create_load(llvm::Value* ptr); + + /// Generates LLVM IR to load the element at the specified index from the given array name and + /// returns it. + llvm::Value* create_load_from_array(const std::string& name, llvm::Value* index); + + /// Generates LLVM IR to store the value to the location specified by the name. + void create_store(const std::string& name, llvm::Value* value); + + /// Generates LLVM IR to store the value to the location specified by the pointer. + void create_store(llvm::Value* ptr, llvm::Value* value); + + /// Generates LLVM IR to store the value to the array element, where array is specified by the + /// name. 
+ void create_store_to_array(const std::string& name, llvm::Value* index, llvm::Value* value); + + /// Generates LLVM IR return instructions. + void create_return(llvm::Value* return_value = nullptr); + + /// Generates IR for allocating a scalar or vector variable. + void create_scalar_or_vector_alloca(const std::string& name, + llvm::Type* element_or_scalar_type); + + /// Generates LLVM IR for the given unary operator. + void create_unary_op(llvm::Value* value, ast::UnaryOp op); + + /// Creates a boolean (1-bit integer) type. + llvm::Type* get_boolean_type(); + + /// Returns current basic block. + llvm::BasicBlock* get_current_block(); + + /// Returns the name of the function for which LLVM IR is generated. + std::string get_current_function_name(); + + /// Creates a pointer to 8-bit integer type. + llvm::Type* get_i8_ptr_type(); + + /// Creates a 32-bit integer type. + llvm::Type* get_i32_type(); + + /// Creates a pointer to 32-bit integer type. + llvm::Type* get_i32_ptr_type(); + + /// Creates a 64-bit integer type. + llvm::Type* get_i64_type(); + + /// Creates a floating-point type. + llvm::Type* get_fp_type(); + + /// Creates a pointer to floating-point type. + llvm::Type* get_fp_ptr_type(); + + /// Creates a void type. + llvm::Type* get_void_type(); + + /// Generates LLVM IR to get the address of the struct's member at given index. Returns the + /// calculated value. + llvm::Value* get_struct_member_ptr(llvm::Value* struct_variable, int member_index); + + /// Creates a pointer to struct type with the given name and given members. + llvm::Type* get_struct_ptr_type(const std::string& struct_type_name, TypeVector& member_types); + + /// Generates IR that loads the elements of the array even during vectorization. If the value is + /// specified, then it is stored to the array at the given index. 
+ llvm::Value* load_to_or_store_from_array(const std::string& id_name, + llvm::Value* id_value, + llvm::Value* array, + llvm::Value* maybe_value_to_store = nullptr); + + /// Lookups the value by its name in the current function's symbol table. + llvm::Value* lookup_value(const std::string& value_name); + + /// Generates IR to replicate the value if vectorizing the code. + void maybe_replicate_value(llvm::Value* value); + + /// Sets builder's insertion point to the given block. + void set_insertion_point(llvm::BasicBlock* block); + + /// Pops the last visited value from the value stack. + llvm::Value* pop_last_value(); + + private: + /// Generates an inbounds GEP instruction for the given name and returns calculated address. + llvm::Value* create_inbounds_gep(const std::string& variable_name, llvm::Value* index); + + /// Generates an inbounds GEP instruction for the given value and returns calculated address. + llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); + + /// Returns a scalar constant of the provided type. + template + llvm::Value* get_scalar_constant(llvm::Type* type, V value); + + /// Returns a vector constant of the provided type. 
+ template + llvm::Value* get_vector_constant(llvm::Type* type, V value); +}; + +} // namespace codegen +} // namespace nmodl From acbcd1b9909767d13ba4990eb9963fa1a290eaec Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 13 May 2021 05:46:12 -0700 Subject: [PATCH 054/105] Fixed initialisation of `CodegenAtomicStatement` (#642) * Fixed CodegenAtomicStatement initialisation * Removed unused variable and changed comment --- .../llvm/codegen_llvm_helper_visitor.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index bc8317fecb..9b4dad55ee 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -256,10 +256,18 @@ static void append_statements_from_block(ast::StatementVector& statements, } } -static std::shared_ptr create_atomic_statement(std::string& lhs_str, - std::string& op_str, - std::string& rhs_str) { - auto lhs = std::make_shared(new ast::String(lhs_str)); +static std::shared_ptr create_atomic_statement( + std::string& ion_varname, + std::string& index_varname, + std::string& op_str, + std::string& rhs_str) { + // create lhs expression + auto varname = new ast::Name(new ast::String(ion_varname)); + auto index = new ast::Name(new ast::String(index_varname)); + auto lhs = std::make_shared(new ast::IndexedName(varname, index), + /*at=*/nullptr, + /*index=*/nullptr); + auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); auto rhs = create_expression(rhs_str); return std::make_shared(lhs, op, rhs); @@ -367,7 +375,8 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, // push index definition, index statement and actual write statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); - body_statements.push_back(create_atomic_statement(ion_to_write, op, rhs)); + // pass 
ion variable to write and its index + body_statements.push_back(create_atomic_statement(ion_varname, index_varname, op, rhs)); }; /// iterate over all ions and create write ion statements for given block type From acfd3c3bb6f3e085b73dab4f4a5cccc611852c3f Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 13 May 2021 15:25:24 +0200 Subject: [PATCH 055/105] Fix instance struct data generation for testing/benchmarking (#641) * Instance data structure initialization had following bug - instance struct has int member variables which act as offsets to other vectors (e.g. node_index, na_ion_index) - these variables were initialized from 1 to N where N was incremented always without considering the upper bound on for offset. * With this fix - index / integer variables are always initialized from 0 to N-1. - Variables are initialised 1e-5 prevision so that we have reaosanbly bigger values - Update tests to check offset from 0 to N-1 --- test/unit/codegen/codegen_data_helper.cpp | 9 +++++++- test/unit/codegen/codegen_data_helper.hpp | 23 +++++++++---------- .../codegen/codegen_llvm_instance_struct.cpp | 9 +++++--- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/test/unit/codegen/codegen_data_helper.cpp b/test/unit/codegen/codegen_data_helper.cpp index 4bf94f583d..a0ee6ec957 100644 --- a/test/unit/codegen/codegen_data_helper.cpp +++ b/test/unit/codegen/codegen_data_helper.cpp @@ -115,7 +115,14 @@ CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t s // allocate memory and setup a pointer void* member; posix_memalign(&member, NBYTE_ALIGNMENT, member_size * num_elements); - initialize_variable(var, member, variable_index, num_elements); + + // integer values are often offsets so they must start from + // 0 to num_elements-1 to avoid out of bound accesses. 
+ int initial_value = variable_index; + if (type == ast::AstNodeType::INTEGER) { + initial_value = 0; + } + initialize_variable(var, member, initial_value, num_elements); data.num_bytes += member_size * num_elements; // copy address at specific location in the struct diff --git a/test/unit/codegen/codegen_data_helper.hpp b/test/unit/codegen/codegen_data_helper.hpp index ef8e869366..76c4f422d9 100644 --- a/test/unit/codegen/codegen_data_helper.hpp +++ b/test/unit/codegen/codegen_data_helper.hpp @@ -57,11 +57,12 @@ struct CodegenInstanceData { /** * Generate vector of dummy data according to the template type specified * - * For double type: generate vector starting from (initial_value + 1e-15) - * with increments of 1e-15 - * For float type: generate vector starting from (initial_value + 1e-6) - * with increments of 1e-6 - * For int type: generate vector starting from (initial_value + 1) with + * For double or float type: generate vector starting from `initial_value` + * with an increment of 1e-5. The increment can be any other + * value but 1e-5 is chosen because when we benchmark with + * a million elements then the values are in the range of + * . 
+ * For int type: generate vector starting from initial_value with an * increments of 1 * * \param inital_value Base value for initializing the data @@ -71,16 +72,14 @@ struct CodegenInstanceData { template std::vector generate_dummy_data(size_t initial_value, size_t num_elements) { std::vector data(num_elements); - T precision; - if (std::is_same::value) { - precision = 1e-15; - } else if (std::is_same::value) { - precision = 1e-6; + T increment; + if (std::is_same::value) { + increment = 1; } else { - precision = 1; + increment = 1e-5; } for (size_t i = 0; i < num_elements; i++) { - data[i] = initial_value + precision * (i + 1); + data[i] = initial_value + increment * i; } return data; } diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 52b9bb9868..e77b6844ae 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -132,8 +132,12 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { generate_dummy_data(ena_index, num_elements))); REQUIRE(compare(instance_data.members[ion_ena_index], generate_dummy_data(ion_ena_index, num_elements))); + // index variables are offsets, they start from 0 + REQUIRE(compare(instance_data.members[ion_ena_index_index], + generate_dummy_data(0, num_elements))); REQUIRE(compare(instance_data.members[node_index_index], - generate_dummy_data(node_index_index, num_elements))); + generate_dummy_data(0, num_elements))); + REQUIRE(*static_cast(instance_data.members[t_index]) == default_nthread_t_value); REQUIRE(*static_cast(instance_data.members[node_count_index]) == num_elements); @@ -164,8 +168,7 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { REQUIRE(compare(instance->ena, generate_dummy_data(ena_index, num_elements))); REQUIRE(compare(instance->ion_ena, generate_dummy_data(ion_ena_index, num_elements))); - REQUIRE(compare(instance->node_index, - 
generate_dummy_data(node_index_index, num_elements))); + REQUIRE(compare(instance->node_index, generate_dummy_data(0, num_elements))); REQUIRE(instance->t == default_nthread_t_value); REQUIRE(instance->celsius == default_celsius_value); REQUIRE(instance->secondorder == default_second_order_value); From 0e09468394d595565bc117dba964fc09b5824df0 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 13 May 2021 13:14:13 -0700 Subject: [PATCH 056/105] Basic scatter support (#643) Added basic support to transform indirect writes into `llvm.masked.scatter` intrinsic. Currently, the scatter functionality is limited to non-atomic writes and assignment (e.g. `+=` operator is not yet supported). Hence, a warning is logged to the console indicating all limitations. Corresponding IR and execution tests were also added. fixes #539 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 42 +++++++++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 1 + src/codegen/llvm/llvm_ir_builder.cpp | 7 +- test/unit/codegen/codegen_llvm_execution.cpp | 76 ++++++++++++++++++++ test/unit/codegen/codegen_llvm_ir.cpp | 47 ++++++++++++ 5 files changed, 165 insertions(+), 8 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index a86a5cd8b5..39594169f4 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -37,9 +37,10 @@ static constexpr const char instance_struct_type_name[] = "__instance_var__type" /// A utility to check for supported Statement AST nodes. 
static bool is_supported_statement(const ast::Statement& statement) { - return statement.is_codegen_var_list_statement() || statement.is_expression_statement() || - statement.is_codegen_for_statement() || statement.is_codegen_return_statement() || - statement.is_if_statement() || statement.is_while_statement(); + return statement.is_codegen_atomic_statement() || statement.is_codegen_for_statement() || + statement.is_if_statement() || statement.is_codegen_return_statement() || + statement.is_codegen_var_list_statement() || statement.is_expression_statement() || + statement.is_while_statement(); } /// A utility to check that the kernel body can be vectorised. @@ -162,10 +163,12 @@ void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& argumen } void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { - // By convention, only kernel functions have a return type of void. + // By convention, only kernel functions have a return type of void and single argument. The + // number of arguments check is needed to avoid LLVM void intrinsics to be considered as + // kernels. const auto& functions = module->getFunctionList(); for (const auto& func: functions) { - if (func.getReturnType()->isVoidTy()) { + if (func.getReturnType()->isVoidTy() && llvm::hasSingleElement(func.args())) { container.push_back(func.getName().str()); } } @@ -366,7 +369,7 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { if (!kernel) throw std::runtime_error("Error: kernel " + kernel_name + " is not found\n"); - if (std::distance(kernel->args().begin(), kernel->args().end()) != 1) + if (!llvm::hasSingleElement(kernel->args())) throw std::runtime_error("Error: kernel " + kernel_name + " must have a single argument\n"); @@ -443,6 +446,33 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { ir_builder.create_boolean_constant(node.get_value()); } +/** + * Currently, this functions is very similar to visiting the binary operator. 
However, the + * difference here is that the writes to the LHS variable must be atomic. These has a particular + * use case in synapse kernels. For simplicity, we choose not to support atomic writes at this + * stage and emit a warning. + * + * \todo support this properly. + */ +void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) { + if (vector_width > 1) + logger->warn("Atomic operations are not supported"); + + // Support only assignment for now. + llvm::Value* rhs = accept_and_get(node.get_rhs()); + if (node.get_atomic_op().get_value() != ast::BinaryOp::BOP_ASSIGN) + throw std::runtime_error( + "Error: only assignment is supported for CodegenAtomicStatement\n"); + const auto& var = dynamic_cast(node.get_lhs().get()); + if (!var) + throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); + + // Process the assignment as if it was non-atomic. + if (vector_width > 1) + logger->warn("Treating write as non-atomic"); + write_to_variable(*var, rhs); +} + // Generating FOR loop in LLVM IR creates the following structure: // // +---------------------------+ diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 0ada7b8097..14a608d3ca 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -155,6 +155,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { // Visitors. 
void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; + void visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) override; void visit_codegen_for_statement(const ast::CodegenForStatement& node) override; void visit_codegen_function(const ast::CodegenFunction& node) override; void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 2773e6929b..04e36e50cd 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -349,8 +349,11 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, // If the vector code is generated, we need to distinguish between two cases. If the array is // indexed indirectly (i.e. not by an induction variable `kernel_id`), create a gather // instruction. - if (id_name != kernel_id && vectorize && instruction_width > 1) - return builder.CreateMaskedGather(element_ptr, llvm::Align()); + if (id_name != kernel_id && vectorize && instruction_width > 1) { + return maybe_value_to_store + ? builder.CreateMaskedScatter(maybe_value_to_store, element_ptr, llvm::Align()) + : builder.CreateMaskedGather(element_ptr, llvm::Align()); + } llvm::Value* ptr; if (vectorize && instruction_width > 1) { diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index db6cd08f51..7256f5c826 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -433,3 +433,79 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { } } } + +//============================================================================= +// Vectorised kernel with ion writes. 
+//============================================================================= + +SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { + GIVEN("Simple MOD file with ion writes") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION ca WRITE cai + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + : increment cai to test scatter + cai = cai + 1 + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_functions(); + + // Create the instance struct data. + int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. + std::vector cai = {1.0, 2.0, 3.0, 4.0, 5.0}; + std::vector ion_cai = {1.0, 2.0, 3.0, 4.0, 5.0}; + std::vector ion_cai_index = {4, 2, 3, 0, 1}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, cai, "cai"); + initialise_instance_variable(instance_info, ion_cai, "ion_cai"); + initialise_instance_variable(instance_info, ion_cai_index, "ion_cai_index"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Ion values in struct have been updated correctly") { + runner.run_with_argument("__nrn_state_test_wrapper", + instance_data.base_ptr); + // cai[id] = ion_cai[ion_cai_index[id]] + // cai[id] += 1 + std::vector cai_expected = {6.0, 4.0, 5.0, 2.0, 3.0}; + REQUIRE(check_instance_variable(instance_info, cai_expected, "cai")); + + // ion_cai[ion_cai_index[id]] = cai[id] + std::vector ion_cai_expected = {2.0, 3.0, 4.0, 5.0, 6.0}; + REQUIRE(check_instance_variable(instance_info, ion_cai_expected, "ion_cai")); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 1b0f236e3d..5a67a5b445 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -956,6 +956,53 @@ SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { } } +//============================================================================= +// Scatter for vectorised kernel +//============================================================================= + +SCENARIO("Vectorised simple kernel with ion writes", "[visitor][llvm]") { + GIVEN("An indirect indexing of ca ion") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + USEION ca WRITE cai + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states {} + )"; + + THEN("a scatter instructions is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/4); + std::smatch m; + + // Check scatter intrinsic is correctly declared. + std::regex declaration( + R"(declare void @llvm\.masked\.scatter\.v4f64\.v4p0f64\(<4 x double>, <4 x double\*>, i32 immarg, <4 x i1>\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check that the indices vector is created correctly and extended to i64. 
+ std::regex index_load(R"(load <4 x i32>, <4 x i32>\* %ion_cai_id)"); + std::regex sext(R"(sext <4 x i32> %.* to <4 x i64>)"); + REQUIRE(std::regex_search(module_string, m, index_load)); + REQUIRE(std::regex_search(module_string, m, sext)); + + // Check that store to `ion_cai` is performed via scatter instruction. + // ion_cai[ion_cai_id] = cai[id] + std::regex scatter( + "call void @llvm\\.masked\\.scatter\\.v4f64\\.v4p0f64\\(<4 x double> %.*, <4 x " + "double\\*> %.*, i32 1, <4 x i1> \\)"); + REQUIRE(std::regex_search(module_string, m, scatter)); + } + } +} + //============================================================================= // Derivative block : test optimization //============================================================================= From f7d00dd8105b9256484b2a807ef4f323fa81dceb Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 16 May 2021 18:07:01 +0200 Subject: [PATCH 057/105] Benchmarking code re-organisation and minor improvements (#647) * Move benchmark + JIT related code from src/codegen/llvm to test/benchmark * Common execution of CodegenLLVMVisitor for llvm --ir and benchmark option. With this, ast transformed for LLVM code generation is dumped to file. 
* Previous object file is removed (if exist) so that output file name is same / deterministic * Benchmark output is always printed to stdout via common logger object * Remove unnecessary LLVMBuildInfo struct --- CMakeLists.txt | 1 + src/CMakeLists.txt | 5 +- src/codegen/llvm/CMakeLists.txt | 6 +- src/codegen/llvm/codegen_llvm_visitor.hpp | 5 ++ src/codegen/llvm/main.cpp | 2 +- src/main.cpp | 39 +++++----- test/benchmark/CMakeLists.txt | 17 +++++ .../llvm => test/benchmark}/jit_driver.cpp | 8 ++ .../llvm => test/benchmark}/jit_driver.hpp | 0 .../benchmark}/llvm_benchmark.cpp | 73 +++++-------------- .../benchmark}/llvm_benchmark.hpp | 33 +++------ test/unit/CMakeLists.txt | 5 +- test/unit/codegen/codegen_llvm_execution.cpp | 2 +- 13 files changed, 87 insertions(+), 109 deletions(-) create mode 100644 test/benchmark/CMakeLists.txt rename {src/codegen/llvm => test/benchmark}/jit_driver.cpp (97%) rename {src/codegen/llvm => test/benchmark}/jit_driver.hpp (100%) rename {src/codegen/llvm => test/benchmark}/llvm_benchmark.cpp (61%) rename {src/codegen/llvm => test/benchmark}/llvm_benchmark.hpp (76%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a16a090f9c..8fbe58984f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,7 @@ set(MEMORYCHECK_COMMAND_OPTIONS # do not enable tests if nmodl is used as submodule if(NOT NMODL_AS_SUBPROJECT) include(CTest) + add_subdirectory(test/benchmark) add_subdirectory(test/unit) add_subdirectory(test/integration) endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ab84cfbd70..697f92ce51 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,12 +34,13 @@ target_link_libraries( util lexer ${NMODL_WRAPPER_LIBS}) -cpp_cc_configure_sanitizers(TARGET nmodl) if(NMODL_ENABLE_LLVM) - target_link_libraries(nmodl llvm_codegen llvm_benchmark ${LLVM_LIBS_TO_LINK}) + target_link_libraries(nmodl llvm_codegen llvm_benchmark benchmark_data ${LLVM_LIBS_TO_LINK}) endif() +cpp_cc_configure_sanitizers(TARGET nmodl) + # 
============================================================================= # Add dependency with nmodl pytnon module (for consumer projects) # ============================================================================= diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 78380485fa..70398de185 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -6,10 +6,6 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.cpp @@ -32,9 +28,9 @@ if(NOT NMODL_AS_SUBPROJECT) target_link_libraries( nmodl_llvm_runner CLI11::CLI11 + llvm_benchmark llvm_codegen codegen - llvm_benchmark visitor symtab lexer diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 14a608d3ca..990485d8e2 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -152,6 +152,11 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { return instance_var_helper; } + /// Returns vector width + int get_vector_width() const { + return vector_width; + } + // Visitors. 
void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_boolean(const ast::Boolean& node) override; diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index b700f5ad59..2f4e1f653d 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -9,8 +9,8 @@ #include "ast/program.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "jit_driver.hpp" #include "parser/nmodl_driver.hpp" +#include "test/benchmark/jit_driver.hpp" #include "utils/logger.hpp" #include "visitors/symtab_visitor.hpp" diff --git a/src/main.cpp b/src/main.cpp index d2e73d37da..87a17d6eb2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -19,7 +19,7 @@ #include "codegen/codegen_transform_visitor.hpp" #ifdef NMODL_LLVM_BACKEND #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "codegen/llvm/llvm_benchmark.hpp" +#include "test/benchmark/llvm_benchmark.hpp" #endif #include "config/config.h" @@ -337,7 +337,7 @@ int main(int argc, const char* argv[]) { fmt::format("Disable debug information ({})", disable_debug_information))->ignore_case(); llvm_opt->add_flag("--opt", llvm_ir_opt_passes, - fmt::format("Run LLVM optimisation passes ({})", llvm_ir_opt_passes))->ignore_case(); + fmt::format("Run few common LLVM IR optimisation passes ({})", llvm_ir_opt_passes))->ignore_case(); llvm_opt->add_flag("--single-precision", llvm_float_type, fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); @@ -682,26 +682,7 @@ int main(int argc, const char* argv[]) { } #ifdef NMODL_LLVM_BACKEND - - if (run_llvm_benchmark) { - logger->info("Running LLVM benchmark"); - benchmark::LLVMBuildInfo info{llvm_vec_width, - llvm_ir_opt_passes, - llvm_float_type, - vector_library}; - benchmark::LLVMBenchmark benchmark(modfile, - output_dir, - shared_lib_paths, - info, - num_experiments, - instance_size, - backend, - llvm_opt_level_ir, - llvm_opt_level_codegen); - benchmark.run(ast); - } - - else if (llvm_ir) { + if (llvm_ir 
|| run_llvm_benchmark) { logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor(modfile, output_dir, @@ -713,6 +694,20 @@ int main(int argc, const char* argv[]) { visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); + + if (run_llvm_benchmark) { + logger->info("Running LLVM benchmark"); + benchmark::LLVMBenchmark benchmark(visitor, + modfile, + output_dir, + shared_lib_paths, + num_experiments, + instance_size, + backend, + llvm_opt_level_ir, + llvm_opt_level_codegen); + benchmark.run(ast); + } } #endif } diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt new file mode 100644 index 0000000000..4441d53251 --- /dev/null +++ b/test/benchmark/CMakeLists.txt @@ -0,0 +1,17 @@ +# ============================================================================= +# llvm benchmark sources +# ============================================================================= +set(LLVM_BENCHMARK_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp) + +# ============================================================================= +# LLVM benchmark library +# ============================================================================= +include_directories(${LLVM_INCLUDE_DIRS}) +add_library(llvm_benchmark STATIC ${LLVM_BENCHMARK_SOURCE_FILES}) +add_dependencies(llvm_benchmark lexer util visitor) + +if(NMODL_ENABLE_JIT_EVENT_LISTENERS) + target_compile_definitions(llvm_benchmark PUBLIC NMODL_HAVE_JIT_EVENT_LISTENERS) +endif() diff --git a/src/codegen/llvm/jit_driver.cpp b/test/benchmark/jit_driver.cpp similarity index 97% rename from src/codegen/llvm/jit_driver.cpp rename to test/benchmark/jit_driver.cpp index 2a6842d0fb..a2d8df63f4 100644 --- a/src/codegen/llvm/jit_driver.cpp +++ b/test/benchmark/jit_driver.cpp @@ -7,6 +7,7 @@ 
#include "jit_driver.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "utils/common_utils.hpp" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ExecutionEngine/JITEventListener.h" @@ -247,6 +248,13 @@ void JITDriver::init(std::string features, // Optionally, dump the binary to the object file. if (benchmark_info) { + std::string object_file = benchmark_info->filename + ".o"; + if (utils::file_exists(object_file)) { + int status = remove(object_file.c_str()); + if (status) { + throw std::runtime_error("Can not remove object file " + object_file); + } + } jit->getObjTransformLayer().setTransform( llvm::orc::DumpObjects(benchmark_info->output_dir, benchmark_info->filename)); } diff --git a/src/codegen/llvm/jit_driver.hpp b/test/benchmark/jit_driver.hpp similarity index 100% rename from src/codegen/llvm/jit_driver.hpp rename to test/benchmark/jit_driver.hpp diff --git a/src/codegen/llvm/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp similarity index 61% rename from src/codegen/llvm/llvm_benchmark.cpp rename to test/benchmark/llvm_benchmark.cpp index adbe653f1e..f6811fd664 100644 --- a/src/codegen/llvm/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -9,8 +9,8 @@ #include #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "codegen/llvm/jit_driver.hpp" #include "llvm_benchmark.hpp" +#include "test/benchmark/jit_driver.hpp" #include "llvm/Support/Host.h" #include "test/unit/codegen/codegen_data_helper.hpp" @@ -42,57 +42,42 @@ void LLVMBenchmark::disable(const std::string& feature, std::vector for (auto& host_feature: host_features) { if (feature == host_feature.substr(1)) { host_feature[0] = '-'; - *log_stream << host_feature << "\n"; + logger->info("{}", host_feature); return; } } } void LLVMBenchmark::run(const std::shared_ptr& node) { - // First, set the output stream for the logs. - set_log_output(); - - // Then, record the time taken for building the LLVM IR module. 
- codegen::CodegenLLVMVisitor visitor(mod_filename, - output_dir, - llvm_build_info.opt_passes, - llvm_build_info.use_single_precision, - llvm_build_info.vector_width, - llvm_build_info.vec_lib, - /*add_debug_information=*/true); - generate_llvm(visitor, node); - + // create functions + generate_llvm(node); // Finally, run the benchmark and log the measurements. - run_benchmark(visitor, node); + run_benchmark(node); } -void LLVMBenchmark::generate_llvm(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node) { +void LLVMBenchmark::generate_llvm(const std::shared_ptr& node) { // First, visit the AST to build the LLVM IR module and wrap the kernel function calls. auto start = std::chrono::high_resolution_clock::now(); - visitor.visit_program(*node); - visitor.wrap_kernel_functions(); + llvm_visitor.wrap_kernel_functions(); auto end = std::chrono::high_resolution_clock::now(); // Log the time taken to visit the AST and build LLVM IR. std::chrono::duration diff = end - start; - *log_stream << "Created LLVM IR module from NMODL AST in " << std::setprecision(PRECISION) - << diff.count() << "\n\n"; + logger->info("Created LLVM IR module from NMODL AST in {} sec", diff.count()); } -void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node) { +void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { // Set the codegen data helper and find the kernels. - auto codegen_data = codegen::CodegenDataHelper(node, visitor.get_instance_struct_ptr()); + auto codegen_data = codegen::CodegenDataHelper(node, llvm_visitor.get_instance_struct_ptr()); std::vector kernel_names; - visitor.find_kernel_names(kernel_names); + llvm_visitor.find_kernel_names(kernel_names); // Get feature's string and turn them off depending on the backend. std::vector features = get_cpu_features(); - *log_stream << "Backend: " << backend << "\n"; + logger->info("Backend: {}", backend); if (backend == "avx2") { // Disable SSE. 
- *log_stream << "Disabling features:\n"; + logger->info("Disabling features:"); disable("sse", features); disable("sse2", features); disable("sse3", features); @@ -100,16 +85,17 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, disable("sse4.2", features); } else if (backend == "sse2") { // Disable AVX. - *log_stream << "Disabling features:\n"; + logger->info("Disabling features:"); disable("avx", features); disable("avx2", features); } std::string features_str = llvm::join(features.begin(), features.end(), ","); - std::unique_ptr m = visitor.get_module(); + std::unique_ptr m = llvm_visitor.get_module(); // Create the benchmark runner and initialize it. - std::string filename = "v" + std::to_string(llvm_build_info.vector_width) + "_" + mod_filename; + std::string filename = "v" + std::to_string(llvm_visitor.get_vector_width()) + "_" + + mod_filename; runner::BenchmarkRunner runner(std::move(m), filename, output_dir, @@ -125,7 +111,7 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); double size_mbs = instance_data.num_bytes / (1024.0 * 1024.0); - *log_stream << "Benchmarking kernel '" << kernel_name << ", with " << size_mbs << " MBs\n"; + logger->info("Benchmarking kernel '{}' with {} MBs dataset", kernel_name, size_mbs); // For every kernel run the benchmark `num_experiments` times. double time_sum = 0.0; @@ -138,32 +124,13 @@ void LLVMBenchmark::run_benchmark(codegen::CodegenLLVMVisitor& visitor, std::chrono::duration diff = end - start; // Log the time taken for each run. - *log_stream << "Experiment " << i << ": compute time = " << std::setprecision(9) - << diff.count() << "\n"; + logger->info("Experiment {} compute time = {:.6f} sec", i, diff.count()); time_sum += diff.count(); } // Log the average time taken for the kernel. 
- *log_stream << "Average compute time = " << std::setprecision(PRECISION) - << time_sum / num_experiments << "\n\n"; - } -} - -void LLVMBenchmark::set_log_output() { - // If the output directory is not specified, dump logs to the console. - if (output_dir == ".") { - log_stream = std::make_shared(std::cout.rdbuf()); - return; + logger->info("Average compute time = {:.6f} \n", time_sum / num_experiments); } - - // Otherwise, dump logs to the specified file. - std::string filename = output_dir + "/" + mod_filename + ".log"; - ofs.open(filename.c_str()); - - if (ofs.fail()) - throw std::runtime_error("Error while opening a file '" + filename + "'"); - - log_stream = std::make_shared(ofs.rdbuf()); } } // namespace benchmark diff --git a/src/codegen/llvm/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp similarity index 76% rename from src/codegen/llvm/llvm_benchmark.hpp rename to test/benchmark/llvm_benchmark.hpp index c2c781d7f0..9696191172 100644 --- a/src/codegen/llvm/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -10,19 +10,11 @@ #include #include "codegen/llvm/codegen_llvm_visitor.hpp" - +#include "utils/logger.hpp" namespace nmodl { namespace benchmark { -/// A struct to hold LLVM visitor information. -struct LLVMBuildInfo { - int vector_width; - bool opt_passes; - bool use_single_precision; - std::string vec_lib; -}; - /** * \class LLVMBenchmark * \brief A wrapper to execute MOD file kernels via LLVM IR backend, and @@ -30,6 +22,9 @@ struct LLVMBuildInfo { */ class LLVMBenchmark { private: + /// LLVM visitor. + codegen::CodegenLLVMVisitor& llvm_visitor; + /// Source MOD file name. std::string mod_filename; @@ -54,32 +49,26 @@ class LLVMBenchmark { /// Optimisation level for machine code generation. int opt_level_codegen; - /// LLVM visitor information. - LLVMBuildInfo llvm_build_info; - - /// The log output stream (file or stdout). - std::shared_ptr log_stream; - /// Filestream for dumping logs to the file. 
std::ofstream ofs; public: - LLVMBenchmark(const std::string& mod_filename, + LLVMBenchmark(codegen::CodegenLLVMVisitor& llvm_visitor, + const std::string& mod_filename, const std::string& output_dir, std::vector shared_libs, - LLVMBuildInfo info, int num_experiments, int instance_size, const std::string& backend, int opt_level_ir, int opt_level_codegen) - : mod_filename(mod_filename) + : llvm_visitor(llvm_visitor) + , mod_filename(mod_filename) , output_dir(output_dir) , shared_libs(shared_libs) , num_experiments(num_experiments) , instance_size(instance_size) , backend(backend) - , llvm_build_info(info) , opt_level_ir(opt_level_ir) , opt_level_codegen(opt_level_codegen) {} @@ -91,12 +80,10 @@ class LLVMBenchmark { void disable(const std::string& feature, std::vector& host_features); /// Visits the AST to construct the LLVM IR module. - void generate_llvm(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node); + void generate_llvm(const std::shared_ptr& node); /// Runs the main body of the benchmark, executing the compute kernels. - void run_benchmark(codegen::CodegenLLVMVisitor& visitor, - const std::shared_ptr& node); + void run_benchmark(const std::shared_ptr& node); /// Sets the log output stream (file or console). 
void set_log_output(); diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a82ff8ff19..c98c919728 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -110,8 +110,8 @@ target_link_libraries( if(NMODL_ENABLE_LLVM) include_directories(${LLVM_INCLUDE_DIRS} codegen) - add_library(llvm_benchmark STATIC codegen/codegen_data_helper.cpp) - add_dependencies(llvm_benchmark lexer) + add_library(benchmark_data STATIC codegen/codegen_data_helper.cpp) + add_dependencies(benchmark_data lexer) add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) @@ -134,6 +134,7 @@ if(NMODL_ENABLE_LLVM) test_llvm_runner Catch2::Catch2 llvm_codegen + llvm_benchmark codegen visitor symtab diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 7256f5c826..36d13447bf 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -10,9 +10,9 @@ #include "ast/program.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "codegen/llvm/jit_driver.hpp" #include "codegen_data_helper.hpp" #include "parser/nmodl_driver.hpp" +#include "test/benchmark/jit_driver.hpp" #include "visitors/checkparent_visitor.hpp" #include "visitors/neuron_solve_visitor.hpp" #include "visitors/solve_block_visitor.hpp" From ac283f27755ba7d32b5174f48c255add3635a830 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sun, 16 May 2021 22:04:31 -0700 Subject: [PATCH 058/105] Added attributes and metadata to LLVM IR compute kernels (#648) Previously, there was no metadata and attributes associated with the instance struct pointer, compute kernels or loops. This commit fixes this. - New instance struct attributes Since all pointers contained in the instance struct do not alias, we add a `noalias` (LLVM's `__restrict` alternative) attribute to it. 
In addition, we add `nocapture` (No capturing occurs in the function) and `readonly` (Struct pointer is not written to) attributes. This means that some load instructions can be moved out from the loop body. Example: ```llvm ; BEFORE for.body.lr.ph: ; preds = %0 %5 = getelementptr inbounds %avx__instance_var__type, %avx__instance_var__type* %mech1, i64 0, i32 1 br label %for.body for.body: ; preds = %for.body.lr.ph, %for.body %15 = load double*, double** %5, align 8 ; ... ; AFTER for.body.lr.ph: ; preds = %0 %5 = getelementptr inbounds %avx__instance_var__type, %avx__instance_var__type* %mech1, i64 0, i32 1 %6 = load double*, double** %5, align 8 br label %for.body ``` - New function attributes Now, compute kernels are marked with `nofree` and `nounwind` attributes. - Loop metadata Also, loop metadata is added to scalar kernels, specifying that no vectorization is needed. The reason for this is because we want to benchmark truly scalar kernels, and disable LLVM's vectorization if necessary. Note that for vector loop epilogue there is no metadata that disables vectorization. fixes #607 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 93 +++++++++++++---------- src/codegen/llvm/codegen_llvm_visitor.hpp | 3 + src/codegen/llvm/llvm_ir_builder.cpp | 54 ++++++++++++- src/codegen/llvm/llvm_ir_builder.hpp | 13 +++- test/unit/codegen/codegen_llvm_ir.cpp | 21 ++++- 5 files changed, 134 insertions(+), 50 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 39594169f4..2124ad82c9 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -163,13 +163,11 @@ void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& argumen } void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { - // By convention, only kernel functions have a return type of void and single argument. 
The - // number of arguments check is needed to avoid LLVM void intrinsics to be considered as - // kernels. - const auto& functions = module->getFunctionList(); - for (const auto& func: functions) { - if (func.getReturnType()->isVoidTy() && llvm::hasSingleElement(func.args())) { - container.push_back(func.getName().str()); + auto& functions = module->getFunctionList(); + for (auto& func: functions) { + const std::string name = func.getName().str(); + if (is_kernel_function(name)) { + container.push_back(name); } } } @@ -239,6 +237,36 @@ int CodegenLLVMVisitor::get_num_elements(const ast::IndexedName& node) { return static_cast(*macro->get_value()); } +/** + * Currently, functions are identified as compute kernels if they satisfy the following: + * 1. They have a void return type + * 2. They have a single argument + * 3. The argument is a struct type pointer + * This is not robust, and hence it would be better to find what functions are kernels on the NMODL + * AST side (e.g. via a flag, or via names list). + * + * \todo identify kernels on NMODL AST side. + */ +bool CodegenLLVMVisitor::is_kernel_function(const std::string& function_name) { + llvm::Function* function = module->getFunction(function_name); + if (!function) + throw std::runtime_error("Error: function " + function_name + " does not exist\n"); + + // By convention, only kernel functions have a return type of void and single argument. The + // number of arguments check is needed to avoid LLVM void intrinsics to be considered as + // kernels. + if (!function->getReturnType()->isVoidTy() || !llvm::hasSingleElement(function->args())) + return false; + + // Kernel's argument is a pointer to the instance struct type. 
+ llvm::Type* arg_type = function->getArg(0)->getType(); + if (auto pointer_type = llvm::dyn_cast(arg_type)) { + if (pointer_type->getElementType()->isStructTy()) + return true; + } + return false; +} + llvm::Value* CodegenLLVMVisitor::read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, llvm::Value* maybe_value_to_store) { const auto& instance_name = node.get_instance_var()->get_node_name(); @@ -364,20 +392,8 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { find_kernel_names(kernel_names); for (const auto& kernel_name: kernel_names) { - // Get the kernel function and the instance struct type. + // Get the kernel function. auto kernel = module->getFunction(kernel_name); - if (!kernel) - throw std::runtime_error("Error: kernel " + kernel_name + " is not found\n"); - - if (!llvm::hasSingleElement(kernel->args())) - throw std::runtime_error("Error: kernel " + kernel_name + - " must have a single argument\n"); - - auto instance_struct_ptr_type = llvm::dyn_cast( - kernel->getArg(0)->getType()); - if (!instance_struct_ptr_type) - throw std::runtime_error("Error: kernel " + kernel_name + - " does not have an instance struct pointer as an argument\n"); // Create a wrapper void function that takes a void pointer as a single argument. llvm::Type* i32_type = ir_builder.get_i32_type(); @@ -398,7 +414,7 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel // and adding a terminator. 
llvm::Value* bitcasted = ir_builder.create_bitcast(wrapper_func->getArg(0), - instance_struct_ptr_type); + kernel->getArg(0)->getType()); ValueVector args; args.push_back(bitcasted); ir_builder.create_function_call(kernel, args, /*use_result=*/false); @@ -522,9 +538,6 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); - // Save the vector width. - int tmp_vector_width = vector_width; - // Check if the kernel can be vectorised. If not, generate scalar code. if (!can_vectorize(node, sym_tab)) { logger->info("Cannot vectorise the for loop in '" + ir_builder.get_current_function_name() + @@ -534,21 +547,20 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem ir_builder.generate_scalar_code(); } - // First, initialise the loop in the same basic block. This block is optional. Also, reset - // vector width to 1 if processing the remainder of the loop. - if (node.get_initialization()) { + // First, initialise the loop in the same basic block. This block is optional. Also, generate + // scalar code if processing the remainder of the loop. + if (node.get_initialization()) node.get_initialization()->accept(*this); - } else { - vector_width = 1; + else ir_builder.generate_scalar_code(); - } // Branch to condition basic block and insert condition code there. ir_builder.create_br_and_set_insertion_point(for_cond); // Extract the condition to decide whether to branch to the loop body or loop exit. llvm::Value* cond = accept_and_get(node.get_condition()); - ir_builder.create_cond_br(cond, for_body, exit); + llvm::BranchInst* loop_br = ir_builder.create_cond_br(cond, for_body, exit); + ir_builder.set_loop_metadata(loop_br); // Generate code for the loop body and create the basic block for the increment. 
ir_builder.set_insertion_point(for_body); @@ -560,11 +572,9 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem // Process increment. node.get_increment()->accept(*this); - // Create a branch to condition block, then generate exit code out of the loop. Restore the - // vector width. + // Create a branch to condition block, then generate exit code out of the loop. ir_builder.create_br(for_cond); ir_builder.set_insertion_point(exit); - vector_width = tmp_vector_width; ir_builder.generate_vectorized_code(); ir_builder.start_vectorization(); } @@ -578,7 +588,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Create the entry basic block of the function/procedure and point the local named values table // to the symbol table. - llvm::BasicBlock* body = ir_builder.create_block_and_set_insertion_point(func); + ir_builder.create_block_and_set_insertion_point(func); // When processing a function, it returns a value named in NMODL. Therefore, we // first run RenameVisitor to rename it into ret_. This will aid in avoiding @@ -588,14 +598,12 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node visitor::RenameVisitor v(name, return_var_name); block->accept(v); - // Allocate parameters on the stack and add them to the symbol table. ir_builder.allocate_function_arguments(func, arguments); // Process function or procedure body. If the function is a compute kernel, then set the - // corresponding flags. The return statement is handled in a separate visitor. - bool has_void_ret_type = node.get_return_type()->get_type() == ast::AstNodeType::VOID; - if (has_void_ret_type) { + // corresponding flags. If so, the return statement is handled in a separate visitor. 
+ if (is_kernel_function(name)) { ir_builder.start_vectorization(); block->accept(*this); ir_builder.stop_vectorization(); @@ -603,9 +611,12 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node block->accept(*this); } - // If function has a void return type, add a terminator not handled by CodegenReturnVar. - if (has_void_ret_type) + // If function is a compute kernel, add a void terminator explicitly, since there is no + // `CodegenReturnVar` node. Also, set the necessary attributes. + if (is_kernel_function(name)) { + ir_builder.set_kernel_attributes(); ir_builder.create_return(); + } // Clear local values stack and remove the pointer to the local symbol table. ir_builder.clear_function(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 990485d8e2..22505a304c 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -216,6 +216,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Returns the number of elements in the array specified by the IndexedName AST node. int get_num_elements(const ast::IndexedName& node); + /// Returns whether the function is an NMODL compute kernel. + bool is_kernel_function(const std::string& function_name); + /// If the value to store is specified, writes it to the instance. Otherwise, returns the /// instance variable. llvm::Value* read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 04e36e50cd..06ba8d00ef 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -174,6 +174,52 @@ void IRBuilder::create_intrinsic(const std::string& name, } } +void IRBuilder::set_kernel_attributes() { + // By convention, the compute kernel does not free memory and does not throw exceptions. 
+ current_function->setDoesNotFreeMemory(); + current_function->setDoesNotThrow(); + + // We also want to specify that the pointers that instance struct holds, do not alias. In order + // to do that, we add a `noalias` attribute to the argument. As per Clang's specification: + // > The `noalias` attribute indicates that the only memory accesses inside function are loads + // > and stores from objects pointed to by its pointer-typed arguments, with arbitrary + // > offsets. + current_function->addParamAttr(0, llvm::Attribute::NoAlias); + + // Finally, specify that the struct pointer does not capture and is read-only. + current_function->addParamAttr(0, llvm::Attribute::NoCapture); + current_function->addParamAttr(0, llvm::Attribute::ReadOnly); +} + +/****************************************************************************************/ +/* LLVM metadata utilities */ +/****************************************************************************************/ + +void IRBuilder::set_loop_metadata(llvm::BranchInst* branch) { + llvm::LLVMContext& context = builder.getContext(); + MetadataVector loop_metadata; + + // Add nullptr to reserve the first place for loop's metadata self-reference. + loop_metadata.push_back(nullptr); + + // If `vector_width` is 1, explicitly disable vectorization for benchmarking purposes. + if (vector_width == 1) { + llvm::MDString* name = llvm::MDString::get(context, "llvm.loop.vectorize.enable"); + llvm::Value* false_value = llvm::ConstantInt::get(get_boolean_type(), 0); + llvm::ValueAsMetadata* value = llvm::ValueAsMetadata::get(false_value); + loop_metadata.push_back(llvm::MDNode::get(context, {name, value})); + } + + // No metadata to add. + if (loop_metadata.size() <= 1) + return; + + // Add loop's metadata self-reference and attach it to the branch. 
+ llvm::MDNode* metadata = llvm::MDNode::get(context, loop_metadata); + metadata->replaceOperandWith(0, metadata); + branch->setMetadata(llvm::LLVMContext::MD_loop, metadata); +} + /****************************************************************************************/ /* LLVM instruction utilities */ /****************************************************************************************/ @@ -412,10 +458,10 @@ void IRBuilder::create_br_and_set_insertion_point(llvm::BasicBlock* block) { builder.SetInsertPoint(block); } -void IRBuilder::create_cond_br(llvm::Value* condition, - llvm::BasicBlock* true_block, - llvm::BasicBlock* false_block) { - builder.CreateCondBr(condition, true_block, false_block); +llvm::BranchInst* IRBuilder::create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block) { + return builder.CreateCondBr(condition, true_block, false_block); } llvm::BasicBlock* IRBuilder::get_current_block() { diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index b1b23ff0cf..e0cda2cf93 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -24,6 +24,7 @@ static constexpr const unsigned double_precision = 64; /// Some typedefs. using ConstantVector = std::vector; +using MetadataVector = std::vector; using TypeVector = std::vector; using ValueVector = std::vector; @@ -137,9 +138,9 @@ class IRBuilder { void create_br_and_set_insertion_point(llvm::BasicBlock* block); /// Generates LLVM IR for conditional branch. - void create_cond_br(llvm::Value* condition, - llvm::BasicBlock* true_block, - llvm::BasicBlock* false_block); + llvm::BranchInst* create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block); /// Generates LLVM IR for the boolean constant. void create_boolean_constant(int value); @@ -249,6 +250,12 @@ class IRBuilder { /// Sets builder's insertion point to the given block. 
void set_insertion_point(llvm::BasicBlock* block); + /// Sets the necessary attributes for the kernel and its arguments. + void set_kernel_attributes(); + + /// Sets the loop metadata for the given branch from the loop. + void set_loop_metadata(llvm::BranchInst* branch); + /// Pops the last visited value from the value stack. llvm::Value* pop_last_value(); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 5a67a5b445..fb4593c4a6 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -838,15 +838,19 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check the struct type and the kernel declaration. + // Check the struct type with correct attributes and the kernel declaration. std::regex struct_type( "%.*__instance_var__type = type \\{ double\\*, double\\*, double\\*, double\\*, " "double\\*, double\\*, double\\*, i32\\*, double, double, double, i32, i32 \\}"); std::regex kernel_declaration( - R"(define void @nrn_state_hh\(%.*__instance_var__type\* .*\))"); + R"(define void @nrn_state_hh\(%.*__instance_var__type\* noalias nocapture readonly .*\) #0)"); REQUIRE(std::regex_search(module_string, m, struct_type)); REQUIRE(std::regex_search(module_string, m, kernel_declaration)); + // Check kernel attributes. + std::regex kernel_attributes(R"(attributes #0 = \{ nofree nounwind \})"); + REQUIRE(std::regex_search(module_string, m, kernel_attributes)); + // Check for correct variables initialisation and a branch to condition block. std::regex id_initialisation(R"(%id = alloca i32)"); std::regex node_id_initialisation(R"(%node_id = alloca i32)"); @@ -871,6 +875,15 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, condition)); REQUIRE(std::regex_search(module_string, m, cond_br)); + // Check that loop metadata is attached to the scalar kernel. 
+ std::regex loop_metadata(R"(!llvm\.loop !0)"); + std::regex loop_metadata_self_reference(R"(!0 = distinct !\{!0, !1\})"); + std::regex loop_metadata_disable_vectorization( + R"(!1 = !\{!\"llvm\.loop\.vectorize\.enable\", i1 false\})"); + REQUIRE(std::regex_search(module_string, m, loop_metadata)); + REQUIRE(std::regex_search(module_string, m, loop_metadata_self_reference)); + REQUIRE(std::regex_search(module_string, m, loop_metadata_disable_vectorization)); + // Check for correct loads from the struct with GEPs. std::regex load_from_struct( " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*\n" @@ -934,6 +947,10 @@ SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { /*vector_width=*/4); std::smatch m; + // Check that no loop metadata is attached. + std::regex loop_metadata(R"(!llvm\.loop !.*)"); + REQUIRE(!std::regex_search(module_string, m, loop_metadata)); + // Check gather intrinsic is correctly declared. std::regex declaration( R"(declare <4 x double> @llvm\.masked\.gather\.v4f64\.v4p0f64\(<4 x double\*>, i32 immarg, <4 x i1>, <4 x double>\) )"); From 05e9cfafc2bb2014506db56e57ec9fd9c9e5e0d6 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 18 May 2021 03:14:08 -0700 Subject: [PATCH 059/105] Added loaded value to the stack (#655) - fixes the case, where loaded value was taken from the stack, but was never actually put there --- src/codegen/llvm/llvm_ir_builder.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 06ba8d00ef..8828aa83c5 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -319,12 +319,16 @@ llvm::Value* IRBuilder::create_index(llvm::Value* value) { llvm::Value* IRBuilder::create_load(const std::string& name) { llvm::Value* ptr = lookup_value(name); llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); - return builder.CreateLoad(loaded_type, ptr); 
+ llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); + value_stack.push_back(loaded); + return loaded; } llvm::Value* IRBuilder::create_load(llvm::Value* ptr) { llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); - return builder.CreateLoad(loaded_type, ptr); + llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); + value_stack.push_back(loaded); + return loaded; } llvm::Value* IRBuilder::create_load_from_array(const std::string& name, llvm::Value* index) { From 9953758cedf6a9e68a601470e94b19594d799660 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 20 May 2021 00:32:40 -0700 Subject: [PATCH 060/105] Basic predication support for LLVM backend (#652) Added support for vector predication. Currently, we support a very basic predication pattern (that will be extended in the future): ```c++ IF (/*condition*/) { // code here, no nested conditionals } ELSE { // code here, no nested conditionals } ``` **What has been changed and added** 1. Removed vectorization check Before, in the `FOR` statement visitor we were checking whether the code can be vectorized. After refactoring `llvm::IRBuilder<>` into a separate class, there is no interface to reset the builder's vector width. Hence, this check leads to visitor having scalar vector width of 1, and builder having the same vector width. ```c++ if (!can_vectorize(node, sym_tab)) { vector_width = 1; ir_builder.generate_scalar_code(); } ``` In order to avoid any issues, this check is simply removed and will be added in the separate PR. 2. Predication support - `can_vectorize` has been changed to support a single `IF` or `IF/ELSE` pair. - A special vectorized `IF` AST node visitor has been added. - If generating code within `IF` AST node, instructions are masked. 3. 
Added execution and IR tests fixes #539 --- .../llvm/codegen_llvm_helper_visitor.cpp | 2 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 90 ++++++++++------ src/codegen/llvm/codegen_llvm_visitor.hpp | 3 + src/codegen/llvm/llvm_ir_builder.cpp | 83 ++++++++++---- src/codegen/llvm/llvm_ir_builder.hpp | 56 ++++++---- test/unit/codegen/codegen_llvm_execution.cpp | 101 ++++++++++++++++++ test/unit/codegen/codegen_llvm_ir.cpp | 69 ++++++++++++ 7 files changed, 326 insertions(+), 78 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 9b4dad55ee..578aaaa2b4 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -251,7 +251,7 @@ static void append_statements_from_block(ast::StatementVector& statements, for (const auto& statement: block_statements) { const auto& expression_statement = std::dynamic_pointer_cast( statement); - if (!expression_statement->get_expression()->is_solve_block()) + if (!expression_statement || !expression_statement->get_expression()->is_solve_block()) statements.push_back(statement); } } diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 2124ad82c9..ec41008da0 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -54,11 +54,15 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym return false; } - // Check there is no control flow in the kernel. - const std::vector unsupported_nodes = {ast::AstNodeType::IF_STATEMENT}; - const auto& collected = collect_nodes(statement, unsupported_nodes); + // Check for simple supported control flow in the kernel (single if/else statement). 
+ const std::vector supported_control_flow = {ast::AstNodeType::IF_STATEMENT}; + const auto& supported = collect_nodes(statement, supported_control_flow); - return collected.empty(); + // Check for unsupported control flow statements. + const std::vector unsupported_nodes = {ast::AstNodeType::ELSE_IF_STATEMENT}; + const auto& unsupported = collect_nodes(statement, unsupported_nodes); + + return unsupported.empty() && supported.size() <= 1; } llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { @@ -162,6 +166,27 @@ void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& argumen ir_builder.create_function_call(printf, argument_values, /*use_result=*/false); } +void CodegenLLVMVisitor::create_vectorized_control_flow_block(const ast::IfStatement& node) { + // Get the true mask from the condition statement. + llvm::Value* true_mask = accept_and_get(node.get_condition()); + + // Process the true block. + ir_builder.set_mask(true_mask); + node.get_statement_block()->accept(*this); + + // Note: by default, we do not support kernels with complicated control flow. This is checked + // prior to visiting 'CodegenForStatement`. + const auto& elses = node.get_elses(); + if (elses) { + // If `else` statement exists, invert the mask and proceed with code generation. + ir_builder.invert_mask(); + elses->get_statement_block()->accept(*this); + } + + // Clear the mask value. 
+ ir_builder.clear_mask(); +} + void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { auto& functions = module->getFunctionList(); for (auto& func: functions) { @@ -325,7 +350,8 @@ llvm::Value* CodegenLLVMVisitor::read_variable(const ast::VarName& node) { const auto& identifier = node.get_name(); if (identifier->is_name()) { - return ir_builder.create_load(node.get_node_name()); + return ir_builder.create_load(node.get_node_name(), + /*masked=*/ir_builder.generates_predicated_ir()); } if (identifier->is_indexed_name()) { @@ -522,8 +548,8 @@ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomic // | | // +---------------------------+ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatement& node) { - // Disable vector code generation for condition and increment blocks. - ir_builder.stop_vectorization(); + // Condition and increment blocks must be scalar. + ir_builder.generate_scalar_ir(); // Get the current and the next blocks within the function. llvm::BasicBlock* curr_block = ir_builder.get_current_block(); @@ -538,21 +564,11 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); - // Check if the kernel can be vectorised. If not, generate scalar code. - if (!can_vectorize(node, sym_tab)) { - logger->info("Cannot vectorise the for loop in '" + ir_builder.get_current_function_name() + - "'"); - logger->info("Generating scalar code..."); - vector_width = 1; - ir_builder.generate_scalar_code(); - } - - // First, initialise the loop in the same basic block. This block is optional. Also, generate - // scalar code if processing the remainder of the loop. 
- if (node.get_initialization()) - node.get_initialization()->accept(*this); - else - ir_builder.generate_scalar_code(); + // First, initialize the loop in the same basic block. If processing the remainder of the loop, + // no initialization happens. + const auto& main_loop_initialization = node.get_initialization(); + if (main_loop_initialization) + main_loop_initialization->accept(*this); // Branch to condition basic block and insert condition code there. ir_builder.create_br_and_set_insertion_point(for_cond); @@ -561,22 +577,24 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem llvm::Value* cond = accept_and_get(node.get_condition()); llvm::BranchInst* loop_br = ir_builder.create_cond_br(cond, for_body, exit); ir_builder.set_loop_metadata(loop_br); + ir_builder.set_insertion_point(for_body); + + // If not processing remainder of the loop, start vectorization. + if (vector_width > 1 && main_loop_initialization) + ir_builder.generate_vector_ir(); // Generate code for the loop body and create the basic block for the increment. - ir_builder.set_insertion_point(for_body); - ir_builder.start_vectorization(); const auto& statement_block = node.get_statement_block(); statement_block->accept(*this); - ir_builder.stop_vectorization(); + ir_builder.generate_scalar_ir(); ir_builder.create_br_and_set_insertion_point(for_inc); - // Process increment. + + // Process the increment. node.get_increment()->accept(*this); // Create a branch to condition block, then generate exit code out of the loop. ir_builder.create_br(for_cond); ir_builder.set_insertion_point(exit); - ir_builder.generate_vectorized_code(); - ir_builder.start_vectorization(); } @@ -601,12 +619,12 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Allocate parameters on the stack and add them to the symbol table. ir_builder.allocate_function_arguments(func, arguments); - // Process function or procedure body. 
If the function is a compute kernel, then set the - // corresponding flags. If so, the return statement is handled in a separate visitor. - if (is_kernel_function(name)) { - ir_builder.start_vectorization(); + // Process function or procedure body. If the function is a compute kernel, enable + // vectorization. If so, the return statement is handled in a separate visitor. + if (vector_width > 1 && is_kernel_function(name)) { + ir_builder.generate_vector_ir(); block->accept(*this); - ir_builder.stop_vectorization(); + ir_builder.generate_scalar_ir(); } else { block->accept(*this); } @@ -676,6 +694,12 @@ void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { } void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { + // If vectorizing the compute kernel with control flow, process it separately. + if (vector_width > 1 && ir_builder.vectorizing()) { + create_vectorized_control_flow_block(node); + return; + } + // Get the current and the next blocks within the function. llvm::BasicBlock* curr_block = ir_builder.get_current_block(); llvm::BasicBlock* next = curr_block->getNextNode(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 22505a304c..384c20c2c7 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -204,6 +204,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Creates a call to `printf` function. void create_printf_call(const ast::ExpressionVector& arguments); + /// Creates a vectorized version of the LLVM IR for the simple control flow statement. + void create_vectorized_control_flow_block(const ast::IfStatement& node); + /// Returns LLVM type for the given CodegenVarType AST node. 
llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 8828aa83c5..90e7456e33 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -92,11 +92,15 @@ llvm::Value* IRBuilder::pop_last_value() { /****************************************************************************************/ void IRBuilder::create_boolean_constant(int value) { - value_stack.push_back(get_vector_constant(get_boolean_type(), value)); + if (vector_width > 1 && vectorize) { + value_stack.push_back(get_vector_constant(get_boolean_type(), value)); + } else { + value_stack.push_back(get_scalar_constant(get_boolean_type(), value)); + } } void IRBuilder::create_fp_constant(const std::string& value) { - if (instruction_width > 1 && vectorize) { + if (vector_width > 1 && vectorize) { value_stack.push_back(get_vector_constant(get_fp_type(), value)); } else { value_stack.push_back(get_scalar_constant(get_fp_type(), value)); @@ -108,7 +112,7 @@ llvm::Value* IRBuilder::create_global_string(const ast::String& node) { } void IRBuilder::create_i32_constant(int value) { - if (instruction_width > 1 && vectorize) { + if (vector_width > 1 && vectorize) { value_stack.push_back(get_vector_constant(get_i32_type(), value)); } else { value_stack.push_back(get_scalar_constant(get_i32_type(), value)); @@ -123,7 +127,7 @@ llvm::Value* IRBuilder::get_scalar_constant(llvm::Type* type, V value) { template llvm::Value* IRBuilder::get_vector_constant(llvm::Type* type, V value) { ConstantVector constants; - for (unsigned i = 0; i < instruction_width; ++i) { + for (unsigned i = 0; i < vector_width; ++i) { const auto& element = C::get(type, value); constants.push_back(element); } @@ -312,19 +316,27 @@ llvm::Value* IRBuilder::create_index(llvm::Value* value) { const auto& element_type = llvm::cast(vector_type->getElementType()); if (element_type->getBitWidth() == 
i64_type->getIntegerBitWidth()) return value; - return builder.CreateSExtOrTrunc(value, - llvm::FixedVectorType::get(i64_type, instruction_width)); + return builder.CreateSExtOrTrunc(value, llvm::FixedVectorType::get(i64_type, vector_width)); } -llvm::Value* IRBuilder::create_load(const std::string& name) { +llvm::Value* IRBuilder::create_load(const std::string& name, bool masked) { llvm::Value* ptr = lookup_value(name); + + // Check if the generated IR is vectorized and masked. + if (masked) { + return builder.CreateMaskedLoad(ptr, llvm::Align(), mask); + } llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); value_stack.push_back(loaded); return loaded; } -llvm::Value* IRBuilder::create_load(llvm::Value* ptr) { +llvm::Value* IRBuilder::create_load(llvm::Value* ptr, bool masked) { + // Check if the generated IR is vectorized and masked. + if (masked) { + return builder.CreateMaskedLoad(ptr, llvm::Align(), mask); + } llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); value_stack.push_back(loaded); @@ -336,12 +348,23 @@ llvm::Value* IRBuilder::create_load_from_array(const std::string& name, llvm::Va return create_load(element_ptr); } -void IRBuilder::create_store(const std::string& name, llvm::Value* value) { +void IRBuilder::create_store(const std::string& name, llvm::Value* value, bool masked) { llvm::Value* ptr = lookup_value(name); + + // Check if the generated IR is vectorized and masked. + if (masked) { + builder.CreateMaskedStore(value, ptr, llvm::Align(), mask); + return; + } builder.CreateStore(value, ptr); } -void IRBuilder::create_store(llvm::Value* ptr, llvm::Value* value) { +void IRBuilder::create_store(llvm::Value* ptr, llvm::Value* value, bool masked) { + // Check if the generated IR is vectorized and masked. 
+ if (masked) { + builder.CreateMaskedStore(value, ptr, llvm::Align(), mask); + return; + } builder.CreateStore(value, ptr); } @@ -364,8 +387,8 @@ void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, // Even if generating vectorised code, some variables still need to be scalar. Particularly, the // induction variable "id" and remainder loop variables (that start with "epilogue" prefix). llvm::Type* type; - if (instruction_width > 1 && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { - type = llvm::FixedVectorType::get(element_or_scalar_type, instruction_width); + if (vector_width > 1 && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { + type = llvm::FixedVectorType::get(element_or_scalar_type, vector_width); } else { type = element_or_scalar_type; } @@ -389,6 +412,17 @@ llvm::Value* IRBuilder::get_struct_member_ptr(llvm::Value* struct_variable, int return builder.CreateInBoundsGEP(struct_variable, indices); } +void IRBuilder::invert_mask() { + if (!mask) + throw std::runtime_error("Error: mask is not set\n"); + + // Create the vector with all `true` values. + create_boolean_constant(1); + llvm::Value* one = pop_last_value(); + + mask = builder.CreateXor(mask, one); +} + llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, llvm::Value* id_value, llvm::Value* array, @@ -396,22 +430,27 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, // First, calculate the address of the element in the array. llvm::Value* element_ptr = create_inbounds_gep(array, id_value); + // Find out if the vector code is generated. + bool generating_vector_ir = vector_width > 1 && vectorize; + // If the vector code is generated, we need to distinguish between two cases. If the array is // indexed indirectly (i.e. not by an induction variable `kernel_id`), create a gather // instruction. - if (id_name != kernel_id && vectorize && instruction_width > 1) { - return maybe_value_to_store - ? 
builder.CreateMaskedScatter(maybe_value_to_store, element_ptr, llvm::Align()) - : builder.CreateMaskedGather(element_ptr, llvm::Align()); + if (id_name != kernel_id && generating_vector_ir) { + return maybe_value_to_store ? builder.CreateMaskedScatter(maybe_value_to_store, + element_ptr, + llvm::Align(), + mask) + : builder.CreateMaskedGather(element_ptr, llvm::Align(), mask); } llvm::Value* ptr; - if (vectorize && instruction_width > 1) { + if (generating_vector_ir) { // If direct indexing is used during the vectorization, we simply bitcast the scalar pointer // to a vector pointer llvm::Type* vector_type = llvm::PointerType::get( llvm::FixedVectorType::get(element_ptr->getType()->getPointerElementType(), - instruction_width), + vector_width), /*AddressSpace=*/0); ptr = builder.CreateBitCast(element_ptr, vector_type); } else { @@ -420,21 +459,21 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, } if (maybe_value_to_store) { - create_store(ptr, maybe_value_to_store); + create_store(ptr, maybe_value_to_store, /*masked=*/mask && generating_vector_ir); return nullptr; } else { - return create_load(ptr); + return create_load(ptr, /*masked=*/mask && generating_vector_ir); } } void IRBuilder::maybe_replicate_value(llvm::Value* value) { // If the value should not be vectorised, or it is already a vector, add it to the stack. - if (!vectorize || instruction_width == 1 || value->getType()->isVectorTy()) { + if (!vectorize || vector_width == 1 || value->getType()->isVectorTy()) { value_stack.push_back(value); } else { // Otherwise, we generate vectorized code inside the loop, so replicate the value to form a // vector. 
- llvm::Value* vector_value = builder.CreateVectorSplat(instruction_width, value); + llvm::Value* vector_value = builder.CreateVectorSplat(vector_width, value); value_stack.push_back(vector_value); } } diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index e0cda2cf93..ba3800fc66 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -52,13 +52,12 @@ class IRBuilder { /// Precision of the floating-point numbers (32 or 64 bit). unsigned fp_precision; - /// If 1, indicates that the scalar code is generated. Otherwise, the current vectorization - /// width. - unsigned instruction_width; - /// The vector width used for the vectorized code. unsigned vector_width; + /// Masked value used to predicate vector instructions. + llvm::Value* mask; + /// The name of induction variable used in kernel loops. std::string kernel_id; @@ -72,7 +71,7 @@ class IRBuilder { , vectorize(false) , fp_precision(use_single_precision ? single_precision : double_precision) , vector_width(vector_width) - , instruction_width(vector_width) + , mask(nullptr) , kernel_id("") {} /// Initializes the builder with the symbol table and the kernel induction variable id. @@ -81,26 +80,21 @@ class IRBuilder { this->kernel_id = kernel_id; } - /// Explicitly sets the builder to produce scalar code (even during vectorization). - void generate_scalar_code() { - instruction_width = 1; + /// Explicitly sets the builder to produce scalar IR. + void generate_scalar_ir() { + vectorize = false; } - /// Explicitly sets the builder to produce vectorized code. - void generate_vectorized_code() { - instruction_width = vector_width; + /// Indicates whether the builder generates vectorized IR. + bool vectorizing() { + return vectorize; } - /// Turns on vectorization mode. - void start_vectorization() { + /// Explicitly sets the builder to produce vectorized IR. 
+ void generate_vector_ir() { vectorize = true; } - /// Turns off vectorization mode. - void stop_vectorization() { - vectorize = false; - } - /// Sets the current function for which LLVM IR is generated. void set_function(llvm::Function* function) { current_function = function; @@ -112,6 +106,21 @@ class IRBuilder { current_function = nullptr; } + /// Sets the value to be the mask for vector code generation. + void set_mask(llvm::Value* value) { + mask = value; + } + + /// Clears the mask for vector code generation. + void clear_mask() { + mask = nullptr; + } + + /// Indicates whether the vectorized IR is predicated. + bool generates_predicated_ir() { + return vectorize && mask; + } + /// Generates LLVM IR to allocate the arguments of the function on the stack. void allocate_function_arguments(llvm::Function* function, const ast::CodegenVarWithTypeVector& nmodl_arguments); @@ -168,20 +177,20 @@ class IRBuilder { void create_i32_constant(int value); /// Generates LLVM IR to load the value specified by its name and returns it. - llvm::Value* create_load(const std::string& name); + llvm::Value* create_load(const std::string& name, bool masked = false); /// Generates LLVM IR to load the value from the pointer and returns it. - llvm::Value* create_load(llvm::Value* ptr); + llvm::Value* create_load(llvm::Value* ptr, bool masked = false); /// Generates LLVM IR to load the element at the specified index from the given array name and /// returns it. llvm::Value* create_load_from_array(const std::string& name, llvm::Value* index); /// Generates LLVM IR to store the value to the location specified by the name. - void create_store(const std::string& name, llvm::Value* value); + void create_store(const std::string& name, llvm::Value* value, bool masked = false); /// Generates LLVM IR to store the value to the location specified by the pointer. 
- void create_store(llvm::Value* ptr, llvm::Value* value); + void create_store(llvm::Value* ptr, llvm::Value* value, bool masked = false); /// Generates LLVM IR to store the value to the array element, where array is specified by the /// name. @@ -234,6 +243,9 @@ class IRBuilder { /// Creates a pointer to struct type with the given name and given members. llvm::Type* get_struct_ptr_type(const std::string& struct_type_name, TypeVector& member_types); + /// Inverts the mask for vector code generation by xoring it. + void invert_mask(); + /// Generates IR that loads the elements of the array even during vectorization. If the value is /// specified, then it is stored to the array at the given index. llvm::Value* load_to_or_store_from_array(const std::string& id_name, diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 36d13447bf..e12dfd8981 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -509,3 +509,104 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { } } } + +//============================================================================= +// Vectorised kernel with control flow. +//============================================================================= + +SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { + GIVEN("Simple MOD file with if statement") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + } + + STATE { + w x y z + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + IF (v > 0) { + w = v * w + } + + IF (x < 0) { + x = 7 + } + + IF (0 <= y && y < 10 || z == 0) { + y = 2 * y + } ELSE { + z = z - y + } + + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. 
+ SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + /*opt_passes=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_functions(); + + // Create the instance struct data. + int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. + std::vector x = {-1.0, 2.0, -3.0, 4.0, -5.0}; + std::vector y = {11.0, 2.0, -3.0, 4.0, 100.0}; + std::vector z = {0.0, 1.0, 20.0, 0.0, 40.0}; + + std::vector w = {10.0, 20.0, 30.0, 40.0, 50.0}; + std::vector voltage = {-1.0, 2.0, -1.0, 2.0, -1.0}; + std::vector node_index = {1, 2, 3, 4, 0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, w, "w"); + initialise_instance_variable(instance_info, voltage, "voltage"); + initialise_instance_variable(instance_info, node_index, "node_index"); + + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, y, "y"); + initialise_instance_variable(instance_info, z, "z"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Masked instructions are generated") { + runner.run_with_argument("__nrn_state_test_wrapper", + instance_data.base_ptr); + std::vector w_expected = {20.0, 20.0, 60.0, 40.0, 50.0}; + REQUIRE(check_instance_variable(instance_info, w_expected, "w")); + + std::vector x_expected = {7.0, 2.0, 7.0, 4.0, 7.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + + std::vector y_expected = {22.0, 4.0, -3.0, 8.0, 100.0}; + std::vector z_expected = {0.0, 1.0, 23.0, 0.0, -60.0}; + REQUIRE(check_instance_variable(instance_info, y_expected, "y")); + REQUIRE(check_instance_variable(instance_info, z_expected, "z")); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index fb4593c4a6..11c380980e 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1020,6 +1020,75 @@ SCENARIO("Vectorised simple kernel with ion writes", "[visitor][llvm]") { } } +//============================================================================= +// Vectorised kernel with simple control flow +//============================================================================= + +SCENARIO("Vectorised simple kernel with control flow", "[visitor][llvm]") { + GIVEN("A single if/else statement") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + } + + STATE { + y + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + IF (y < 0) { + y = y + 7 + } ELSE { + y = v + } + } + )"; + + THEN("masked load and stores are created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/true, + /*vector_width=*/8); + std::smatch m; + + // Check masked load/store intrinsics are correctly declared. 
+ std::regex masked_load( + R"(declare <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\*, i32 immarg, <8 x i1>, <8 x float>\))"); + std::regex masked_store( + R"(declare void @llvm.masked\.store\.v8f32\.p0v8f32\(<8 x float>, <8 x float>\*, i32 immarg, <8 x i1>\))"); + REQUIRE(std::regex_search(module_string, m, masked_load)); + REQUIRE(std::regex_search(module_string, m, masked_store)); + + // Check true direction instructions are predicated with mask. + // IF (mech->y[id] < 0) { + // mech->y[id] = mech->y[id] + 7 + std::regex mask(R"(%30 = fcmp olt <8 x float> %.*, zeroinitializer)"); + std::regex true_load( + R"(call <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\* %.*, i32 1, <8 x i1> %30, <8 x float> undef\))"); + std::regex true_store( + R"(call void @llvm\.masked\.store\.v8f32\.p0v8f32\(<8 x float> %.*, <8 x float>\* %.*, i32 1, <8 x i1> %30\))"); + REQUIRE(std::regex_search(module_string, m, mask)); + REQUIRE(std::regex_search(module_string, m, true_load)); + REQUIRE(std::regex_search(module_string, m, true_store)); + + // Check false direction instructions are predicated with inverted mask. 
+ // } ELSE { + // mech->y[id] = v + // } + std::regex inverted_mask( + R"(%47 = xor <8 x i1> %30, )"); + std::regex false_load( + R"(call <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\* %v, i32 1, <8 x i1> %47, <8 x float> undef\))"); + std::regex false_store( + R"(call void @llvm\.masked\.store\.v8f32\.p0v8f32\(<8 x float> %.*, <8 x float>\* %.*, i32 1, <8 x i1> %47\))"); + } + } +} + //============================================================================= // Derivative block : test optimization //============================================================================= From 7b459254776cd2ffeee376ff5d251c70501b46c7 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 20 May 2021 07:32:50 -0700 Subject: [PATCH 061/105] Improvements for LLVM code generation and benchmarking (#661) * Improved cmake versioning of LLVM * Added ^ support * Added more math functions intrinsics with tests * Added compute time variance and min/max times in benchmarking output --- CMakeLists.txt | 3 - src/codegen/llvm/codegen_llvm_visitor.cpp | 4 +- src/codegen/llvm/codegen_llvm_visitor.hpp | 2 +- src/codegen/llvm/llvm_ir_builder.cpp | 24 ++++- test/benchmark/llvm_benchmark.cpp | 29 ++++-- test/unit/codegen/codegen_llvm_ir.cpp | 117 ++++++++++++++++++++-- 6 files changed, 154 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fbe58984f..6cbb3ba3b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -184,9 +184,6 @@ if(NMODL_ENABLE_LLVM) include(cmake/LLVMHelper.cmake) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(-DNMODL_LLVM_BACKEND) - if(LLVM_VERSION VERSION_LESS_EQUAL 12) - add_definitions(-DLLVM_VERSION_LESS_THAN_13) - endif() endif() # ============================================================================= diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index ec41008da0..ba28361e09 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ 
b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/Host.h" #include "llvm/Support/ToolOutputFile.h" -#ifndef LLVM_VERSION_LESS_THAN_13 +#if LLVM_VERSION_MAJOR >= 13 #include "llvm/CodeGen/ReplaceWithVeclib.h" #endif @@ -819,7 +819,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Optionally, replace LLVM's maths intrinsics with vector library calls. if (vector_width > 1 && vector_library != llvm::TargetLibraryInfoImpl::NoLibrary) { -#ifdef LLVM_VERSION_LESS_THAN_13 +#if LLVM_VERSION_MAJOR < 13 logger->warn( "This version of LLVM does not support replacement of LLVM intrinsics with vector " "library calls"); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 384c20c2c7..a97e73030a 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -51,7 +51,7 @@ namespace codegen { /// A map to query vector library by its string value. static const std::map veclib_map = { {"Accelerate", llvm::TargetLibraryInfoImpl::Accelerate}, -#ifndef LLVM_VERSION_LESS_THAN_13 +#if LLVM_VERSION_MAJOR >= 13 {"libmvec", llvm::TargetLibraryInfoImpl::LIBMVEC_X86}, #endif {"MASSV", llvm::TargetLibraryInfoImpl::MASSV}, diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 90e7456e33..c67941df3e 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -165,9 +165,26 @@ void IRBuilder::create_function_call(llvm::Function* callee, void IRBuilder::create_intrinsic(const std::string& name, ValueVector& argument_values, TypeVector& argument_types) { + // Process 'pow' call separately. + if (name == "pow") { + llvm::Value* pow_intrinsic = builder.CreateIntrinsic(llvm::Intrinsic::pow, + {argument_types.front()}, + argument_values); + value_stack.push_back(pow_intrinsic); + return; + } + + // Create other intrinsics. 
unsigned intrinsic_id = llvm::StringSwitch(name) + .Case("ceil", llvm::Intrinsic::ceil) + .Case("cos", llvm::Intrinsic::cos) .Case("exp", llvm::Intrinsic::exp) - .Case("pow", llvm::Intrinsic::pow) + .Case("fabs", llvm::Intrinsic::fabs) + .Case("floor", llvm::Intrinsic::floor) + .Case("log", llvm::Intrinsic::log) + .Case("log10", llvm::Intrinsic::log10) + .Case("sin", llvm::Intrinsic::sin) + .Case("sqrt", llvm::Intrinsic::sqrt) .Default(llvm::Intrinsic::not_intrinsic); if (intrinsic_id) { llvm::Value* intrinsic = @@ -267,6 +284,11 @@ void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::Binary #undef DISPATCH + // Separately replace ^ with the `pow` intrinsic. + case ast::BinaryOp::BOP_POWER: + result = builder.CreateIntrinsic(llvm::Intrinsic::pow, {lhs->getType()}, {lhs, rhs}); + break; + // Logical instructions. case ast::BinaryOp::BOP_AND: result = builder.CreateAnd(lhs, rhs); diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index f6811fd664..b9f2fdeced 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -107,15 +107,21 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { // Benchmark every kernel. for (const auto& kernel_name: kernel_names) { - // Initialise the data. - auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); - - double size_mbs = instance_data.num_bytes / (1024.0 * 1024.0); - logger->info("Benchmarking kernel '{}' with {} MBs dataset", kernel_name, size_mbs); - // For every kernel run the benchmark `num_experiments` times. + double time_min = std::numeric_limits::max(); + double time_max = 0.0; double time_sum = 0.0; + double time_squared_sum = 0.0; for (int i = 0; i < num_experiments; ++i) { + // Initialise the data. + auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); + + // Log instance size once. 
+ if (i == 0) { + double size_mbs = instance_data.num_bytes / (1024.0 * 1024.0); + logger->info("Benchmarking kernel '{}' with {} MBs dataset", kernel_name, size_mbs); + } + // Record the execution time of the kernel. std::string wrapper_name = "__" + kernel_name + "_wrapper"; auto start = std::chrono::high_resolution_clock::now(); @@ -126,10 +132,19 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { // Log the time taken for each run. logger->info("Experiment {} compute time = {:.6f} sec", i, diff.count()); + // Update statistics. time_sum += diff.count(); + time_squared_sum += diff.count() * diff.count(); + time_min = std::min(time_min, diff.count()); + time_max = std::max(time_max, diff.count()); } // Log the average time taken for the kernel. - logger->info("Average compute time = {:.6f} \n", time_sum / num_experiments); + double time_mean = time_sum / num_experiments; + logger->info("Average compute time = {:.6f}", time_mean); + logger->info("Compute time variance = {:g}", + time_squared_sum / num_experiments - time_mean * time_mean); + logger->info("Minimum compute time = {:.6f}", time_min); + logger->info("Minimum compute time = {:.6f}\n", time_max); } } diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 11c380980e..25823d4a4e 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -97,7 +97,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { std::regex lhs(R"(%2 = load float, float\* %a)"); std::regex res(R"(%3 = fadd float %2, %1)"); - // Check the float values are loaded correctly and added + // Check the float values are loaded correctly and added. 
REQUIRE(std::regex_search(module_string, m, rhs)); REQUIRE(std::regex_search(module_string, m, lhs)); REQUIRE(std::regex_search(module_string, m, res)); @@ -116,7 +116,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check rhs + // Check rhs. std::regex rr(R"(%1 = load double, double\* %b)"); std::regex rl(R"(%2 = load double, double\* %a)"); std::regex x(R"(%3 = fadd double %2, %1)"); @@ -124,7 +124,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, rl)); REQUIRE(std::regex_search(module_string, m, x)); - // Check lhs + // Check lhs. std::regex lr(R"(%4 = load double, double\* %b)"); std::regex ll(R"(%5 = load double, double\* %a)"); std::regex y(R"(%6 = fsub double %5, %4)"); @@ -132,7 +132,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, ll)); REQUIRE(std::regex_search(module_string, m, y)); - // Check result + // Check result. std::regex res(R"(%7 = fdiv double %6, %3)"); REQUIRE(std::regex_search(module_string, m, res)); } @@ -150,13 +150,36 @@ SCENARIO("Binary expression", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check store immediate is created + // Check store immediate is created. std::regex allocation(R"(%i = alloca double)"); std::regex assignment(R"(store double 2.0*e\+00, double\* %i)"); REQUIRE(std::regex_search(module_string, m, allocation)); REQUIRE(std::regex_search(module_string, m, assignment)); } } + + GIVEN("Function with power operator") { + std::string nmodl_text = R"( + FUNCTION power() { + LOCAL i, j + i = 2 + j = 4 + power = i ^ j + } + )"; + + THEN("'pow' intrinsic is created") { + std::string module_string = + run_llvm_visitor(nmodl_text, /*opt=*/false, /*use_single_precision=*/true); + std::smatch m; + + // Check 'pow' intrinsic. 
+ std::regex declaration(R"(declare float @llvm\.pow\.f32\(float, float\))"); + std::regex pow(R"(call float @llvm\.pow\.f32\(float %.*, float %.*\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + REQUIRE(std::regex_search(module_string, m, pow)); + } + } } //============================================================================= @@ -492,8 +515,44 @@ SCENARIO("Function call", "[visitor][llvm]") { GIVEN("A call to external method") { std::string nmodl_text = R"( - FUNCTION bar(i) { - bar = exp(i) + FUNCTION nmodl_ceil(x) { + nmodl_ceil = ceil(x) + } + + FUNCTION nmodl_cos(x) { + nmodl_cos = cos(x) + } + + FUNCTION nmodl_exp(x) { + nmodl_exp = exp(x) + } + + FUNCTION nmodl_fabs(x) { + nmodl_fabs = fabs(x) + } + + FUNCTION nmodl_floor(x) { + nmodl_floor = floor(x) + } + + FUNCTION nmodl_log(x) { + nmodl_log = log(x) + } + + FUNCTION nmodl_log10(x) { + nmodl_log10 = log10(x) + } + + FUNCTION nmodl_pow(x, y) { + nmodl_pow = pow(x, y) + } + + FUNCTION nmodl_sin(x) { + nmodl_sin = sin(x) + } + + FUNCTION nmodl_sqrt(x) { + nmodl_sqrt = sqrt(x) } )"; @@ -501,13 +560,49 @@ SCENARIO("Function call", "[visitor][llvm]") { std::string module_string = run_llvm_visitor(nmodl_text); std::smatch m; - // Check for intrinsic declaration. + // Check for intrinsic declarations. 
+ std::regex ceil(R"(declare double @llvm\.ceil\.f64\(double\))"); + std::regex cos(R"(declare double @llvm\.cos\.f64\(double\))"); std::regex exp(R"(declare double @llvm\.exp\.f64\(double\))"); + std::regex fabs(R"(declare double @llvm\.fabs\.f64\(double\))"); + std::regex floor(R"(declare double @llvm\.floor\.f64\(double\))"); + std::regex log(R"(declare double @llvm\.log\.f64\(double\))"); + std::regex log10(R"(declare double @llvm\.log10\.f64\(double\))"); + std::regex pow(R"(declare double @llvm\.pow\.f64\(double, double\))"); + std::regex sin(R"(declare double @llvm\.sin\.f64\(double\))"); + std::regex sqrt(R"(declare double @llvm\.sqrt\.f64\(double\))"); + REQUIRE(std::regex_search(module_string, m, ceil)); + REQUIRE(std::regex_search(module_string, m, cos)); REQUIRE(std::regex_search(module_string, m, exp)); + REQUIRE(std::regex_search(module_string, m, fabs)); + REQUIRE(std::regex_search(module_string, m, floor)); + REQUIRE(std::regex_search(module_string, m, log)); + REQUIRE(std::regex_search(module_string, m, log10)); + REQUIRE(std::regex_search(module_string, m, pow)); + REQUIRE(std::regex_search(module_string, m, sin)); + REQUIRE(std::regex_search(module_string, m, sqrt)); // Check the correct call is made. 
- std::regex call(R"(call double @llvm\.exp\.f64\(double %[0-9]+\))"); - REQUIRE(std::regex_search(module_string, m, call)); + std::regex ceil_call(R"(call double @llvm\.ceil\.f64\(double %[0-9]+\))"); + std::regex cos_call(R"(call double @llvm\.cos\.f64\(double %[0-9]+\))"); + std::regex exp_call(R"(call double @llvm\.exp\.f64\(double %[0-9]+\))"); + std::regex fabs_call(R"(call double @llvm\.fabs\.f64\(double %[0-9]+\))"); + std::regex floor_call(R"(call double @llvm\.floor\.f64\(double %[0-9]+\))"); + std::regex log_call(R"(call double @llvm\.log\.f64\(double %[0-9]+\))"); + std::regex log10_call(R"(call double @llvm\.log10\.f64\(double %[0-9]+\))"); + std::regex pow_call(R"(call double @llvm\.pow\.f64\(double %[0-9]+, double %[0-9]+\))"); + std::regex sin_call(R"(call double @llvm\.sin\.f64\(double %[0-9]+\))"); + std::regex sqrt_call(R"(call double @llvm\.sqrt\.f64\(double %[0-9]+\))"); + REQUIRE(std::regex_search(module_string, m, ceil_call)); + REQUIRE(std::regex_search(module_string, m, cos_call)); + REQUIRE(std::regex_search(module_string, m, exp_call)); + REQUIRE(std::regex_search(module_string, m, fabs_call)); + REQUIRE(std::regex_search(module_string, m, floor_call)); + REQUIRE(std::regex_search(module_string, m, log_call)); + REQUIRE(std::regex_search(module_string, m, log10_call)); + REQUIRE(std::regex_search(module_string, m, pow_call)); + REQUIRE(std::regex_search(module_string, m, sin_call)); + REQUIRE(std::regex_search(module_string, m, sqrt_call)); } } @@ -1230,7 +1325,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { REQUIRE(std::regex_search(no_library_module_str, m, exp_decl)); REQUIRE(std::regex_search(no_library_module_str, m, exp_call)); -#ifndef LLVM_VERSION_LESS_THAN_13 +#if LLVM_VERSION_MAJOR >= 13 // Check exponential calls are replaced with calls to SVML library. 
std::string svml_library_module_str = run_llvm_visitor(nmodl_text, /*opt=*/false, From dcaff9a9e003c5c57eb68478acc1ad6ddcb6ac74 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 20 May 2021 14:19:41 -0700 Subject: [PATCH 062/105] Fixed `alloca`s insertion point for LLVM backend (#663) * With this PR alloca instructions are always inserted in the beginning of the function entry block. This is done to avoid them in the while or for loops, where allocations per iteration cause stack overflow (if the IR is not optimized). * Insertion point for allocas is the enetry block now See #653 --- src/codegen/llvm/codegen_llvm_visitor.cpp | 4 +-- src/codegen/llvm/llvm_ir_builder.cpp | 38 +++++++++++++++++++++-- src/codegen/llvm/llvm_ir_builder.hpp | 7 +++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index ba28361e09..6df5820d42 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -601,12 +601,12 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node) { const auto& name = node.get_node_name(); const auto& arguments = node.get_arguments(); - llvm::Function* func = module->getFunction(name); - ir_builder.set_function(func); // Create the entry basic block of the function/procedure and point the local named values table // to the symbol table. + llvm::Function* func = module->getFunction(name); ir_builder.create_block_and_set_insertion_point(func); + ir_builder.set_function(func); // When processing a function, it returns a value named in NMODL. Therefore, we // first run RenameVisitor to rename it into ret_. 
This will aid in avoiding diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index c67941df3e..004f28d857 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -144,7 +144,7 @@ void IRBuilder::allocate_function_arguments(llvm::Function* function, for (auto& arg: function->args()) { std::string arg_name = nmodl_arguments[i++].get()->get_node_name(); llvm::Type* arg_type = arg.getType(); - llvm::Value* alloca = builder.CreateAlloca(arg_type, /*ArraySize=*/nullptr, arg_name); + llvm::Value* alloca = create_alloca(arg_name, arg_type); arg.setName(arg_name); builder.CreateStore(&arg, alloca); } @@ -245,11 +245,43 @@ void IRBuilder::set_loop_metadata(llvm::BranchInst* branch) { /* LLVM instruction utilities */ /****************************************************************************************/ +llvm::Value* IRBuilder::create_alloca(const std::string& name, llvm::Type* type) { + // If insertion point for `alloca` instructions is not set, then create the instruction in the + // entry block and set it to be the insertion point. + if (!alloca_ip) { + // Get the entry block and insert the `alloca` instruction there. + llvm::BasicBlock* current_block = builder.GetInsertBlock(); + llvm::BasicBlock& entry_block = current_block->getParent()->getEntryBlock(); + builder.SetInsertPoint(&entry_block); + llvm::Value* alloca = builder.CreateAlloca(type, /*ArraySize=*/nullptr, name); + + // Set the `alloca` instruction insertion point and restore the insertion point for the next + // set of instructions. + alloca_ip = llvm::cast(alloca); + builder.SetInsertPoint(current_block); + return alloca; + } + + // Create `alloca` instruction. 
+ llvm::BasicBlock* alloca_block = alloca_ip->getParent(); + const auto& data_layout = alloca_block->getModule()->getDataLayout(); + auto* alloca = new llvm::AllocaInst(type, + data_layout.getAllocaAddrSpace(), + /*ArraySize=*/nullptr, + data_layout.getPrefTypeAlign(type), + name); + + // Insert `alloca` at the specified insertion point and reset it for the next instructions. + alloca_block->getInstList().insertAfter(alloca_ip->getIterator(), alloca); + alloca_ip = alloca; + return alloca; +} + void IRBuilder::create_array_alloca(const std::string& name, llvm::Type* element_type, int num_elements) { llvm::Type* array_type = llvm::ArrayType::get(element_type, num_elements); - builder.CreateAlloca(array_type, /*ArraySize=*/nullptr, name); + create_alloca(name, array_type); } void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) { @@ -414,7 +446,7 @@ void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, } else { type = element_or_scalar_type; } - builder.CreateAlloca(type, /*ArraySize=*/nullptr, name); + create_alloca(name, type); } void IRBuilder::create_unary_op(llvm::Value* value, ast::UnaryOp op) { diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index ba3800fc66..744b737392 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -46,6 +46,9 @@ class IRBuilder { /// Symbol table of the NMODL AST. symtab::SymbolTable* symbol_table; + /// Insertion point for `alloca` instructions. + llvm::Instruction* alloca_ip; + /// Flag to indicate that the generated IR should be vectorized. bool vectorize; @@ -69,6 +72,7 @@ class IRBuilder { , symbol_table(nullptr) , current_function(nullptr) , vectorize(false) + , alloca_ip(nullptr) , fp_precision(use_single_precision ? 
single_precision : double_precision) , vector_width(vector_width) , mask(nullptr) @@ -104,6 +108,7 @@ class IRBuilder { void clear_function() { value_stack.clear(); current_function = nullptr; + alloca_ip = nullptr; } /// Sets the value to be the mask for vector code generation. @@ -125,6 +130,8 @@ class IRBuilder { void allocate_function_arguments(llvm::Function* function, const ast::CodegenVarWithTypeVector& nmodl_arguments); + llvm::Value* create_alloca(const std::string& name, llvm::Type* type); + /// Generates IR for allocating an array. void create_array_alloca(const std::string& name, llvm::Type* element_type, int num_elements); From 9109139d0f5bf3f0b819de66f0b38c876d80b6ce Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 20 May 2021 23:53:04 -0700 Subject: [PATCH 063/105] Fast math flags for LLVM backend (#662) Added support for fast math flags in LLVM backend. Currently, the user can specify them via command-line (this approach was chosen for easier benchmarking). The specified flags are named exactly the same as in LLVM. This feature is useful to enable previously unsafe FP-math optimizations. For example, fused-multiply-add instructions can now be generated when lowering LLVM IR to assembly or executing via JIT. 
Example: ```c++ // fma.mod FUNCTION fma(a, b, c) { fma = (a * b) + c } ``` ```bash $ ./nmodl fma.mod --verbose debug llvm --ir --fmf nnan contract afn --opt ``` ```llvm define double @fma(double %a, double %b, double %c) { %1 = fmul nnan contract afn double %a, %b %2 = fadd nnan contract afn double %1, %c ret double %2 } ``` --- src/codegen/llvm/codegen_llvm_visitor.hpp | 5 +-- src/codegen/llvm/llvm_ir_builder.hpp | 28 +++++++++++++++-- src/main.cpp | 9 +++++- test/unit/codegen/codegen_llvm_ir.cpp | 38 +++++++++++++++++++++-- 4 files changed, 73 insertions(+), 7 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index a97e73030a..c3beb53640 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -113,14 +113,15 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { bool use_single_precision = false, int vector_width = 1, std::string vec_lib = "none", - bool add_debug_information = false) + bool add_debug_information = false, + std::vector fast_math_flags = {}) : mod_filename(mod_filename) , output_dir(output_dir) , opt_passes(opt_passes) , vector_width(vector_width) , vector_library(veclib_map.at(vec_lib)) , add_debug_information(add_debug_information) - , ir_builder(*context, use_single_precision, vector_width) + , ir_builder(*context, use_single_precision, vector_width, fast_math_flags) , debug_builder(*module) , codegen_pm(module.get()) , opt_pm(module.get()) {} diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index 744b737392..b9736e2846 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -64,10 +64,14 @@ class IRBuilder { /// The name of induction variable used in kernel loops. std::string kernel_id; + /// Fast math flags for floating-point IR instructions. 
+ std::vector fast_math_flags; + public: IRBuilder(llvm::LLVMContext& context, bool use_single_precision = false, - unsigned vector_width = 1) + unsigned vector_width = 1, + std::vector fast_math_flags = {}) : builder(context) , symbol_table(nullptr) , current_function(nullptr) @@ -76,10 +80,30 @@ class IRBuilder { , fp_precision(use_single_precision ? single_precision : double_precision) , vector_width(vector_width) , mask(nullptr) - , kernel_id("") {} + , kernel_id("") + , fast_math_flags(fast_math_flags) {} + + /// Transforms the fast math flags provided to the builder into LLVM's representation. + llvm::FastMathFlags transform_to_fmf(std::vector& flags) { + static const std::map set_flag = { + {"nnan", &llvm::FastMathFlags::setNoNaNs}, + {"ninf", &llvm::FastMathFlags::setNoInfs}, + {"nsz", &llvm::FastMathFlags::setNoSignedZeros}, + {"contract", &llvm::FastMathFlags::setAllowContract}, + {"afn", &llvm::FastMathFlags::setApproxFunc}, + {"reassoc", &llvm::FastMathFlags::setAllowReassoc}, + {"fast", &llvm::FastMathFlags::setFast}}; + llvm::FastMathFlags fmf; + for (const auto& flag: flags) { + (fmf.*(set_flag.at(flag)))(true); + } + return fmf; + } /// Initializes the builder with the symbol table and the kernel induction variable id. 
void initialize(symtab::SymbolTable& symbol_table, std::string& kernel_id) { + if (!fast_math_flags.empty()) + builder.setFastMathFlags(transform_to_fmf(fast_math_flags)); this->symbol_table = &symbol_table; this->kernel_id = kernel_id; } diff --git a/src/main.cpp b/src/main.cpp index 87a17d6eb2..b0b16d145d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -185,6 +185,9 @@ int main(int argc, const char* argv[]) { /// disable debug information generation for the IR bool disable_debug_information(false); + /// fast math flags for LLVM backend + std::vector llvm_fast_math_flags; + /// run llvm benchmark bool run_llvm_benchmark(false); @@ -347,6 +350,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_option("--veclib", vector_library, fmt::format("Vector library for maths functions ({})", vector_library))->check(CLI::IsMember({"Accelerate", "libmvec", "MASSV", "SVML", "none"})); + llvm_opt->add_option("--fmf", + llvm_fast_math_flags, + "Fast math flags for floating-point optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); // LLVM IR benchmark options. 
auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); @@ -690,7 +696,8 @@ int main(int argc, const char* argv[]) { llvm_float_type, llvm_vec_width, vector_library, - !disable_debug_information); + !disable_debug_information, + llvm_fast_math_flags); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 25823d4a4e..cf5dd1db16 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -37,7 +37,8 @@ std::string run_llvm_visitor(const std::string& text, bool opt = false, bool use_single_precision = false, int vector_width = 1, - std::string vec_lib = "none") { + std::string vec_lib = "none", + std::vector fast_math_flags = {}) { NmodlDriver driver; const auto& ast = driver.parse_string(text); @@ -50,7 +51,9 @@ std::string run_llvm_visitor(const std::string& text, opt, use_single_precision, vector_width, - vec_lib); + vec_lib, + /*add_debug_information=*/false, + fast_math_flags); llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); } @@ -1378,6 +1381,37 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { } } +//============================================================================= +// Fast math flags +//============================================================================= + +SCENARIO("Fast math flags", "[visitor][llvm]") { + GIVEN("A function to produce fma and specified math flags") { + std::string nmodl_text = R"( + FUNCTION foo(a, b, c) { + foo = (a * b) + c + } + )"; + + THEN("instructions are generated with the flags set") { + std::string module_string = + run_llvm_visitor(nmodl_text, + /*opt=*/true, + /*use_single_precision=*/false, + /*vector_width=*/1, + /*vec_lib=*/"none", + /*fast_math_flags=*/{"nnan", "contract", "afn"}); + std::smatch m; + + // Check flags for produced 
'fmul' and 'fadd' instructions. + std::regex fmul(R"(fmul nnan contract afn double %.*, %.*)"); + std::regex fadd(R"(fadd nnan contract afn double %.*, %.*)"); + REQUIRE(std::regex_search(module_string, m, fmul)); + REQUIRE(std::regex_search(module_string, m, fadd)); + } + } +} + //============================================================================= // Optimization : dead code removal //============================================================================= From fa5c7bfdf933aec1582c6f14dd7b337d80193e07 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 21 May 2021 14:04:39 +0200 Subject: [PATCH 064/105] Avoid generating LLVM IR for Functions and Procedures if inlined (#664) --- .../llvm/codegen_llvm_helper_visitor.cpp | 18 +++++ .../llvm/codegen_llvm_helper_visitor.hpp | 10 ++- src/codegen/llvm/codegen_llvm_visitor.cpp | 2 +- src/codegen/llvm/codegen_llvm_visitor.hpp | 7 +- src/main.cpp | 3 +- test/unit/codegen/codegen_llvm_ir.cpp | 66 ++++++++++++++++++- 6 files changed, 97 insertions(+), 9 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 578aaaa2b4..0725760d8a 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -198,6 +198,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { } codegen_functions.push_back(function); } + /** * \note : Order of variables is not important but we assume all pointers * are added first and then scalar variables like t, dt, second_order etc. 
@@ -538,11 +539,17 @@ void CodegenLLVMHelperVisitor::rename_local_variables(ast::StatementBlock& node) void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { + // if the Procedure block is already inlined, there is no reason to generate the LLVM IR code + if (nmodl_inline) + return; node.visit_children(*this); create_function_for_node(node); } void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { + // if the Function block is already inlined, there is no reason to generate the LLVM IR code + if (nmodl_inline) + return; node.visit_children(*this); create_function_for_node(node); } @@ -788,6 +795,17 @@ void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { for (auto& fun: codegen_functions) { node.emplace_back_node(fun); } + // Remove Function and Procedure blocks from the Program since they are already inlined + if (nmodl_inline) { + const auto& func_proc_nodes = + collect_nodes(node, + {ast::AstNodeType::FUNCTION_BLOCK, ast::AstNodeType::PROCEDURE_BLOCK}); + std::unordered_set nodes_to_erase; + for (const auto& ast_node: func_proc_nodes) { + nodes_to_erase.insert(static_cast(ast_node.get())); + } + node.erase_node(nodes_to_erase); + } } diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index bbff588675..3619cbc32e 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -100,9 +100,12 @@ struct InstanceVarHelper { * these will be common across all backends. 
*/ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { - // explicit vectorisation width + /// explicit vectorisation width int vector_width; + /// variable to check whether Function and Procedures blocks are inline by NMODL passes + bool nmodl_inline; + /// newly generated code generation specific functions CodegenFunctionVector codegen_functions; @@ -134,8 +137,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { static const std::string VOLTAGE_VAR; static const std::string NODE_INDEX_VAR; - CodegenLLVMHelperVisitor(int vector_width) - : vector_width(vector_width){}; + CodegenLLVMHelperVisitor(int vector_width, bool nmodl_inline) + : vector_width(vector_width) + , nmodl_inline(nmodl_inline) {} const InstanceVarHelper& get_instance_var_helper() { return instance_var_helper; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 6df5820d42..515949e329 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -770,7 +770,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // - convert function and procedure blocks into CodegenFunctions // - gather information about AST. For now, information about functions // and procedures is used only. - CodegenLLVMHelperVisitor v{vector_width}; + CodegenLLVMHelperVisitor v{vector_width, nmodl_inline}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); sym_tab = node.get_symbol_table(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index c3beb53640..cbc0f9b949 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -69,6 +69,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Output directory for code generation. 
std::string output_dir; + /// Variable to check if Functions and Procedures are inlined by NMODL passes + bool nmodl_inline; + private: /// Underlying LLVM context. std::unique_ptr context = std::make_unique(); @@ -114,9 +117,11 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { int vector_width = 1, std::string vec_lib = "none", bool add_debug_information = false, - std::vector fast_math_flags = {}) + std::vector fast_math_flags = {}, + bool nmodl_inline = false) : mod_filename(mod_filename) , output_dir(output_dir) + , nmodl_inline(nmodl_inline) , opt_passes(opt_passes) , vector_width(vector_width) , vector_library(veclib_map.at(vec_lib)) diff --git a/src/main.cpp b/src/main.cpp index b0b16d145d..af018d9fff 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -697,7 +697,8 @@ int main(int argc, const char* argv[]) { llvm_vec_width, vector_library, !disable_debug_information, - llvm_fast_math_flags); + llvm_fast_math_flags, + nmodl_inline); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index cf5dd1db16..b43c6bee8e 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -16,6 +16,7 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "parser/nmodl_driver.hpp" #include "visitors/checkparent_visitor.hpp" +#include "visitors/inline_visitor.hpp" #include "visitors/neuron_solve_visitor.hpp" #include "visitors/solve_block_visitor.hpp" #include "visitors/symtab_visitor.hpp" @@ -38,11 +39,15 @@ std::string run_llvm_visitor(const std::string& text, bool use_single_precision = false, int vector_width = 1, std::string vec_lib = "none", - std::vector fast_math_flags = {}) { + std::vector fast_math_flags = {}, + bool nmodl_inline = false) { NmodlDriver driver; const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); + if (nmodl_inline) 
{ + InlineVisitor().visit_program(*ast); + } NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); @@ -53,7 +58,9 @@ std::string run_llvm_visitor(const std::string& text, vector_width, vec_lib, /*add_debug_information=*/false, - fast_math_flags); + fast_math_flags, + nmodl_inline); + llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); } @@ -71,7 +78,7 @@ std::vector> run_llvm_visitor_helper( SymtabVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - CodegenLLVMHelperVisitor(vector_width).visit_program(*ast); + CodegenLLVMHelperVisitor(vector_width, /*nmodl_inline=*/false).visit_program(*ast); const auto& nodes = collect_nodes(*ast, nodes_to_collect); @@ -1436,3 +1443,56 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { } } } + +//============================================================================= +// Inlining: remove inline code blocks +//============================================================================= + +SCENARIO("Removal of inlined functions and procedures", "[visitor][llvm][inline]") { + GIVEN("Simple breakpoint block calling a function and a procedure") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test_inline + RANGE a, b, s + } + ASSIGNED { + a + b + s + } + PROCEDURE test_add(a, b) { + LOCAL i + i = a + b + } + FUNCTION test_sub(a, b) { + test_sub = a - b + } + BREAKPOINT { + SOLVE states METHOD cnexp + } + DERIVATIVE states { + a = 1 + b = 2 + test_add(a, b) + s = test_sub(a, b) + } + )"; + + THEN("when the code is inlined the procedure and function blocks are removed") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/1, + /*vec_lib=*/"none", + /*fast_math_flags=*/{}, + /*nmodl_inline=*/true); + std::smatch m; + + // Check if the procedure and function declarations are removed + std::regex add_proc(R"(define i32 @test_add\(double %a[0-9].*, double %b[0-9].*\))"); + 
REQUIRE(!std::regex_search(module_string, m, add_proc)); + std::regex sub_func(R"(define double @test_sub\(double %a[0-9].*, double %b[0-9].*\))"); + REQUIRE(!std::regex_search(module_string, m, sub_func)); + } + } +} From 30e53c7ef5b75f0dd904848dfae5d94355d37a0e Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 21 May 2021 08:21:07 -0700 Subject: [PATCH 065/105] Fixed typo in benchmarking metrics (#665) --- test/benchmark/llvm_benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index b9f2fdeced..e48df0d457 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -144,7 +144,7 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { logger->info("Compute time variance = {:g}", time_squared_sum / num_experiments - time_mean * time_mean); logger->info("Minimum compute time = {:.6f}", time_min); - logger->info("Minimum compute time = {:.6f}\n", time_max); + logger->info("Maximum compute time = {:.6f}\n", time_max); } } From 0362f6667fc892a61da173363d7b7480af092923 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 21 May 2021 23:04:19 +0200 Subject: [PATCH 066/105] Remove only inlined blocks from AST based on symtab properties (#668) --- .../llvm/codegen_llvm_helper_visitor.cpp | 36 ++++++++++--------- .../llvm/codegen_llvm_helper_visitor.hpp | 11 +++--- src/codegen/llvm/codegen_llvm_visitor.cpp | 2 +- src/codegen/llvm/codegen_llvm_visitor.hpp | 7 +--- src/main.cpp | 3 +- test/unit/codegen/codegen_llvm_ir.cpp | 5 ++- 6 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 0725760d8a..9e3d9eedef 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -10,6 +10,7 @@ #include "ast/all.hpp" #include "codegen/codegen_helper_visitor.hpp" 
+#include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" @@ -19,6 +20,8 @@ namespace codegen { using namespace fmt::literals; +using symtab::syminfo::Status; + /// initialize static member variables const ast::AstNodeType CodegenLLVMHelperVisitor::INTEGER_TYPE = ast::AstNodeType::INTEGER; const ast::AstNodeType CodegenLLVMHelperVisitor::FLOAT_TYPE = ast::AstNodeType::DOUBLE; @@ -539,17 +542,11 @@ void CodegenLLVMHelperVisitor::rename_local_variables(ast::StatementBlock& node) void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { - // if the Procedure block is already inlined, there is no reason to generate the LLVM IR code - if (nmodl_inline) - return; node.visit_children(*this); create_function_for_node(node); } void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { - // if the Function block is already inlined, there is no reason to generate the LLVM IR code - if (nmodl_inline) - return; node.visit_children(*this); create_function_for_node(node); } @@ -782,6 +779,21 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::cout << nmodl::to_nmodl(function) << std::endl; } +void CodegenLLVMHelperVisitor::remove_inlined_nodes(ast::Program& node) { + auto program_symtab = node.get_model_symbol_table(); + const auto& func_proc_nodes = + collect_nodes(node, {ast::AstNodeType::FUNCTION_BLOCK, ast::AstNodeType::PROCEDURE_BLOCK}); + std::unordered_set nodes_to_erase; + for (const auto& ast_node: func_proc_nodes) { + if (program_symtab->lookup(ast_node->get_node_name()) + .get() + ->has_all_status(Status::inlined)) { + nodes_to_erase.insert(static_cast(ast_node.get())); + } + } + node.erase_node(nodes_to_erase); +} + void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { /// run codegen helper visitor to collect information CodegenHelperVisitor v; @@ -791,21 +803,11 @@ void 
CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { node.emplace_back_node(instance_var_helper.instance); logger->info("Running CodegenLLVMHelperVisitor"); + remove_inlined_nodes(node); node.visit_children(*this); for (auto& fun: codegen_functions) { node.emplace_back_node(fun); } - // Remove Function and Procedure blocks from the Program since they are already inlined - if (nmodl_inline) { - const auto& func_proc_nodes = - collect_nodes(node, - {ast::AstNodeType::FUNCTION_BLOCK, ast::AstNodeType::PROCEDURE_BLOCK}); - std::unordered_set nodes_to_erase; - for (const auto& ast_node: func_proc_nodes) { - nodes_to_erase.insert(static_cast(ast_node.get())); - } - node.erase_node(nodes_to_erase); - } } diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 3619cbc32e..9d79e24803 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -103,9 +103,6 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { /// explicit vectorisation width int vector_width; - /// variable to check whether Function and Procedures blocks are inline by NMODL passes - bool nmodl_inline; - /// newly generated code generation specific functions CodegenFunctionVector codegen_functions; @@ -137,9 +134,8 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { static const std::string VOLTAGE_VAR; static const std::string NODE_INDEX_VAR; - CodegenLLVMHelperVisitor(int vector_width, bool nmodl_inline) - : vector_width(vector_width) - , nmodl_inline(nmodl_inline) {} + CodegenLLVMHelperVisitor(int vector_width) + : vector_width(vector_width) {} const InstanceVarHelper& get_instance_var_helper() { return instance_var_helper; @@ -169,6 +165,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { void convert_local_statement(ast::StatementBlock& node); void rename_local_variables(ast::StatementBlock& node); + /// Remove Function and Procedure 
blocks from the node since they are already inlined + void remove_inlined_nodes(ast::Program& node); + void visit_procedure_block(ast::ProcedureBlock& node) override; void visit_function_block(ast::FunctionBlock& node) override; void visit_nrn_state_block(ast::NrnStateBlock& node) override; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 515949e329..6df5820d42 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -770,7 +770,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // - convert function and procedure blocks into CodegenFunctions // - gather information about AST. For now, information about functions // and procedures is used only. - CodegenLLVMHelperVisitor v{vector_width, nmodl_inline}; + CodegenLLVMHelperVisitor v{vector_width}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); sym_tab = node.get_symbol_table(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index cbc0f9b949..c3beb53640 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -69,9 +69,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Output directory for code generation. std::string output_dir; - /// Variable to check if Functions and Procedures are inlined by NMODL passes - bool nmodl_inline; - private: /// Underlying LLVM context. 
std::unique_ptr context = std::make_unique(); @@ -117,11 +114,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { int vector_width = 1, std::string vec_lib = "none", bool add_debug_information = false, - std::vector fast_math_flags = {}, - bool nmodl_inline = false) + std::vector fast_math_flags = {}) : mod_filename(mod_filename) , output_dir(output_dir) - , nmodl_inline(nmodl_inline) , opt_passes(opt_passes) , vector_width(vector_width) , vector_library(veclib_map.at(vec_lib)) diff --git a/src/main.cpp b/src/main.cpp index af018d9fff..b0b16d145d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -697,8 +697,7 @@ int main(int argc, const char* argv[]) { llvm_vec_width, vector_library, !disable_debug_information, - llvm_fast_math_flags, - nmodl_inline); + llvm_fast_math_flags); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index b43c6bee8e..cde67ef97c 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -58,8 +58,7 @@ std::string run_llvm_visitor(const std::string& text, vector_width, vec_lib, /*add_debug_information=*/false, - fast_math_flags, - nmodl_inline); + fast_math_flags); llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); @@ -78,7 +77,7 @@ std::vector> run_llvm_visitor_helper( SymtabVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - CodegenLLVMHelperVisitor(vector_width, /*nmodl_inline=*/false).visit_program(*ast); + CodegenLLVMHelperVisitor(vector_width).visit_program(*ast); const auto& nodes = collect_nodes(*ast, nodes_to_collect); From 480f26e398aac29e62295f2e8e6a51a31dac231e Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Tue, 25 May 2021 12:22:14 +0200 Subject: [PATCH 067/105] Use VarName on the RHS of assignment expression (#669) - NMODL parser uses VarName on the LHS of assignment expression 
- Inline visitor was using Name on the LHS of assignment expression Related to #667 --- src/visitors/inline_visitor.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/visitors/inline_visitor.cpp b/src/visitors/inline_visitor.cpp index b72c9cbdb9..3f0dd8e6c6 100644 --- a/src/visitors/inline_visitor.cpp +++ b/src/visitors/inline_visitor.cpp @@ -305,6 +305,8 @@ void InlineVisitor::visit_statement_block(StatementBlock& node) { /** Visit all wrapped expressions which can contain function calls. * If a function call is replaced then the wrapped expression is * also replaced with new variable node from the inlining result. + * Note that we use `VarName` so that LHS of assignment expression + * is `VarName`, similar to parser. */ void InlineVisitor::visit_wrapped_expression(WrappedExpression& node) { node.visit_children(*this); @@ -313,7 +315,9 @@ void InlineVisitor::visit_wrapped_expression(WrappedExpression& node) { auto expression = dynamic_cast(e.get()); if (replaced_fun_calls.find(expression) != replaced_fun_calls.end()) { auto var = replaced_fun_calls[expression]; - node.set_expression(std::make_shared(new String(var))); + node.set_expression(std::make_shared(new Name(new String(var)), + /*at=*/nullptr, + /*index=*/nullptr)); } } } From b907544e08236febd27e1b23cc040cd581934cec Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Sun, 30 May 2021 07:07:19 -0700 Subject: [PATCH 068/105] [LLVM] SLEEF and libsystem_m vector libraries support (#674) * Added support for `libsystem_m` and `SLEEF` vector libraries. The first one is supported by LLVM internally, so it comes for free with LLVM 13. For `SLEEF`, basic support was added for AArch64 and x86 architectures. Currently, we support - `exp` - `pow` * Added corresponding IR checks for `libsystem_m` and `SLEEF` (both AArch64 and x86). * Updated LLVM binaries for MAC OS CI, as well as for latest LLVM 13 (trunk) to fix link errors for Darwin vector library. 
Co-authored-by: Pramod Kumbhar --- azure-pipelines.yml | 5 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 79 +++++++++++++++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 21 +++--- src/main.cpp | 2 +- test/unit/codegen/codegen_llvm_ir.cpp | 30 +++++++++ 5 files changed, 111 insertions(+), 26 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f8cac1d6ad..ec31765d6e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -194,13 +194,13 @@ stages: displayName: 'Install Dependencies' - script: | cd $HOME - git clone https://github.com/pramodk/llvm-nightly.git + git clone --depth 1 https://github.com/pramodk/llvm-nightly.git displayName: 'Setup LLVM v13' - script: | export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$HOME/llvm-nightly/0421/osx/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON + cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$HOME/llvm-nightly/0621/osx/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? 
-ne 0 ] then @@ -248,6 +248,7 @@ stages: displayName: 'Build Neuron and Run Integration Tests' - job: 'manylinux_wheels' timeoutInMinutes: 45 + condition: eq(1,2) pool: vmImage: 'ubuntu-20.04' strategy: diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 6df5820d42..1e5ca89c6d 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -65,6 +65,68 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym return unsupported.empty() && supported.size() <= 1; } +#if LLVM_VERSION_MAJOR >= 13 +void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, + llvm::Triple& triple) { + // Since LLVM does not support SLEEF as a vector library yet, process it separately. + if (vector_library == "SLEEF") { + // Populate function definitions of only exp and pow (for now) +#define FIXED(w) llvm::ElementCount::getFixed(w) +#define DISPATCH(func, vec_func, width) {func, vec_func, width}, + const llvm::VecDesc aarch64_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2)) + DISPATCH("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2)) + // clang-format on + }; + const llvm::VecDesc x86_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2)) + DISPATCH("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVeN8v_exp", FIXED(8)) + DISPATCH("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2)) + DISPATCH("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVeN8vv_pow", FIXED(8)) + // clang-format on + }; +#undef DISPATCH + + if (triple.isAArch64()) { + tli.addVectorizableFunctions(aarch64_functions); + } + if (triple.isX86() && triple.isArch64Bit()) { + tli.addVectorizableFunctions(x86_functions); + } + + } else { + // A map to 
query vector library by its string value. + using VecLib = llvm::TargetLibraryInfoImpl::VectorLibrary; + static const std::map llvm_supported_vector_libraries = { + {"Accelerate", VecLib::Accelerate}, + {"libmvec", VecLib::LIBMVEC_X86}, + {"libsystem_m", VecLib ::DarwinLibSystemM}, + {"MASSV", VecLib::MASSV}, + {"none", VecLib::NoLibrary}, + {"SVML", VecLib::SVML}}; + const auto& library = llvm_supported_vector_libraries.find(vector_library); + if (library == llvm_supported_vector_libraries.end()) + throw std::runtime_error("Error: unknown vector library - " + vector_library + "\n"); + + // Add vectorizable functions to the target library info. + switch (library->second) { + case VecLib::LIBMVEC_X86: + if (!triple.isX86() || !triple.isArch64Bit()) + break; + default: + tli.addVectorizableFunctionsFromVecLib(library->second); + break; + } + } +} +#endif + llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { node->accept(*this); return ir_builder.pop_last_value(); @@ -817,25 +879,20 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { run_ir_opt_passes(); } - // Optionally, replace LLVM's maths intrinsics with vector library calls. - if (vector_width > 1 && vector_library != llvm::TargetLibraryInfoImpl::NoLibrary) { + // Optionally, replace LLVM math intrinsics with vector library calls. + if (vector_width > 1) { #if LLVM_VERSION_MAJOR < 13 logger->warn( "This version of LLVM does not support replacement of LLVM intrinsics with vector " "library calls"); #else - // First, get the target library information. + // First, get the target library information and add vectorizable functions for the + // specified vector library. llvm::Triple triple(llvm::sys::getDefaultTargetTriple()); llvm::TargetLibraryInfoImpl target_lib_info = llvm::TargetLibraryInfoImpl(triple); + add_vectorizable_functions_from_vec_lib(target_lib_info, triple); - // Populate target library information with vectorisable functions. 
Since libmvec is - // supported for x86_64 only, have a check to catch other architectures. - if (vector_library != llvm::TargetLibraryInfoImpl::LIBMVEC_X86 || - (triple.isX86() && triple.isArch64Bit())) { - target_lib_info.addVectorizableFunctionsFromVecLib(vector_library); - } - - // Run the codegen optimisation passes that replace maths intrinsics. + // Run passes that replace math intrinsics. codegen_pm.add(new llvm::TargetLibraryInfoWrapperPass(target_lib_info)); codegen_pm.add(new llvm::ReplaceWithVeclibLegacy); codegen_pm.doInitialization(); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index c3beb53640..49285f9941 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -48,15 +48,6 @@ namespace codegen { * @{ */ -/// A map to query vector library by its string value. -static const std::map veclib_map = { - {"Accelerate", llvm::TargetLibraryInfoImpl::Accelerate}, -#if LLVM_VERSION_MAJOR >= 13 - {"libmvec", llvm::TargetLibraryInfoImpl::LIBMVEC_X86}, -#endif - {"MASSV", llvm::TargetLibraryInfoImpl::MASSV}, - {"SVML", llvm::TargetLibraryInfoImpl::SVML}, - {"none", llvm::TargetLibraryInfoImpl::NoLibrary}}; /** * \class CodegenLLVMVisitor @@ -100,8 +91,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Pass manager for optimisation passes that are used for target code generation. llvm::legacy::FunctionPassManager codegen_pm; - /// Vector library used for maths functions. - llvm::TargetLibraryInfoImpl::VectorLibrary vector_library; + /// Vector library used for math functions. + std::string vector_library; /// Explicit vectorisation width. 
int vector_width; @@ -119,7 +110,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { , output_dir(output_dir) , opt_passes(opt_passes) , vector_width(vector_width) - , vector_library(veclib_map.at(vec_lib)) + , vector_library(vec_lib) , add_debug_information(add_debug_information) , ir_builder(*context, use_single_precision, vector_width, fast_math_flags) , debug_builder(*module) @@ -183,6 +174,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void wrap_kernel_functions(); private: +#if LLVM_VERSION_MAJOR >= 13 + /// Populates target library info with the vector library definitions. + void add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, + llvm::Triple& triple); +#endif + /// Accepts the given AST node and returns the processed value. llvm::Value* accept_and_get(const std::shared_ptr& node); diff --git a/src/main.cpp b/src/main.cpp index b0b16d145d..22662aafb7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -349,7 +349,7 @@ int main(int argc, const char* argv[]) { fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); llvm_opt->add_option("--veclib", vector_library, - fmt::format("Vector library for maths functions ({})", vector_library))->check(CLI::IsMember({"Accelerate", "libmvec", "MASSV", "SVML", "none"})); + fmt::format("Vector library for maths functions ({})", vector_library))->check(CLI::IsMember({"Accelerate", "libsystem_m", "libmvec", "MASSV", "SLEEF", "SVML", "none"})); llvm_opt->add_option("--fmf", llvm_fast_math_flags, "Fast math flags for floating-point optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index cde67ef97c..88458ebd86 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1382,6 +1382,36 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { 
REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_decl)); REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_call)); REQUIRE(!std::regex_search(accelerate_library_module_str, m, fexp_call)); + + // Check correct replacement of @llvm.exp.v2f64 into @_ZGV?N?v_exp when using SLEEF. + std::string sleef_library_module_str = run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"SLEEF"); +#if defined(__arm64__) || defined(__aarch64__) + std::regex sleef_exp_decl(R"(declare <2 x double> @_ZGVnN2v_exp\(<2 x double>\))"); + std::regex sleef_exp_call(R"(call <2 x double> @_ZGVnN2v_exp\(<2 x double> .*\))"); +#else + std::regex sleef_exp_decl(R"(declare <2 x double> @_ZGVbN2v_exp\(<2 x double>\))"); + std::regex sleef_exp_call(R"(call <2 x double> @_ZGVbN2v_exp\(<2 x double> .*\))"); +#endif + REQUIRE(std::regex_search(sleef_library_module_str, m, sleef_exp_decl)); + REQUIRE(std::regex_search(sleef_library_module_str, m, sleef_exp_call)); + REQUIRE(!std::regex_search(sleef_library_module_str, m, fexp_call)); + + // Check the replacements when using Darwin's libsystem_m. 
+ std::string libsystem_m_library_module_str = + run_llvm_visitor(nmodl_text, + /*opt=*/false, + /*use_single_precision=*/true, + /*vector_width=*/4, + /*vec_lib=*/"libsystem_m"); + std::regex libsystem_m_exp_decl(R"(declare <4 x float> @_simd_exp_f4\(<4 x float>\))"); + std::regex libsystem_m_exp_call(R"(call <4 x float> @_simd_exp_f4\(<4 x float> .*\))"); + REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_decl)); + REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_call)); + REQUIRE(!std::regex_search(libsystem_m_library_module_str, m, fexp_call)); #endif } } From a1c4b0f8fa0c387db1e97e41626ba086667f9f1c Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 3 Jun 2021 00:01:58 -0700 Subject: [PATCH 069/105] [LLVM] Enhancements for optimization pipeline (#683) Added several improvements to the way optimizations are run for the LLVM code generation pipeline and benchmarking. 1. Created `llvm_utils` files that currently have logic/implementations for optimizing the IR. In future, things like dumping IR to file will also go there. This allows to share optimizing infrastructure between benchmarking and LLVM visitor. 2. Replaced`--opt` with `--opt-level-ir` for LLVM visitor. The `--opt` option was duplicated by `--opt-level-ir` in the benchmarking infrastructure. With new `llvm_utils` package, we can simply reuse the optimizing routines and use optimization levels instead. 3. Added IPO and AggressiveInstCombine passes Importantly, if running the benchmark, the IR is still optimized after the `targetMachine` is created to benefit from target-specific optimizations. 
Example: ```bash bin/nmodl test.mod llvm --ir --single-precision --vector-width 4 --opt-level-ir 3 \ benchmark --run --opt-level-codegen 3 ``` Co-authored-by: Pramod Kumbhar --- cmake/LLVMHelper.cmake | 1 + src/codegen/llvm/CMakeLists.txt | 4 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 37 +++------ src/codegen/llvm/codegen_llvm_visitor.hpp | 27 ++----- src/codegen/llvm/llvm_utils.cpp | 79 +++++++++++++++++++ src/codegen/llvm/llvm_utils.hpp | 23 ++++++ src/codegen/llvm/main.cpp | 2 +- src/main.cpp | 18 ++--- test/benchmark/jit_driver.cpp | 65 +-------------- test/benchmark/jit_driver.hpp | 2 +- test/benchmark/llvm_benchmark.hpp | 2 +- test/unit/codegen/codegen_llvm_execution.cpp | 12 +-- .../codegen/codegen_llvm_instance_struct.cpp | 6 +- test/unit/codegen/codegen_llvm_ir.cpp | 38 ++++----- 14 files changed, 164 insertions(+), 152 deletions(-) create mode 100644 src/codegen/llvm/llvm_utils.cpp create mode 100644 src/codegen/llvm/llvm_utils.hpp diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index 780ae29cfa..9e4af5d503 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -6,6 +6,7 @@ find_package(LLVM REQUIRED CONFIG) # include LLVM libraries set(NMODL_LLVM_COMPONENTS + aggressiveinstcombine analysis codegen core diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 70398de185..8cf03ee67a 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -9,7 +9,9 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.hpp) # ============================================================================= # LLVM codegen library and executable diff --git 
a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 1e5ca89c6d..ffbedbb063 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -6,6 +6,7 @@ *************************************************************************/ #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_utils.hpp" #include "ast/all.hpp" #include "visitors/rename_visitor.hpp" @@ -15,6 +16,7 @@ #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -431,25 +433,6 @@ llvm::Value* CodegenLLVMVisitor::read_variable(const ast::VarName& node) { "' is not supported\n"); } -void CodegenLLVMVisitor::run_ir_opt_passes() { - // Run some common optimisation passes that are commonly suggested. - opt_pm.add(llvm::createInstructionCombiningPass()); - opt_pm.add(llvm::createReassociatePass()); - opt_pm.add(llvm::createGVNPass()); - opt_pm.add(llvm::createCFGSimplificationPass()); - - // Initialize pass manager. - opt_pm.doInitialization(); - - // Iterate over all functions and run the optimisation passes. 
- auto& functions = module->getFunctionList(); - for (auto& function: functions) { - llvm::verifyFunction(function); - opt_pm.run(function); - } - opt_pm.doFinalization(); -} - void CodegenLLVMVisitor::write_to_variable(const ast::VarName& node, llvm::Value* value) { const auto& identifier = node.get_name(); if (!identifier->is_name() && !identifier->is_indexed_name() && @@ -874,9 +857,10 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { throw std::runtime_error("Error: incorrect IR has been generated!\n" + ostream.str()); } - if (opt_passes) { + if (opt_level_ir) { logger->info("Running LLVM optimisation passes"); - run_ir_opt_passes(); + utils::initialise_optimisation_passes(); + utils::optimise_module(*module, opt_level_ir); } // Optionally, replace LLVM math intrinsics with vector library calls. @@ -893,14 +877,15 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { add_vectorizable_functions_from_vec_lib(target_lib_info, triple); // Run passes that replace math intrinsics. 
- codegen_pm.add(new llvm::TargetLibraryInfoWrapperPass(target_lib_info)); - codegen_pm.add(new llvm::ReplaceWithVeclibLegacy); - codegen_pm.doInitialization(); + llvm::legacy::FunctionPassManager fpm(module.get()); + fpm.add(new llvm::TargetLibraryInfoWrapperPass(target_lib_info)); + fpm.add(new llvm::ReplaceWithVeclibLegacy); + fpm.doInitialization(); for (auto& function: module->getFunctionList()) { if (!function.isDeclaration()) - codegen_pm.run(function); + fpm.run(function); } - codegen_pm.doFinalization(); + fpm.doFinalization(); #endif } diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 49285f9941..5dd8eda15c 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -28,12 +28,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "llvm/Transforms/InstCombine/InstCombine.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/GVN.h" namespace nmodl { namespace codegen { @@ -82,14 +78,8 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Instance variable helper. InstanceVarHelper instance_var_helper; - /// Run optimisation passes if true. - bool opt_passes; - - /// Pass manager for optimisation passes that are run on IR and are not related to target. - llvm::legacy::FunctionPassManager opt_pm; - - /// Pass manager for optimisation passes that are used for target code generation. - llvm::legacy::FunctionPassManager codegen_pm; + /// Optimisation level for LLVM IR transformations. + int opt_level_ir; /// Vector library used for math functions. 
std::string vector_library; @@ -100,7 +90,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { public: CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, - bool opt_passes, + int opt_level_ir, bool use_single_precision = false, int vector_width = 1, std::string vec_lib = "none", @@ -108,14 +98,12 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { std::vector fast_math_flags = {}) : mod_filename(mod_filename) , output_dir(output_dir) - , opt_passes(opt_passes) + , opt_level_ir(opt_level_ir) , vector_width(vector_width) , vector_library(vec_lib) , add_debug_information(add_debug_information) , ir_builder(*context, use_single_precision, vector_width, fast_math_flags) - , debug_builder(*module) - , codegen_pm(module.get()) - , opt_pm(module.get()) {} + , debug_builder(*module) {} /// Dumps the generated LLVM IR module to string. std::string dump_module() const { @@ -228,11 +216,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Reads the given variable and returns the processed value. llvm::Value* read_variable(const ast::VarName& node); - - /// Run multiple LLVM optimisation passes on generated IR. - /// TODO: this can be moved to a dedicated file or deprecated. - void run_ir_opt_passes(); - //// Writes the value to the given variable. void write_to_variable(const ast::VarName& node, llvm::Value* value); }; diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp new file mode 100644 index 0000000000..684f962b76 --- /dev/null +++ b/src/codegen/llvm/llvm_utils.cpp @@ -0,0 +1,79 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "codegen/llvm/llvm_utils.hpp" + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +namespace nmodl { +namespace utils { + +/// Populates pass managers with passes for the given optimisation levels. +static void populate_pms(llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm, + int opt_level, + int size_level, + llvm::TargetMachine* tm) { + // First, set the pass manager builder with some basic optimisation information. + llvm::PassManagerBuilder pm_builder; + pm_builder.OptLevel = opt_level; + pm_builder.SizeLevel = size_level; + pm_builder.DisableUnrollLoops = opt_level == 0; + + // If target machine is defined, then initialise the TargetTransformInfo for the target. + if (tm) { + module_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + func_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + } + + // Populate pass managers. + pm_builder.populateModulePassManager(module_pm); + pm_builder.populateFunctionPassManager(func_pm); +} + +/// Runs the function and module passes on the provided module. 
+static void run_optimisation_passes(llvm::Module& module, + llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm) { + func_pm.doInitialization(); + auto& functions = module.getFunctionList(); + for (auto& function: functions) { + llvm::verifyFunction(function); + func_pm.run(function); + } + func_pm.doFinalization(); + module_pm.run(module); +} + +/****************************************************************************************/ +/* Optimisation utils */ +/****************************************************************************************/ + +void initialise_optimisation_passes() { + auto& registry = *llvm::PassRegistry::getPassRegistry(); + llvm::initializeCore(registry); + llvm::initializeTransformUtils(registry); + llvm::initializeScalarOpts(registry); + llvm::initializeIPO(registry); + llvm::initializeInstCombine(registry); + llvm::initializeAggressiveInstCombine(registry); + llvm::initializeAnalysis(registry); +} + +void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm) { + llvm::legacy::FunctionPassManager func_pm(&module); + llvm::legacy::PassManager module_pm; + populate_pms(func_pm, module_pm, opt_level, /*size_level=*/0, tm); + run_optimisation_passes(module, func_pm, module_pm); +} +} // namespace utils +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp new file mode 100644 index 0000000000..81dc30d97f --- /dev/null +++ b/src/codegen/llvm/llvm_utils.hpp @@ -0,0 +1,23 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" + +namespace nmodl { +namespace utils { + +/// Initialises some LLVM optimisation passes. +void initialise_optimisation_passes(); + +/// Optimises the given LLVM IR module. +void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm = nullptr); + +} // namespace utils +} // namespace nmodl diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index 2f4e1f653d..6d374999c3 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -48,7 +48,7 @@ int main(int argc, const char* argv[]) { visitor::SymtabVisitor().visit_program(*ast); logger->info("Running LLVM Visitor"); - codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", /*opt_passes=*/false); + codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); std::unique_ptr module = llvm_visitor.get_module(); diff --git a/src/main.cpp b/src/main.cpp index 22662aafb7..07679032e1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -173,9 +173,6 @@ int main(int argc, const char* argv[]) { /// use single precision floating-point types bool llvm_float_type(false); - /// run llvm optimisation passes - bool llvm_ir_opt_passes(false); - /// llvm vector width int llvm_vec_width = 1; @@ -338,9 +335,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_flag("--disable-debug-info", disable_debug_information, fmt::format("Disable debug information ({})", disable_debug_information))->ignore_case(); - llvm_opt->add_flag("--opt", - llvm_ir_opt_passes, - fmt::format("Run few common LLVM IR optimisation passes ({})", llvm_ir_opt_passes))->ignore_case(); + llvm_opt->add_option("--opt-level-ir", + llvm_opt_level_ir, + fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", 
"3"})); llvm_opt->add_flag("--single-precision", llvm_float_type, fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); @@ -359,9 +356,6 @@ int main(int argc, const char* argv[]) { benchmark_opt->add_flag("--run", run_llvm_benchmark, fmt::format("Run LLVM benchmark ({})", run_llvm_benchmark))->ignore_case(); - benchmark_opt->add_option("--opt-level-ir", - llvm_opt_level_ir, - fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); benchmark_opt->add_option("--opt-level-codegen", llvm_opt_level_codegen, fmt::format("Machine code optimisation level (O{})", llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); @@ -689,10 +683,14 @@ int main(int argc, const char* argv[]) { #ifdef NMODL_LLVM_BACKEND if (llvm_ir || run_llvm_benchmark) { + // If benchmarking, we want to optimize the IR with target information and not in + // LLVM visitor. + int llvm_opt_level = run_llvm_benchmark ? 
0 : llvm_opt_level_ir; + logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor(modfile, output_dir, - llvm_ir_opt_passes, + llvm_opt_level, llvm_float_type, llvm_vec_width, vector_library, diff --git a/test/benchmark/jit_driver.cpp b/test/benchmark/jit_driver.cpp index a2d8df63f4..e5a7cd8928 100644 --- a/test/benchmark/jit_driver.cpp +++ b/test/benchmark/jit_driver.cpp @@ -7,9 +7,9 @@ #include "jit_driver.hpp" #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_utils.hpp" #include "utils/common_utils.hpp" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" @@ -21,12 +21,10 @@ #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" namespace nmodl { namespace runner { @@ -35,63 +33,6 @@ namespace runner { /* Utilities for JIT driver */ /****************************************************************************************/ -/// Initialises some LLVM optimisation passes. -static void initialise_optimisation_passes() { - auto& registry = *llvm::PassRegistry::getPassRegistry(); - llvm::initializeCore(registry); - llvm::initializeTransformUtils(registry); - llvm::initializeScalarOpts(registry); - llvm::initializeInstCombine(registry); - llvm::initializeAnalysis(registry); -} - -/// Populates pass managers with passes for the given optimisation levels. 
-static void populate_pms(llvm::legacy::FunctionPassManager& func_pm, - llvm::legacy::PassManager& module_pm, - int opt_level, - int size_level, - llvm::TargetMachine* tm) { - // First, set the pass manager builder with some basic optimisation information. - llvm::PassManagerBuilder pm_builder; - pm_builder.OptLevel = opt_level; - pm_builder.SizeLevel = size_level; - pm_builder.DisableUnrollLoops = opt_level == 0; - - // If target machine is defined, then initialise the TargetTransformInfo for the target. - if (tm) { - module_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); - func_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); - } - - // Populate pass managers. - pm_builder.populateModulePassManager(module_pm); - pm_builder.populateFunctionPassManager(func_pm); -} - -/// Runs the function and module passes on the provided module. -static void run_optimisation_passes(llvm::Module& module, - llvm::legacy::FunctionPassManager& func_pm, - llvm::legacy::PassManager& module_pm) { - func_pm.doInitialization(); - auto& functions = module.getFunctionList(); - for (auto& function: functions) { - llvm::verifyFunction(function); - func_pm.run(function); - } - func_pm.doFinalization(); - module_pm.run(module); -} - -/// Optimises the given LLVM IR module. -static void optimise_module(llvm::Module& module, - int opt_level, - llvm::TargetMachine* tm = nullptr) { - llvm::legacy::FunctionPassManager func_pm(&module); - llvm::legacy::PassManager module_pm; - populate_pms(func_pm, module_pm, opt_level, /*size_level=*/0, tm); - run_optimisation_passes(module, func_pm, module_pm); -} - /// Sets the target triple and the data layout of the module. static void set_triple_and_data_layout(llvm::Module& module, const std::string& features) { // Get the default target triple for the host. 
@@ -149,7 +90,7 @@ void JITDriver::init(std::string features, BenchmarkInfo* benchmark_info) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); - initialise_optimisation_passes(); + utils::initialise_optimisation_passes(); // Set the target triple and the data layout for the module. set_triple_and_data_layout(*module, features); @@ -211,7 +152,7 @@ void JITDriver::init(std::string features, // Optimise the LLVM IR module and save it to .ll file if benchmarking. if (benchmark_info) { - optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); + utils::optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); std::error_code error_code; std::unique_ptr out = diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp index afb1317cd8..d8e1127417 100644 --- a/test/benchmark/jit_driver.hpp +++ b/test/benchmark/jit_driver.hpp @@ -29,7 +29,7 @@ struct BenchmarkInfo { /// Object file output directory. std::string output_dir; - /// Optimisation level for generated IR. + /// Optimisation level for IT. int opt_level_ir; /// Optimisation level for machine code generation. diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index 9696191172..4a66de52fc 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -43,7 +43,7 @@ class LLVMBenchmark { /// Benchmarking backend std::string backend; - /// Optimisation level for LLVM IR transformations. + /// Optimisation level for IR generation. int opt_level_ir; /// Optimisation level for machine code generation. 
diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index e12dfd8981..312a4d4d28 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -127,7 +127,7 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/false); + /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); std::unique_ptr m = llvm_visitor.get_module(); @@ -229,7 +229,7 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/true); + /*opt_level_ir=*/3); llvm_visitor.visit_program(*ast); std::unique_ptr m = llvm_visitor.get_module(); @@ -302,7 +302,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { SolveBlockVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/false, + /*opt_level_ir=*/0, /*use_single_precision=*/false, /*vector_width=*/1); llvm_visitor.visit_program(*ast); @@ -384,7 +384,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { SolveBlockVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/true, + /*opt_level_ir=*/3, /*use_single_precision=*/false, /*vector_width=*/4); llvm_visitor.visit_program(*ast); @@ -466,7 +466,7 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { SolveBlockVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/false, + /*opt_level_ir=*/0, /*use_single_precision=*/false, /*vector_width=*/2); llvm_visitor.visit_program(*ast); @@ -557,7 +557,7 @@ SCENARIO("Vectorised kernel with 
simple control flow", "[llvm][runner]") { SolveBlockVisitor().visit_program(*ast); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_passes=*/false, + /*opt_level_ir=*/0, /*use_single_precision=*/false, /*vector_width=*/2); llvm_visitor.visit_program(*ast); diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index e77b6844ae..6042aecfc8 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -27,7 +27,7 @@ using nmodl::parser::NmodlDriver; //============================================================================= codegen::CodegenInstanceData generate_instance_data(const std::string& text, - bool opt = false, + int opt_level = 0, bool use_single_precision = false, int vector_width = 1, size_t num_elements = 100, @@ -41,7 +41,7 @@ codegen::CodegenInstanceData generate_instance_data(const std::string& text, codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"test", /*output_dir=*/".", - opt, + opt_level, use_single_precision, vector_width); llvm_visitor.visit_program(*ast); @@ -104,7 +104,7 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { const size_t num_elements = 10; constexpr static double seed = 42; auto instance_data = generate_instance_data(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/true, /*vector_width*/ 1, num_elements, diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 88458ebd86..4a7a81ea6b 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -35,7 +35,7 @@ using nmodl::parser::NmodlDriver; //============================================================================= std::string run_llvm_visitor(const std::string& text, - bool opt = false, + int opt_level = 0, bool use_single_precision = false, int vector_width = 1, 
std::string vec_lib = "none", @@ -53,7 +53,7 @@ std::string run_llvm_visitor(const std::string& text, codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - opt, + opt_level, use_single_precision, vector_width, vec_lib, @@ -99,7 +99,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { THEN("variables are loaded and add instruction is created") { std::string module_string = - run_llvm_visitor(nmodl_text, /*opt=*/false, /*use_single_precision=*/true); + run_llvm_visitor(nmodl_text, /*opt_level=*/0, /*use_single_precision=*/true); std::smatch m; std::regex rhs(R"(%1 = load float, float\* %b)"); @@ -179,7 +179,7 @@ SCENARIO("Binary expression", "[visitor][llvm]") { THEN("'pow' intrinsic is created") { std::string module_string = - run_llvm_visitor(nmodl_text, /*opt=*/false, /*use_single_precision=*/true); + run_llvm_visitor(nmodl_text, /*opt_level=*/0, /*use_single_precision=*/true); std::smatch m; // Check 'pow' intrinsic. @@ -1046,7 +1046,7 @@ SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { THEN("a gather instructions is created") { std::string module_string = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/4); std::smatch m; @@ -1098,7 +1098,7 @@ SCENARIO("Vectorised simple kernel with ion writes", "[visitor][llvm]") { THEN("a scatter instructions is created") { std::string module_string = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/4); std::smatch m; @@ -1154,7 +1154,7 @@ SCENARIO("Vectorised simple kernel with control flow", "[visitor][llvm]") { THEN("masked load and stores are created") { std::string module_string = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/true, /*vector_width=*/8); std::smatch m; @@ -1326,7 +1326,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check exponential intrinsic is created. 
std::string no_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/2); std::regex exp_decl(R"(declare <2 x double> @llvm\.exp\.v2f64\(<2 x double>\))"); @@ -1337,7 +1337,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { #if LLVM_VERSION_MAJOR >= 13 // Check exponential calls are replaced with calls to SVML library. std::string svml_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/2, /*vec_lib=*/"SVML"); @@ -1350,7 +1350,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check that supported exponential calls are replaced with calls to MASSV library (i.e. // operating on vector of width 2). std::string massv2_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/2, /*vec_lib=*/"MASSV"); @@ -1362,7 +1362,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check no replacement for MASSV happens for non-supported vector widths. std::string massv4_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/4, /*vec_lib=*/"MASSV"); @@ -1372,7 +1372,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check correct replacement of @llvm.exp.v4f32 into @vexpf when using Accelerate. std::string accelerate_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/true, /*vector_width=*/4, /*vec_lib=*/"Accelerate"); @@ -1385,7 +1385,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check correct replacement of @llvm.exp.v2f64 into @_ZGV?N?v_exp when using SLEEF. 
std::string sleef_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/2, /*vec_lib=*/"SLEEF"); @@ -1403,7 +1403,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { // Check the replacements when using Darwin's libsystem_m. std::string libsystem_m_library_module_str = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/true, /*vector_width=*/4, /*vec_lib=*/"libsystem_m"); @@ -1432,7 +1432,7 @@ SCENARIO("Fast math flags", "[visitor][llvm]") { THEN("instructions are generated with the flags set") { std::string module_string = run_llvm_visitor(nmodl_text, - /*opt=*/true, + /*opt_level=*/3, /*use_single_precision=*/false, /*vector_width=*/1, /*vec_lib=*/"none", @@ -1462,12 +1462,12 @@ SCENARIO("Dead code removal", "[visitor][llvm][opt]") { )"; THEN("with optimisation enabled, all ops are eliminated") { - std::string module_string = run_llvm_visitor(nmodl_text, true); + std::string module_string = run_llvm_visitor(nmodl_text, /*opt_level=*/3); std::smatch m; - // Check if the values are optimised out + // Check if the values are optimised out. 
std::regex empty_proc( - R"(define i32 @add\(double %a[0-9].*, double %b[0-9].*\) \{\n(\s)*ret i32 0\n\})"); + R"(define i32 @add\(double %a[0-9].*, double %b[0-9].*\).*\{\n(\s)*ret i32 0\n\})"); REQUIRE(std::regex_search(module_string, m, empty_proc)); } } @@ -1509,7 +1509,7 @@ SCENARIO("Removal of inlined functions and procedures", "[visitor][llvm][inline] THEN("when the code is inlined the procedure and function blocks are removed") { std::string module_string = run_llvm_visitor(nmodl_text, - /*opt=*/false, + /*opt_level=*/0, /*use_single_precision=*/false, /*vector_width=*/1, /*vec_lib=*/"none", From 6506fcfebf05ecc16b4c6e2f8dc56c7e1ab8d969 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 3 Jun 2021 08:01:37 -0700 Subject: [PATCH 070/105] [LLVM] Added saving to file utility (#685) * Added saving to file utility * Skip NEURON test in LLVM branch Co-authored-by: Pramod Kumbhar --- azure-pipelines.yml | 1 + src/codegen/llvm/codegen_llvm_visitor.cpp | 21 ++++++--------------- src/codegen/llvm/llvm_utils.cpp | 19 +++++++++++++++++++ src/codegen/llvm/llvm_utils.hpp | 3 +++ test/benchmark/jit_driver.cpp | 18 +++--------------- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ec31765d6e..d35f343a54 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -243,6 +243,7 @@ stages: exit 1 fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod + condition: false env: SHELL: 'bash' displayName: 'Build Neuron and Run Integration Tests' diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index ffbedbb063..bac6f4e0b2 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -13,14 +13,11 @@ #include "visitors/visitor_utils.hpp" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include 
"llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" -#include "llvm/Support/ToolOutputFile.h" #if LLVM_VERSION_MAJOR >= 13 #include "llvm/CodeGen/ReplaceWithVeclib.h" @@ -72,9 +69,12 @@ void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLib llvm::Triple& triple) { // Since LLVM does not support SLEEF as a vector library yet, process it separately. if (vector_library == "SLEEF") { - // Populate function definitions of only exp and pow (for now) -#define FIXED(w) llvm::ElementCount::getFixed(w) +// clang-format off +#define FIXED(w) llvm::ElementCount::getFixed(w) +// clang-format on #define DISPATCH(func, vec_func, width) {func, vec_func, width}, + + // Populate function definitions of only exp and pow (for now) const llvm::VecDesc aarch64_functions[] = { // clang-format off DISPATCH("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4)) @@ -890,17 +890,8 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { } // If the output directory is specified, save the IR to .ll file. - // \todo: Consider saving the generated LLVM IR to bytecode (.bc) file instead. 
if (output_dir != ".") { - std::error_code error_code; - std::unique_ptr out = std::make_unique( - output_dir + "/" + mod_filename + ".ll", error_code, llvm::sys::fs::OF_Text); - if (error_code) - throw std::runtime_error("Error: " + error_code.message()); - - std::unique_ptr annotator; - module->print(out->os(), annotator.get()); - out->keep(); + utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); } logger->debug("Dumping generated IR...\n" + dump_module()); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index 684f962b76..59967c59c1 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -8,9 +8,12 @@ #include "codegen/llvm/llvm_utils.hpp" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -75,5 +78,21 @@ void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* t populate_pms(func_pm, module_pm, opt_level, /*size_level=*/0, tm); run_optimisation_passes(module, func_pm, module_pm); } + +/****************************************************************************************/ +/* File utils */ +/****************************************************************************************/ + +void save_ir_to_ll_file(llvm::Module& module, const std::string& filename) { + std::error_code error_code; + std::unique_ptr out = std::make_unique( + filename + ".ll", error_code, llvm::sys::fs::OF_Text); + if (error_code) + throw std::runtime_error("Error: " + error_code.message()); + + std::unique_ptr annotator; + module.print(out->os(), annotator.get()); + out->keep(); +} } // namespace utils } // namespace nmodl diff --git a/src/codegen/llvm/llvm_utils.hpp 
b/src/codegen/llvm/llvm_utils.hpp index 81dc30d97f..8e1e6e48dc 100644 --- a/src/codegen/llvm/llvm_utils.hpp +++ b/src/codegen/llvm/llvm_utils.hpp @@ -19,5 +19,8 @@ void initialise_optimisation_passes(); /// Optimises the given LLVM IR module. void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm = nullptr); +/// +void save_ir_to_ll_file(llvm::Module& module, const std::string& filename); + } // namespace utils } // namespace nmodl diff --git a/test/benchmark/jit_driver.cpp b/test/benchmark/jit_driver.cpp index e5a7cd8928..a804a2d4fd 100644 --- a/test/benchmark/jit_driver.cpp +++ b/test/benchmark/jit_driver.cpp @@ -20,11 +20,9 @@ #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" -#include "llvm/Support/ToolOutputFile.h" namespace nmodl { namespace runner { @@ -153,19 +151,9 @@ void JITDriver::init(std::string features, // Optimise the LLVM IR module and save it to .ll file if benchmarking. 
if (benchmark_info) { utils::optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); - - std::error_code error_code; - std::unique_ptr out = - std::make_unique(benchmark_info->output_dir + "/" + - benchmark_info->filename + "_opt.ll", - error_code, - llvm::sys::fs::OF_Text); - if (error_code) - throw std::runtime_error("Error: " + error_code.message()); - - std::unique_ptr annotator; - module->print(out->os(), annotator.get()); - out->keep(); + const std::string filename = benchmark_info->output_dir + "/" + + benchmark_info->filename + "_opt"; + utils::save_ir_to_ll_file(*module, filename); } return std::make_unique(std::move(tm)); From d48bb201dca2333b033b61076a658d640b1de761 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 3 Jun 2021 08:03:05 -0700 Subject: [PATCH 071/105] [LLVM] Aliasing and `cpu` options for LLVM visitor and the benchmark (#686) * Added may-alias and cpu options * Removed CPU checks * Use steady clock as we saw issue on VM Co-authored-by: Ioannis Magkanaris --- azure-pipelines.yml | 1 + src/codegen/llvm/codegen_llvm_visitor.hpp | 9 ++- src/codegen/llvm/llvm_ir_builder.cpp | 9 ++- src/codegen/llvm/llvm_ir_builder.hpp | 9 ++- src/main.cpp | 21 ++++--- test/benchmark/jit_driver.cpp | 71 ++++++++++++++--------- test/benchmark/jit_driver.hpp | 26 ++++----- test/benchmark/llvm_benchmark.cpp | 68 +++------------------- test/benchmark/llvm_benchmark.hpp | 12 ++-- 9 files changed, 103 insertions(+), 123 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d35f343a54..bef50433f0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -246,6 +246,7 @@ stages: condition: false env: SHELL: 'bash' + condition: false displayName: 'Build Neuron and Run Integration Tests' - job: 'manylinux_wheels' timeoutInMinutes: 45 diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 5dd8eda15c..22b9fafd83 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ 
b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -95,14 +95,19 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { int vector_width = 1, std::string vec_lib = "none", bool add_debug_information = false, - std::vector fast_math_flags = {}) + std::vector fast_math_flags = {}, + bool llvm_assume_alias = false) : mod_filename(mod_filename) , output_dir(output_dir) , opt_level_ir(opt_level_ir) , vector_width(vector_width) , vector_library(vec_lib) , add_debug_information(add_debug_information) - , ir_builder(*context, use_single_precision, vector_width, fast_math_flags) + , ir_builder(*context, + use_single_precision, + vector_width, + fast_math_flags, + !llvm_assume_alias) , debug_builder(*module) {} /// Dumps the generated LLVM IR module to string. diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 004f28d857..a585c95b3b 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -200,12 +200,15 @@ void IRBuilder::set_kernel_attributes() { current_function->setDoesNotFreeMemory(); current_function->setDoesNotThrow(); - // We also want to specify that the pointers that instance struct holds, do not alias. In order - // to do that, we add a `noalias` attribute to the argument. As per Clang's specification: + // We also want to specify that the pointers that instance struct holds do not alias, unless + // specified otherwise. In order to do that, we add a `noalias` attribute to the argument. As + // per Clang's specification: // > The `noalias` attribute indicates that the only memory accesses inside function are loads // > and stores from objects pointed to by its pointer-typed arguments, with arbitrary // > offsets. - current_function->addParamAttr(0, llvm::Attribute::NoAlias); + if (assume_noalias) { + current_function->addParamAttr(0, llvm::Attribute::NoAlias); + } // Finally, specify that the struct pointer does not capture and is read-only. 
current_function->addParamAttr(0, llvm::Attribute::NoCapture); diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index b9736e2846..b3005db0c7 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -58,6 +58,9 @@ class IRBuilder { /// The vector width used for the vectorized code. unsigned vector_width; + /// Instance struct fields do not alias. + bool assume_noalias; + /// Masked value used to predicate vector instructions. llvm::Value* mask; @@ -71,7 +74,8 @@ class IRBuilder { IRBuilder(llvm::LLVMContext& context, bool use_single_precision = false, unsigned vector_width = 1, - std::vector fast_math_flags = {}) + std::vector fast_math_flags = {}, + bool assume_noalias = true) : builder(context) , symbol_table(nullptr) , current_function(nullptr) @@ -81,7 +85,8 @@ class IRBuilder { , vector_width(vector_width) , mask(nullptr) , kernel_id("") - , fast_math_flags(fast_math_flags) {} + , fast_math_flags(fast_math_flags) + , assume_noalias(assume_noalias) {} /// Transforms the fast math flags provided to the builder into LLVM's representation. 
llvm::FastMathFlags transform_to_fmf(std::vector& flags) { diff --git a/src/main.cpp b/src/main.cpp index 07679032e1..4bc2f30252 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -188,6 +188,9 @@ int main(int argc, const char* argv[]) { /// run llvm benchmark bool run_llvm_benchmark(false); + /// do not assume that instance struct fields do not alias + bool llvm_assume_alias(false); + /// optimisation level for IR generation int llvm_opt_level_ir = 0; @@ -203,8 +206,8 @@ int main(int argc, const char* argv[]) { /// the number of repeated experiments for the benchmarking int num_experiments = 100; - /// specify the backend for LLVM IR to target - std::string backend = "default"; + /// specify the cpu for LLVM IR to target + std::string cpu = "default"; #endif // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) @@ -341,6 +344,9 @@ int main(int argc, const char* argv[]) { llvm_opt->add_flag("--single-precision", llvm_float_type, fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); + llvm_opt->add_flag("--assume-may-alias", + llvm_assume_alias, + fmt::format("Assume instance struct fields may alias ({})", llvm_assume_alias))->ignore_case(); llvm_opt->add_option("--vector-width", llvm_vec_width, fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); @@ -368,9 +374,9 @@ int main(int argc, const char* argv[]) { benchmark_opt->add_option("--repeat", num_experiments, fmt::format("Number of experiments for benchmarking ({})", num_experiments))->ignore_case(); - benchmark_opt->add_option("--backend", - backend, - fmt::format("Target's backend ({})", backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"})); + benchmark_opt->add_option("--cpu", + cpu, + fmt::format("Target's backend ({})", cpu))->ignore_case(); #endif // clang-format on @@ -695,7 +701,8 @@ int main(int argc, const char* argv[]) { llvm_vec_width, vector_library, !disable_debug_information, - 
llvm_fast_math_flags); + llvm_fast_math_flags, + llvm_assume_alias); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); @@ -708,7 +715,7 @@ int main(int argc, const char* argv[]) { shared_lib_paths, num_experiments, instance_size, - backend, + cpu, llvm_opt_level_ir, llvm_opt_level_codegen); benchmark.run(ast); diff --git a/test/benchmark/jit_driver.cpp b/test/benchmark/jit_driver.cpp index a804a2d4fd..f91b41cda0 100644 --- a/test/benchmark/jit_driver.cpp +++ b/test/benchmark/jit_driver.cpp @@ -31,8 +31,21 @@ namespace runner { /* Utilities for JIT driver */ /****************************************************************************************/ +/// Get the host CPU features in the format: +/// +feature,+feature,-feature,+feature,... +/// where `+` indicates that the feature is enabled. +std::string get_cpu_features(const std::string& cpu) { + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f: host_features) + features.AddFeature(f.first(), f.second); + } + return llvm::join(features.getFeatures().begin(), features.getFeatures().end(), ","); +} + /// Sets the target triple and the data layout of the module. -static void set_triple_and_data_layout(llvm::Module& module, const std::string& features) { +static void set_triple_and_data_layout(llvm::Module& module, const std::string& cpu) { // Get the default target triple for the host. auto target_triple = llvm::sys::getDefaultTargetTriple(); std::string error_msg; @@ -40,8 +53,8 @@ static void set_triple_and_data_layout(llvm::Module& module, const std::string& if (!target) throw std::runtime_error("Error " + error_msg + "\n"); - // Get the CPU information and set a target machine to create the data layout. - std::string cpu(llvm::sys::getHostCPUName()); + // Set a target machine to create the data layout. 
+ std::string features = get_cpu_features(cpu); std::unique_ptr tm( target->createTargetMachine(target_triple, cpu, features, {}, {})); if (!tm) @@ -52,10 +65,10 @@ static void set_triple_and_data_layout(llvm::Module& module, const std::string& module.setTargetTriple(target_triple); } -/// Creates llvm::TargetMachine with certain CPU features turned on/off. +/// Creates llvm::TargetMachine with for a specified CPU. static std::unique_ptr create_target( llvm::orc::JITTargetMachineBuilder* tm_builder, - const std::string& features, + const std::string& cpu, int opt_level) { // First, look up the target. std::string error_msg; @@ -66,8 +79,8 @@ static std::unique_ptr create_target( // Create default target machine with provided features. auto tm = target->createTargetMachine(target_triple, - llvm::sys::getHostCPUName().str(), - features, + cpu, + get_cpu_features(cpu), tm_builder->getOptions(), tm_builder->getRelocationModel(), tm_builder->getCodeModel(), @@ -83,15 +96,13 @@ static std::unique_ptr create_target( /* JIT driver */ /****************************************************************************************/ -void JITDriver::init(std::string features, - std::vector lib_paths, - BenchmarkInfo* benchmark_info) { +void JITDriver::init(const std::string& cpu, BenchmarkInfo* benchmark_info) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); utils::initialise_optimisation_passes(); // Set the target triple and the data layout for the module. - set_triple_and_data_layout(*module, features); + set_triple_and_data_layout(*module, cpu); auto data_layout = module->getDataLayout(); // If benchmarking, enable listeners to use GDB, perf or VTune. Note that LLVM should be built @@ -120,24 +131,26 @@ void JITDriver::init(std::string features, if (intel_event_listener) layer->registerJITEventListener(*intel_event_listener); - for (const auto& lib_path: lib_paths) { - // For every library path, create a corresponding memory buffer. 
- auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path); - if (!memory_buffer) - throw std::runtime_error("Unable to create memory buffer for " + lib_path); - - // Create a new JIT library instance for this session and resolve symbols. - auto& jd = session.createBareJITDylib(std::string(lib_path)); - auto loaded = - llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(), - data_layout.getGlobalPrefix()); - - if (!loaded) - throw std::runtime_error("Unable to load " + lib_path); - jd.addGenerator(std::move(*loaded)); - cantFail(layer->add(jd, std::move(*memory_buffer))); + // If benchmarking, resolve shared libraries. + if (benchmark_info) { + for (const auto& lib_path: benchmark_info->shared_lib_paths) { + // For every library path, create a corresponding memory buffer. + auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path); + if (!memory_buffer) + throw std::runtime_error("Unable to create memory buffer for " + lib_path); + + // Create a new JIT library instance for this session and resolve symbols. + auto& jd = session.createBareJITDylib(std::string(lib_path)); + auto loaded = + llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(), + data_layout.getGlobalPrefix()); + + if (!loaded) + throw std::runtime_error("Unable to load " + lib_path); + jd.addGenerator(std::move(*loaded)); + cantFail(layer->add(jd, std::move(*memory_buffer))); + } } - return layer; }; @@ -146,7 +159,7 @@ void JITDriver::init(std::string features, -> llvm::Expected> { // Create target machine with some features possibly turned off. int opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0; - auto tm = create_target(&tm_builder, features, opt_level_codegen); + auto tm = create_target(&tm_builder, cpu, opt_level_codegen); // Optimise the LLVM IR module and save it to .ll file if benchmarking. 
if (benchmark_info) { diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp index d8e1127417..7106311523 100644 --- a/test/benchmark/jit_driver.hpp +++ b/test/benchmark/jit_driver.hpp @@ -17,6 +17,7 @@ #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/Support/Host.h" namespace nmodl { namespace runner { @@ -29,6 +30,9 @@ struct BenchmarkInfo { /// Object file output directory. std::string output_dir; + /// Shared libraries' paths to link against. + std::vector shared_lib_paths; + /// Optimisation level for IT. int opt_level_ir; @@ -63,9 +67,7 @@ class JITDriver { : module(std::move(m)) {} /// Initializes the JIT driver. - void init(std::string features = "", - std::vector lib_paths = {}, - BenchmarkInfo* benchmark_info = nullptr); + void init(const std::string& cpu, BenchmarkInfo* benchmark_info = nullptr); /// Lookups the entry-point without arguments in the JIT and executes it, returning the result. template @@ -131,7 +133,7 @@ class TestRunner: public BaseRunner { : BaseRunner(std::move(m)) {} virtual void initialize_driver() { - driver->init(); + driver->init(llvm::sys::getHostCPUName().str()); } }; @@ -145,27 +147,23 @@ class BenchmarkRunner: public BaseRunner { /// Benchmarking information passed to JIT driver. BenchmarkInfo benchmark_info; - /// CPU features specified by the user. - std::string features; - - /// Shared libraries' paths to link against. - std::vector shared_lib_paths; + /// CPU to target. 
+ std::string cpu; public: BenchmarkRunner(std::unique_ptr m, std::string filename, std::string output_dir, - std::string features = "", + std::string cpu, std::vector lib_paths = {}, int opt_level_ir = 0, int opt_level_codegen = 0) : BaseRunner(std::move(m)) - , benchmark_info{filename, output_dir, opt_level_ir, opt_level_codegen} - , features(features) - , shared_lib_paths(lib_paths) {} + , cpu(cpu) + , benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {} virtual void initialize_driver() { - driver->init(features, shared_lib_paths, &benchmark_info); + driver->init(cpu, &benchmark_info); } }; diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index e48df0d457..0e94ae231b 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -6,7 +6,6 @@ *************************************************************************/ #include -#include #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "llvm_benchmark.hpp" @@ -19,35 +18,6 @@ namespace nmodl { namespace benchmark { -/// Precision for the timing measurements. -static constexpr int PRECISION = 9; - -/// Get the host CPU features in the format: -/// +feature,+feature,-feature,+feature,... -/// where `+` indicates that the feature is enabled. 
-static std::vector get_cpu_features() { - std::string cpu(llvm::sys::getHostCPUName()); - - llvm::SubtargetFeatures features; - llvm::StringMap host_features; - if (llvm::sys::getHostCPUFeatures(host_features)) { - for (auto& f: host_features) - features.AddFeature(f.first(), f.second); - } - return features.getFeatures(); -} - - -void LLVMBenchmark::disable(const std::string& feature, std::vector& host_features) { - for (auto& host_feature: host_features) { - if (feature == host_feature.substr(1)) { - host_feature[0] = '-'; - logger->info("{}", host_feature); - return; - } - } -} - void LLVMBenchmark::run(const std::shared_ptr& node) { // create functions generate_llvm(node); @@ -57,9 +27,9 @@ void LLVMBenchmark::run(const std::shared_ptr& node) { void LLVMBenchmark::generate_llvm(const std::shared_ptr& node) { // First, visit the AST to build the LLVM IR module and wrap the kernel function calls. - auto start = std::chrono::high_resolution_clock::now(); + auto start = std::chrono::steady_clock::now(); llvm_visitor.wrap_kernel_functions(); - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::steady_clock::now(); // Log the time taken to visit the AST and build LLVM IR. std::chrono::duration diff = end - start; @@ -72,37 +42,17 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { std::vector kernel_names; llvm_visitor.find_kernel_names(kernel_names); - // Get feature's string and turn them off depending on the backend. - std::vector features = get_cpu_features(); - logger->info("Backend: {}", backend); - if (backend == "avx2") { - // Disable SSE. - logger->info("Disabling features:"); - disable("sse", features); - disable("sse2", features); - disable("sse3", features); - disable("sse4.1", features); - disable("sse4.2", features); - } else if (backend == "sse2") { - // Disable AVX. 
- logger->info("Disabling features:"); - disable("avx", features); - disable("avx2", features); - } + // Get feature's string and turn them off depending on the cpu. + std::string cpu_name = cpu == "default" ? llvm::sys::getHostCPUName().str() : cpu; + logger->info("CPU: {}", cpu_name); - std::string features_str = llvm::join(features.begin(), features.end(), ","); std::unique_ptr m = llvm_visitor.get_module(); // Create the benchmark runner and initialize it. std::string filename = "v" + std::to_string(llvm_visitor.get_vector_width()) + "_" + mod_filename; - runner::BenchmarkRunner runner(std::move(m), - filename, - output_dir, - features_str, - shared_libs, - opt_level_ir, - opt_level_codegen); + runner::BenchmarkRunner runner( + std::move(m), filename, output_dir, cpu_name, shared_libs, opt_level_ir, opt_level_codegen); runner.initialize_driver(); // Benchmark every kernel. @@ -124,9 +74,9 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { // Record the execution time of the kernel. std::string wrapper_name = "__" + kernel_name + "_wrapper"; - auto start = std::chrono::high_resolution_clock::now(); + auto start = std::chrono::steady_clock::now(); runner.run_with_argument(kernel_name, instance_data.base_ptr); - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::steady_clock::now(); std::chrono::duration diff = end - start; // Log the time taken for each run. diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index 4a66de52fc..cc9dd3bcf0 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include "codegen/llvm/codegen_llvm_visitor.hpp" @@ -40,8 +41,8 @@ class LLVMBenchmark { /// The size of the instance struct for benchmarking. int instance_size; - /// Benchmarking backend - std::string backend; + /// CPU to target. + std::string cpu; /// Optimisation level for IR generation. 
int opt_level_ir; @@ -59,7 +60,7 @@ class LLVMBenchmark { std::vector shared_libs, int num_experiments, int instance_size, - const std::string& backend, + const std::string& cpu, int opt_level_ir, int opt_level_codegen) : llvm_visitor(llvm_visitor) @@ -68,7 +69,7 @@ class LLVMBenchmark { , shared_libs(shared_libs) , num_experiments(num_experiments) , instance_size(instance_size) - , backend(backend) + , cpu(cpu) , opt_level_ir(opt_level_ir) , opt_level_codegen(opt_level_codegen) {} @@ -76,9 +77,6 @@ class LLVMBenchmark { void run(const std::shared_ptr& node); private: - /// Disables the specified feature in the target. - void disable(const std::string& feature, std::vector& host_features); - /// Visits the AST to construct the LLVM IR module. void generate_llvm(const std::shared_ptr& node); From c95dec98c8d58b255db36f4ddc2c59e7ed40666a Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 3 Jun 2021 18:52:35 +0200 Subject: [PATCH 072/105] Fix azure yaml pipeline from merge (#687) --- azure-pipelines.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bef50433f0..a3401b4edc 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -243,7 +243,6 @@ stages: exit 1 fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod - condition: false env: SHELL: 'bash' condition: false From 6f5e0374712e9ddbc1d23c9afda00e1acb52897d Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 8 Mar 2022 12:21:10 +0100 Subject: [PATCH 073/105] [LLVM] Support for newer versions of LLVM APIs This commit fixes deprications warnings and errors that occur due to switch of LLVM to opaque pointers: recent versions of LLVM instruction builder APIs require explicit type parameters when doing `gep`s, `load`s, `gather`s. Moreover, with recent change to LLVM `https://reviews.llvm.org/D106678` MASSV SIMD functions have no `_P8` suffix by default. Tests were adjusted to take that into account. 
Note: tested with the LLVM version from `brew` (13). --- src/codegen/llvm/llvm_ir_builder.cpp | 44 ++++++++++++++++++--------- test/unit/codegen/codegen_llvm_ir.cpp | 4 +-- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index a585c95b3b..1015b437f3 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -347,11 +347,14 @@ llvm::Value* IRBuilder::create_inbounds_gep(const std::string& var_name, llvm::V // Since we index through the pointer, we need an extra 0 index in the indices list for GEP. ValueVector indices{llvm::ConstantInt::get(get_i64_type(), 0), index}; - return builder.CreateInBoundsGEP(variable_ptr, indices); + llvm::Type* variable_type = variable_ptr->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(variable_type, variable_ptr, indices); } llvm::Value* IRBuilder::create_inbounds_gep(llvm::Value* variable, llvm::Value* index) { - return builder.CreateInBoundsGEP(variable, {index}); + ValueVector indices{index}; + llvm::Type* variable_type = variable->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(variable_type, variable, indices); } llvm::Value* IRBuilder::create_index(llvm::Value* value) { @@ -378,23 +381,25 @@ llvm::Value* IRBuilder::create_index(llvm::Value* value) { llvm::Value* IRBuilder::create_load(const std::string& name, bool masked) { llvm::Value* ptr = lookup_value(name); + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); // Check if the generated IR is vectorized and masked. 
if (masked) { - return builder.CreateMaskedLoad(ptr, llvm::Align(), mask); + builder.CreateMaskedLoad(loaded_type, ptr, llvm::Align(), mask); } - llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); value_stack.push_back(loaded); return loaded; } llvm::Value* IRBuilder::create_load(llvm::Value* ptr, bool masked) { + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + // Check if the generated IR is vectorized and masked. if (masked) { - return builder.CreateMaskedLoad(ptr, llvm::Align(), mask); + builder.CreateMaskedLoad(loaded_type, ptr, llvm::Align(), mask); } - llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); value_stack.push_back(loaded); return loaded; @@ -466,7 +471,9 @@ llvm::Value* IRBuilder::get_struct_member_ptr(llvm::Value* struct_variable, int ValueVector indices; indices.push_back(llvm::ConstantInt::get(get_i32_type(), 0)); indices.push_back(llvm::ConstantInt::get(get_i32_type(), member_index)); - return builder.CreateInBoundsGEP(struct_variable, indices); + + llvm::Type* type = struct_variable->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(type, struct_variable, indices); } void IRBuilder::invert_mask() { @@ -491,14 +498,23 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, bool generating_vector_ir = vector_width > 1 && vectorize; // If the vector code is generated, we need to distinguish between two cases. If the array is - // indexed indirectly (i.e. not by an induction variable `kernel_id`), create a gather - // instruction. + // indexed indirectly (i.e. not by an induction variable `kernel_id`), create gather/scatter + // instructions. if (id_name != kernel_id && generating_vector_ir) { - return maybe_value_to_store ? 
builder.CreateMaskedScatter(maybe_value_to_store, - element_ptr, - llvm::Align(), - mask) - : builder.CreateMaskedGather(element_ptr, llvm::Align(), mask); + if (maybe_value_to_store) { + return builder.CreateMaskedScatter(maybe_value_to_store, + element_ptr, + llvm::Align(), + mask); + } else { + // Construct the loaded vector type. + auto* ptrs = llvm::cast(element_ptr->getType()); + llvm::ElementCount element_count = ptrs->getElementCount(); + llvm::Type* element_type = ptrs->getElementType()->getPointerElementType(); + llvm::Type* loaded_type = llvm::VectorType::get(element_type, element_count); + + return builder.CreateMaskedGather(loaded_type, element_ptr, llvm::Align(), mask); + } } llvm::Value* ptr; diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 4a7a81ea6b..397ff46111 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1354,8 +1354,8 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { /*use_single_precision=*/false, /*vector_width=*/2, /*vec_lib=*/"MASSV"); - std::regex massv2_exp_decl(R"(declare <2 x double> @__expd2_P8\(<2 x double>\))"); - std::regex massv2_exp_call(R"(call <2 x double> @__expd2_P8\(<2 x double> .*\))"); + std::regex massv2_exp_decl(R"(declare <2 x double> @__expd2\(<2 x double>\))"); + std::regex massv2_exp_call(R"(call <2 x double> @__expd2\(<2 x double> .*\))"); REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_decl)); REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_call)); REQUIRE(!std::regex_search(massv2_library_module_str, m, exp_call)); From 58476bdaaffb982bb8cabea76e8ac735c4ce039a Mon Sep 17 00:00:00 2001 From: Pramod S Kumbhar Date: Tue, 8 Mar 2022 15:12:14 +0100 Subject: [PATCH 074/105] Fix build issues for the rebased branch - fix bad merges - remove custom llvm version, use brew with llvm@13 - follow hpc/gitlab-pipelines changes from Olli - load extra module llvm using 
SPACK_EXTRA_MODULES - use gcc build instead of nvhpc which fails to compile with llvm headers --- .gitlab-ci.yml | 10 ++++++---- azure-pipelines.yml | 6 +----- src/codegen/codegen_info.hpp | 1 + src/codegen/llvm/codegen_llvm_helper_visitor.hpp | 1 + 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ee3c248543..2687b8ebb0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,6 +42,8 @@ trigger cvf: variables: SPACK_PACKAGE: nmodl SPACK_PACKAGE_SPEC: ~legacy-unit+python + SPACK_EXTRA_MODULES: llvm + SPACK_INSTALL_EXTRA_FLAGS: -v spack_setup: extends: .spack_setup_ccache @@ -65,12 +67,12 @@ build:intel: variables: SPACK_PACKAGE_COMPILER: intel -build:nvhpc: +build:gcc: extends: - .spack_build - .spack_nmodl variables: - SPACK_PACKAGE_COMPILER: nvhpc + SPACK_PACKAGE_COMPILER: gcc SPACK_PACKAGE_DEPENDENCIES: ^bison%gcc^flex%gcc^py-jinja2%gcc^py-sympy%gcc^py-pyyaml%gcc .nmodl_tests: @@ -84,8 +86,8 @@ test:intel: - .nmodl_tests needs: ["build:intel"] -test:nvhpc: +test:gcc: extends: - .ctest - .nmodl_tests - needs: ["build:nvhpc"] + needs: ["build:gcc"] diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a3401b4edc..4cfd1c9fcb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -192,15 +192,11 @@ stages: python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' - - script: | - cd $HOME - git clone --depth 1 https://github.com/pramodk/llvm-nightly.git - displayName: 'Setup LLVM v13' - script: | export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$HOME/llvm-nightly/0621/osx/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON + cmake .. 
-DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$(brew --prefix llvm)/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? -ne 0 ] then diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index d1278fd4dd..75923b61a3 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -224,6 +224,7 @@ enum BlockType { BlockTypeEnd }; + /** * \class ShadowUseStatement * \brief Represents ion write statement during code generation diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 9d79e24803..9b3f759bfa 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -18,6 +18,7 @@ #include "codegen/codegen_info.hpp" #include "symtab/symbol_table.hpp" #include "visitors/ast_visitor.hpp" + #include "utils/logger.hpp" namespace nmodl { namespace codegen { From a30d52ec802a52ec25e46cfcdd12c8c87032578e Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 10 Mar 2022 17:39:11 +0100 Subject: [PATCH 075/105] [LLVM] Allocate InstanceStruct on the GPU using cudaMallocManaged (#815) * Added CUDA includes and libraries * Added ability to allocate data with cudamallocmanaged if CUDA backend is enabled * Make cmake 3.17 minimum required cmake version * Use cmake version 3.17 in azure CI --- CMakeLists.txt | 14 ++++++++++- azure-pipelines.yml | 10 ++++---- .../llvm/codegen_llvm_helper_visitor.hpp | 2 +- test/unit/CMakeLists.txt | 6 +++++ test/unit/codegen/codegen_data_helper.cpp | 25 +++++++++++++++++-- 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cbb3ba3b4..8e4221ac17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ # See top-level LICENSE file for details. 
# ============================================================================= -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(NMODL LANGUAGES CXX) @@ -23,6 +23,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" OFF) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. instead of 2019 nist constants" OFF) option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON) +option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" ON) option(NMODL_ENABLE_JIT_EVENT_LISTENERS "Enable JITEventListener for Perf and Vtune" OFF) if(NMODL_ENABLE_LEGACY_UNITS) @@ -184,6 +185,11 @@ if(NMODL_ENABLE_LLVM) include(cmake/LLVMHelper.cmake) include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(-DNMODL_LLVM_BACKEND) + if(NMODL_ENABLE_LLVM_CUDA) + enable_language(CUDA) + find_package(CUDAToolkit) + add_definitions(-DNMODL_LLVM_CUDA_BACKEND) + endif() endif() # ============================================================================= @@ -291,6 +297,12 @@ if(NMODL_ENABLE_LLVM) message(STATUS " CMAKE | ${LLVM_CMAKE_DIR}") message(STATUS " JIT LISTENERS | ${NMODL_ENABLE_JIT_EVENT_LISTENERS}") endif() +message(STATUS "LLVM CUDA Codegen | ${NMODL_ENABLE_LLVM_CUDA}") +if(NMODL_ENABLE_LLVM_CUDA) + message(STATUS " CUDA VERSION | ${CUDAToolkit_VERSION}") + message(STATUS " INCLUDE | ${CUDAToolkit_INCLUDE_DIRS}") + message(STATUS " LIBRARY | ${CUDAToolkit_LIBRARY_DIR}") +endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://github.com/BlueBrain/nmodl/") message(STATUS "--------------+--------------------------------------------------------------") diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4cfd1c9fcb..5fa8ce7acc 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -104,8 +104,8 @@ stages: 
chmod +x llvm.sh sudo ./llvm.sh 13 env: - CMAKE_VER: 'v3.15.0' - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_VER: 'v3.17.0' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Install Dependencies' - script: | export PATH=$(pwd)/$CMAKE_PKG/bin:/home/vsts/.local/bin:$PATH @@ -123,7 +123,7 @@ stages: make install #this is needed for the integration tests env CTEST_OUTPUT_ON_FAILURE=1 make test env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Build and Run Unit Tests' - script: | export PATH=$(pwd)/$CMAKE_PKG/bin:/home/vsts/.local/bin:$PATH @@ -154,7 +154,7 @@ stages: fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' SHELL: 'bash' displayName: 'Build Neuron and Run Integration Tests' - script: | @@ -178,7 +178,7 @@ stages: fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Build CoreNEURON and Run Integration Tests with ISPC compiler' - job: 'osx11' pool: diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 9b3f759bfa..21aff4a92d 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -17,8 +17,8 @@ #include "ast/instance_struct.hpp" #include "codegen/codegen_info.hpp" #include "symtab/symbol_table.hpp" +#include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" - #include "utils/logger.hpp" namespace nmodl { namespace codegen { diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index c98c919728..d9c7e2c349 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -117,6 +117,12 @@ if(NMODL_ENABLE_LLVM) codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) add_executable(test_llvm_runner 
visitor/main.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_execution.cpp) + if(NMODL_ENABLE_LLVM_CUDA) + include_directories(${CUDAToolkit_INCLUDE_DIRS}) + target_link_libraries(benchmark_data PRIVATE CUDA::cudart) + target_link_libraries(testllvm CUDA::cudart) + target_link_libraries(test_llvm_runner CUDA::cudart) + endif() target_link_libraries( testllvm Catch2::Catch2 diff --git a/test/unit/codegen/codegen_data_helper.cpp b/test/unit/codegen/codegen_data_helper.cpp index a0ee6ec957..d2b17277bc 100644 --- a/test/unit/codegen/codegen_data_helper.cpp +++ b/test/unit/codegen/codegen_data_helper.cpp @@ -1,5 +1,9 @@ #include +#ifdef NMODL_LLVM_CUDA_BACKEND +#include +#endif + #include "ast/codegen_var_type.hpp" #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" @@ -18,10 +22,18 @@ const int default_second_order_value = 0; CodegenInstanceData::~CodegenInstanceData() { // first free num_ptr_members members which are pointers for (size_t i = 0; i < num_ptr_members; i++) { +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaFree(members[i]); +#else free(members[i]); +#endif } - // and then pointer to container struct +// and then pointer to container struct +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaFree(base_ptr); +#else free(base_ptr); +#endif } /** @@ -85,8 +97,13 @@ CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t s // max size of each member : pointer / double has maximum size size_t member_size = std::max(sizeof(double), sizeof(double*)); - // allocate instance object with memory alignment +// allocate instance object with memory alignment +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaMallocManaged(&base, member_size * variables.size()); +#else posix_memalign(&base, NBYTE_ALIGNMENT, member_size * variables.size()); +#endif + data.base_ptr = base; data.num_bytes += member_size * variables.size(); @@ -114,7 +131,11 @@ CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t s // allocate memory and setup a pointer void* member; 
+#ifdef NMODL_LLVM_CUDA_BACKEND + cudaMallocManaged(&member, member_size * num_elements); +#else posix_memalign(&member, NBYTE_ALIGNMENT, member_size * num_elements); +#endif // integer values are often offsets so they must start from // 0 to num_elements-1 to avoid out of bound accesses. From a755719b079f76b062022338241db5adbfddff10 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 14 Mar 2022 12:48:49 +0100 Subject: [PATCH 076/105] [LLVM][GPU] Separated CPU and GPU CLI options (#817) Now, CLI has two options: `cpu` and `gpu` that allow users to target different platforms. For example, ``` bin/nmodl mod/test.mod -o out llvm --ir bin/nmodl mod/test.mod -o out llvm --ir cpu --name skylake --vector-width 2 bin/nmodl mod/test.mod -o out llvm --ir gpu --name cuda ``` Moreover, `assume_no_alias` option was dropped and made default (it didn't affect the computation in our experiments). The new CLI looks like: ``` llvm LLVM code generation option Options: --ir REQUIRED Generate LLVM IR (false) --no-debug Disable debug information (false) --opt-level-ir INT:{0,1,2,3} LLVM IR optimisation level (O0) --single-precision Use single precision floating-point types (false) --fmf TEXT:{afn,arcp,contract,ninf,nnan,nsz,reassoc,fast} ... Fast math flags for floating-point optimizations (none) cpu LLVM CPU option Options: --name TEXT Name of CPU platform to use --math-library TEXT:{Accelerate,libmvec,libsystem_m,MASSV,SLEEF,SVML,none} Math library for SIMD code generation (none) --vector-width INT Explicit vectorization width for IR generation (1) gpu LLVM GPU option Options: --name TEXT Name of GPU platform to use --math-library TEXT:{libdevice} Math library for GPU code generation (none) benchmark LLVM benchmark option Options: --run Run LLVM benchmark (false) --opt-level-codegen INT:{0,1,2,3} Machine code optimisation level (O0) --libs TEXT:FILE ... 
Shared libraries to link IR against --instance-size INT Instance struct size (10000) --repeat INT Number of experiments for benchmarking (100) ``` --- src/main.cpp | 153 ++++++++++++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 4bc2f30252..8db5376251 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -173,26 +173,29 @@ int main(int argc, const char* argv[]) { /// use single precision floating-point types bool llvm_float_type(false); - /// llvm vector width - int llvm_vec_width = 1; + /// optimisation level for IR generation + int llvm_opt_level_ir = 0; - /// vector library name - std::string vector_library("none"); + /// math library name + std::string llvm_math_library("none"); /// disable debug information generation for the IR - bool disable_debug_information(false); + bool llvm_no_debug(false); /// fast math flags for LLVM backend std::vector llvm_fast_math_flags; - /// run llvm benchmark - bool run_llvm_benchmark(false); + /// traget CPU platform name + std::string llvm_cpu_name = "default"; - /// do not assume that instance struct fields do not alias - bool llvm_assume_alias(false); + /// traget GPU platform name + std::string llvm_gpu_name = "default"; - /// optimisation level for IR generation - int llvm_opt_level_ir = 0; + /// llvm vector width if generating code for CPUs + int llvm_vector_width = 1; + + /// run llvm benchmark + bool llvm_benchmark(false); /// optimisation level for machine code generation int llvm_opt_level_codegen = 0; @@ -205,9 +208,6 @@ int main(int argc, const char* argv[]) { /// the number of repeated experiments for the benchmarking int num_experiments = 100; - - /// specify the cpu for LLVM IR to target - std::string cpu = "default"; #endif // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) @@ -332,36 +332,57 @@ int main(int argc, const char* argv[]) { // LLVM IR code generation options. 
auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); - llvm_opt->add_flag("--ir", + auto llvm_ir_opt = llvm_opt->add_flag("--ir", llvm_ir, fmt::format("Generate LLVM IR ({})", llvm_ir))->ignore_case(); - llvm_opt->add_flag("--disable-debug-info", - disable_debug_information, - fmt::format("Disable debug information ({})", disable_debug_information))->ignore_case(); + llvm_ir_opt->required(true); + llvm_opt->add_flag("--no-debug", + llvm_no_debug, + fmt::format("Disable debug information ({})", llvm_no_debug))->ignore_case(); llvm_opt->add_option("--opt-level-ir", - llvm_opt_level_ir, - fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + llvm_opt_level_ir, + fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); llvm_opt->add_flag("--single-precision", - llvm_float_type, - fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); - llvm_opt->add_flag("--assume-may-alias", - llvm_assume_alias, - fmt::format("Assume instance struct fields may alias ({})", llvm_assume_alias))->ignore_case(); - llvm_opt->add_option("--vector-width", - llvm_vec_width, - fmt::format("LLVM explicit vectorisation width ({})", llvm_vec_width))->ignore_case(); - llvm_opt->add_option("--veclib", - vector_library, - fmt::format("Vector library for maths functions ({})", vector_library))->check(CLI::IsMember({"Accelerate", "libsystem_m", "libmvec", "MASSV", "SLEEF", "SVML", "none"})); + llvm_float_type, + fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); llvm_opt->add_option("--fmf", - llvm_fast_math_flags, - "Fast math flags for floating-point optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); + llvm_fast_math_flags, + "Fast math flags for floating-point 
optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); + + // Platform options for LLVM code generation. + auto cpu_opt = app.add_subcommand("cpu", "LLVM CPU option")->ignore_case(); + cpu_opt->needs(llvm_opt); + cpu_opt->add_option("--name", + llvm_cpu_name, + "Name of CPU platform to use")->ignore_case(); + auto simd_math_library_opt = cpu_opt->add_option("--math-library", + llvm_math_library, + "Math library for SIMD code generation ({})"_format(llvm_math_library)); + simd_math_library_opt->check(CLI::IsMember({"Accelerate", "libmvec", "libsystem_m", "MASSV", "SLEEF", "SVML", "none"})); + cpu_opt->add_option("--vector-width", + llvm_vector_width, + "Explicit vectorization width for IR generation ({})"_format(llvm_vector_width))->ignore_case(); + + auto gpu_opt = app.add_subcommand("gpu", "LLVM GPU option")->ignore_case(); + gpu_opt->needs(llvm_opt); + gpu_opt->add_option("--name", + llvm_gpu_name, + "Name of GPU platform to use")->ignore_case(); + auto gpu_math_library_opt = gpu_opt->add_option("--math-library", + llvm_math_library, + "Math library for GPU code generation ({})"_format(llvm_math_library)); + gpu_math_library_opt->check(CLI::IsMember({"libdevice"})); + + // Allow only one platform at a time. + cpu_opt->excludes(gpu_opt); + gpu_opt->excludes(cpu_opt); // LLVM IR benchmark options. 
auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); + benchmark_opt->needs(llvm_opt); benchmark_opt->add_flag("--run", - run_llvm_benchmark, - fmt::format("Run LLVM benchmark ({})", run_llvm_benchmark))->ignore_case(); + llvm_benchmark, + fmt::format("Run LLVM benchmark ({})", llvm_benchmark))->ignore_case(); benchmark_opt->add_option("--opt-level-codegen", llvm_opt_level_codegen, fmt::format("Machine code optimisation level (O{})", llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); @@ -374,9 +395,6 @@ int main(int argc, const char* argv[]) { benchmark_opt->add_option("--repeat", num_experiments, fmt::format("Number of experiments for benchmarking ({})", num_experiments))->ignore_case(); - benchmark_opt->add_option("--cpu", - cpu, - fmt::format("Target's backend ({})", cpu))->ignore_case(); #endif // clang-format on @@ -688,38 +706,33 @@ int main(int argc, const char* argv[]) { } #ifdef NMODL_LLVM_BACKEND - if (llvm_ir || run_llvm_benchmark) { - // If benchmarking, we want to optimize the IR with target information and not in - // LLVM visitor. - int llvm_opt_level = run_llvm_benchmark ? 
0 : llvm_opt_level_ir; - - logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, - output_dir, - llvm_opt_level, - llvm_float_type, - llvm_vec_width, - vector_library, - !disable_debug_information, - llvm_fast_math_flags, - llvm_assume_alias); - visitor.visit_program(*ast); - ast_to_nmodl(*ast, filepath("llvm", "mod")); - ast_to_json(*ast, filepath("llvm", "json")); - - if (run_llvm_benchmark) { - logger->info("Running LLVM benchmark"); - benchmark::LLVMBenchmark benchmark(visitor, - modfile, - output_dir, - shared_lib_paths, - num_experiments, - instance_size, - cpu, - llvm_opt_level_ir, - llvm_opt_level_codegen); - benchmark.run(ast); - } + if (llvm_ir || llvm_benchmark) { + // If benchmarking, we want to optimize the IR with target + // information and not in LLVM visitor. + int llvm_opt_level = llvm_benchmark ? 0 : llvm_opt_level_ir; + + if (llvm_gpu_name != "default") { + logger->warn("GPU code generation is not supported, targeting " + "CPU instead"); + } + + logger->info("Running LLVM backend code generator"); + CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_level, + llvm_float_type, llvm_vector_width, + llvm_math_library, !llvm_no_debug, + llvm_fast_math_flags, true); + visitor.visit_program(*ast); + ast_to_nmodl(*ast, filepath("llvm", "mod")); + ast_to_json(*ast, filepath("llvm", "json")); + + if (llvm_benchmark) { + logger->info("Running LLVM benchmark"); + benchmark::LLVMBenchmark benchmark( + visitor, modfile, output_dir, shared_lib_paths, + num_experiments, instance_size, llvm_cpu_name, + llvm_opt_level_ir, llvm_opt_level_codegen); + benchmark.run(ast); + } } #endif } From 412af4c4c20e27172fae1387b92f000bd076b951 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 15 Mar 2022 12:46:11 +0100 Subject: [PATCH 077/105] [LLVM][refactoring] Added platform abstraction (#818) * This commit introduces a handy `Platform` class that is designed to incorporate target information for IR generation, such as 
precision, vectorization width (if applicable), type of target (CPU/GPU), etc. * In future, more functionality can be added to `Platform`, e.g. we can move functionality of handling `llvm::Target`, math SIMD libraries, etc. * Note: this is just a very basic implementation that enables easier integration of GPU code generation. --- src/codegen/llvm/CMakeLists.txt | 4 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 20 ++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 25 ++--- src/codegen/llvm/llvm_ir_builder.cpp | 29 +++--- src/codegen/llvm/llvm_ir_builder.hpp | 23 ++--- src/codegen/llvm/main.cpp | 5 +- src/codegen/llvm/target_platform.cpp | 54 +++++++++++ src/codegen/llvm/target_platform.hpp | 92 +++++++++++++++++++ src/main.cpp | 24 +++-- test/unit/codegen/codegen_llvm_execution.cpp | 40 +++++--- .../codegen/codegen_llvm_instance_struct.cpp | 6 +- test/unit/codegen/codegen_llvm_ir.cpp | 14 ++- 12 files changed, 246 insertions(+), 90 deletions(-) create mode 100644 src/codegen/llvm/target_platform.cpp create mode 100644 src/codegen/llvm/target_platform.hpp diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 8cf03ee67a..792591c447 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -11,7 +11,9 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/target_platform.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/target_platform.hpp) # ============================================================================= # LLVM codegen library and executable diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index bac6f4e0b2..0fa81de691 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -68,7 
+68,7 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, llvm::Triple& triple) { // Since LLVM does not support SLEEF as a vector library yet, process it separately. - if (vector_library == "SLEEF") { + if (platform.get_math_library() == "SLEEF") { // clang-format off #define FIXED(w) llvm::ElementCount::getFixed(w) // clang-format on @@ -112,9 +112,9 @@ void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLib {"MASSV", VecLib::MASSV}, {"none", VecLib::NoLibrary}, {"SVML", VecLib::SVML}}; - const auto& library = llvm_supported_vector_libraries.find(vector_library); + const auto& library = llvm_supported_vector_libraries.find(platform.get_math_library()); if (library == llvm_supported_vector_libraries.end()) - throw std::runtime_error("Error: unknown vector library - " + vector_library + "\n"); + throw std::runtime_error("Error: unknown vector library - " + platform.get_math_library() + "\n"); // Add vectorizable functions to the target library info. switch (library->second) { @@ -542,7 +542,7 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { * \todo support this properly. */ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) { - if (vector_width > 1) + if (platform.is_cpu_with_simd()) logger->warn("Atomic operations are not supported"); // Support only assignment for now. @@ -555,7 +555,7 @@ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomic throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); // Process the assignment as if it was non-atomic. 
- if (vector_width > 1) + if (platform.is_cpu_with_simd()) logger->warn("Treating write as non-atomic"); write_to_variable(*var, rhs); } @@ -625,7 +625,7 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem ir_builder.set_insertion_point(for_body); // If not processing remainder of the loop, start vectorization. - if (vector_width > 1 && main_loop_initialization) + if (platform.is_cpu_with_simd() && main_loop_initialization) ir_builder.generate_vector_ir(); // Generate code for the loop body and create the basic block for the increment. @@ -666,7 +666,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // Process function or procedure body. If the function is a compute kernel, enable // vectorization. If so, the return statement is handled in a separate visitor. - if (vector_width > 1 && is_kernel_function(name)) { + if (platform.is_cpu_with_simd() && is_kernel_function(name)) { ir_builder.generate_vector_ir(); block->accept(*this); ir_builder.generate_scalar_ir(); @@ -740,7 +740,7 @@ void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { // If vectorizing the compute kernel with control flow, process it separately. - if (vector_width > 1 && ir_builder.vectorizing()) { + if (platform.is_cpu_with_simd() && ir_builder.vectorizing()) { create_vectorized_control_flow_block(node); return; } @@ -815,7 +815,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // - convert function and procedure blocks into CodegenFunctions // - gather information about AST. For now, information about functions // and procedures is used only. 
- CodegenLLVMHelperVisitor v{vector_width}; + CodegenLLVMHelperVisitor v{platform.get_instruction_width()}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); sym_tab = node.get_symbol_table(); @@ -864,7 +864,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { } // Optionally, replace LLVM math intrinsics with vector library calls. - if (vector_width > 1) { + if (platform.is_cpu_with_simd()) { #if LLVM_VERSION_MAJOR < 13 logger->warn( "This version of LLVM does not support replacement of LLVM intrinsics with vector " diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 22b9fafd83..396d8cbb67 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -81,33 +81,22 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Optimisation level for LLVM IR transformations. int opt_level_ir; - /// Vector library used for math functions. - std::string vector_library; - - /// Explicit vectorisation width. - int vector_width; + /// Target platform for the code generation. + Platform platform; public: CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, + Platform& platform, int opt_level_ir, - bool use_single_precision = false, - int vector_width = 1, - std::string vec_lib = "none", bool add_debug_information = false, - std::vector fast_math_flags = {}, - bool llvm_assume_alias = false) + std::vector fast_math_flags = {}) : mod_filename(mod_filename) , output_dir(output_dir) + , platform(platform) , opt_level_ir(opt_level_ir) - , vector_width(vector_width) - , vector_library(vec_lib) , add_debug_information(add_debug_information) - , ir_builder(*context, - use_single_precision, - vector_width, - fast_math_flags, - !llvm_assume_alias) + , ir_builder(*context, platform, fast_math_flags) , debug_builder(*module) {} /// Dumps the generated LLVM IR module to string. 
@@ -139,7 +128,7 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Returns vector width int get_vector_width() const { - return vector_width; + return platform.get_instruction_width(); } // Visitors. diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 1015b437f3..e7a6a4a60b 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -41,13 +41,13 @@ llvm::Type* IRBuilder::get_i64_type() { } llvm::Type* IRBuilder::get_fp_type() { - if (fp_precision == single_precision) + if (platform.is_single_precision()) return llvm::Type::getFloatTy(builder.getContext()); return llvm::Type::getDoubleTy(builder.getContext()); } llvm::Type* IRBuilder::get_fp_ptr_type() { - if (fp_precision == single_precision) + if (platform.is_single_precision()) return llvm::Type::getFloatPtrTy(builder.getContext()); return llvm::Type::getDoublePtrTy(builder.getContext()); } @@ -92,7 +92,7 @@ llvm::Value* IRBuilder::pop_last_value() { /****************************************************************************************/ void IRBuilder::create_boolean_constant(int value) { - if (vector_width > 1 && vectorize) { + if (platform.is_cpu_with_simd() && vectorize) { value_stack.push_back(get_vector_constant(get_boolean_type(), value)); } else { value_stack.push_back(get_scalar_constant(get_boolean_type(), value)); @@ -100,7 +100,7 @@ void IRBuilder::create_boolean_constant(int value) { } void IRBuilder::create_fp_constant(const std::string& value) { - if (vector_width > 1 && vectorize) { + if (platform.is_cpu_with_simd() && vectorize) { value_stack.push_back(get_vector_constant(get_fp_type(), value)); } else { value_stack.push_back(get_scalar_constant(get_fp_type(), value)); @@ -112,7 +112,7 @@ llvm::Value* IRBuilder::create_global_string(const ast::String& node) { } void IRBuilder::create_i32_constant(int value) { - if (vector_width > 1 && vectorize) { + if (platform.is_cpu_with_simd() && vectorize) { 
value_stack.push_back(get_vector_constant(get_i32_type(), value)); } else { value_stack.push_back(get_scalar_constant(get_i32_type(), value)); @@ -126,6 +126,8 @@ llvm::Value* IRBuilder::get_scalar_constant(llvm::Type* type, V value) { template llvm::Value* IRBuilder::get_vector_constant(llvm::Type* type, V value) { + int vector_width = platform.get_instruction_width(); + ConstantVector constants; for (unsigned i = 0; i < vector_width; ++i) { const auto& element = C::get(type, value); @@ -206,9 +208,7 @@ void IRBuilder::set_kernel_attributes() { // > The `noalias` attribute indicates that the only memory accesses inside function are loads // > and stores from objects pointed to by its pointer-typed arguments, with arbitrary // > offsets. - if (assume_noalias) { - current_function->addParamAttr(0, llvm::Attribute::NoAlias); - } + current_function->addParamAttr(0, llvm::Attribute::NoAlias); // Finally, specify that the struct pointer does not capture and is read-only. current_function->addParamAttr(0, llvm::Attribute::NoCapture); @@ -227,7 +227,7 @@ void IRBuilder::set_loop_metadata(llvm::BranchInst* branch) { loop_metadata.push_back(nullptr); // If `vector_width` is 1, explicitly disable vectorization for benchmarking purposes. 
- if (vector_width == 1) { + if (platform.is_cpu() && platform.get_instruction_width() == 1) { llvm::MDString* name = llvm::MDString::get(context, "llvm.loop.vectorize.enable"); llvm::Value* false_value = llvm::ConstantInt::get(get_boolean_type(), 0); llvm::ValueAsMetadata* value = llvm::ValueAsMetadata::get(false_value); @@ -376,6 +376,7 @@ llvm::Value* IRBuilder::create_index(llvm::Value* value) { const auto& element_type = llvm::cast(vector_type->getElementType()); if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) return value; + int vector_width = platform.get_instruction_width(); return builder.CreateSExtOrTrunc(value, llvm::FixedVectorType::get(i64_type, vector_width)); } @@ -449,7 +450,8 @@ void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, // Even if generating vectorised code, some variables still need to be scalar. Particularly, the // induction variable "id" and remainder loop variables (that start with "epilogue" prefix). llvm::Type* type; - if (vector_width > 1 && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { + if (platform.is_cpu_with_simd() && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { + int vector_width = platform.get_instruction_width(); type = llvm::FixedVectorType::get(element_or_scalar_type, vector_width); } else { type = element_or_scalar_type; @@ -495,7 +497,7 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, llvm::Value* element_ptr = create_inbounds_gep(array, id_value); // Find out if the vector code is generated. - bool generating_vector_ir = vector_width > 1 && vectorize; + bool generating_vector_ir = platform.is_cpu_with_simd() && vectorize; // If the vector code is generated, we need to distinguish between two cases. If the array is // indexed indirectly (i.e. 
not by an induction variable `kernel_id`), create gather/scatter @@ -523,7 +525,7 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, // to a vector pointer llvm::Type* vector_type = llvm::PointerType::get( llvm::FixedVectorType::get(element_ptr->getType()->getPointerElementType(), - vector_width), + platform.get_instruction_width()), /*AddressSpace=*/0); ptr = builder.CreateBitCast(element_ptr, vector_type); } else { @@ -541,11 +543,12 @@ llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, void IRBuilder::maybe_replicate_value(llvm::Value* value) { // If the value should not be vectorised, or it is already a vector, add it to the stack. - if (!vectorize || vector_width == 1 || value->getType()->isVectorTy()) { + if (!vectorize || !platform.is_cpu_with_simd() || value->getType()->isVectorTy()) { value_stack.push_back(value); } else { // Otherwise, we generate vectorized code inside the loop, so replicate the value to form a // vector. + int vector_width = platform.get_instruction_width(); llvm::Value* vector_value = builder.CreateVectorSplat(vector_width, value); value_stack.push_back(vector_value); } diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index b3005db0c7..cf9e7f936d 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -10,6 +10,7 @@ #include #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "codegen/llvm/target_platform.hpp" #include "symtab/symbol_table.hpp" #include "llvm/IR/IRBuilder.h" @@ -52,14 +53,8 @@ class IRBuilder { /// Flag to indicate that the generated IR should be vectorized. bool vectorize; - /// Precision of the floating-point numbers (32 or 64 bit). - unsigned fp_precision; - - /// The vector width used for the vectorized code. - unsigned vector_width; - - /// Instance struct fields do not alias. - bool assume_noalias; + /// Target platform for which IR is built. 
+ Platform platform; /// Masked value used to predicate vector instructions. llvm::Value* mask; @@ -72,21 +67,17 @@ class IRBuilder { public: IRBuilder(llvm::LLVMContext& context, - bool use_single_precision = false, - unsigned vector_width = 1, - std::vector fast_math_flags = {}, - bool assume_noalias = true) + Platform& platform, + std::vector fast_math_flags = {}) : builder(context) + , platform(platform) , symbol_table(nullptr) , current_function(nullptr) , vectorize(false) , alloca_ip(nullptr) - , fp_precision(use_single_precision ? single_precision : double_precision) - , vector_width(vector_width) , mask(nullptr) , kernel_id("") - , fast_math_flags(fast_math_flags) - , assume_noalias(assume_noalias) {} + , fast_math_flags(fast_math_flags) {} /// Transforms the fast math flags provided to the builder into LLVM's representation. llvm::FastMathFlags transform_to_fmf(std::vector& flags) { diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index 6d374999c3..92d8a486c1 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -47,8 +47,11 @@ int main(int argc, const char* argv[]) { logger->info("Running Symtab Visitor"); visitor::SymtabVisitor().visit_program(*ast); + // Use default platform for this toy example. 
+ codegen::Platform platform; + logger->info("Running LLVM Visitor"); - codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", /*opt_level_ir=*/0); + codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", platform, /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); std::unique_ptr module = llvm_visitor.get_module(); diff --git a/src/codegen/llvm/target_platform.cpp b/src/codegen/llvm/target_platform.cpp new file mode 100644 index 0000000000..6cb8c7bb2b --- /dev/null +++ b/src/codegen/llvm/target_platform.cpp @@ -0,0 +1,54 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/target_platform.hpp" + +namespace nmodl { +namespace codegen { + +const std::string Platform::DEFAULT_PLATFORM_NAME = "default"; +const std::string Platform::DEFAULT_MATH_LIBRARY = "none"; + +bool Platform::is_default_platform() { + // Default platform is a CPU. + return platform_id == PlatformID::CPU && name == Platform::DEFAULT_PLATFORM_NAME; +} + +bool Platform::is_cpu() { + return platform_id == PlatformID::CPU; +} + +bool Platform::is_cpu_with_simd() { + return platform_id == PlatformID::CPU && instruction_width > 1; +} + +bool Platform::is_gpu() { + return platform_id == PlatformID::GPU; +} + +bool Platform::is_single_precision() { + return use_single_precision; +} + +std::string Platform::get_name() const { + return name; +} + +std::string Platform::get_math_library() const { + return math_library; +} + +int Platform::get_instruction_width() const { + return instruction_width; +} + +int Platform::get_precision() const { + return use_single_precision? 
32 : 64; +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp new file mode 100644 index 0000000000..2eabbb1a4b --- /dev/null +++ b/src/codegen/llvm/target_platform.hpp @@ -0,0 +1,92 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +namespace nmodl { +namespace codegen { + +enum PlatformID { + CPU, + GPU +}; + +/** + * \class Platform + * \brief A class that represents the target platform. It is needed to + * reduce the amount of code passed to LLVM visitor and its helpers. + */ +class Platform { + public: + /// Default name of the target and math library. + static const std::string DEFAULT_PLATFORM_NAME; + static const std::string DEFAULT_MATH_LIBRARY; + + private: + /// Name of the platform. + const std::string name = Platform::DEFAULT_PLATFORM_NAME; + + /// Target-specific id to compare platforms easily. + PlatformID platform_id; + + /// User-provided width that is used to construct LLVM instructions + // and types. + int instruction_width = 1; + + /// Use single-precision floating-point types. + bool use_single_precision = false; + + /// A name of user-provided math library. 
+ std::string math_library = Platform::DEFAULT_MATH_LIBRARY; + + public: + Platform(PlatformID platform_id, + const std::string& name, + std::string& math_library, + bool use_single_precision = false, + int instruction_width = 1) + : platform_id(platform_id) + , name(name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform(bool use_single_precision, + int instruction_width) + : platform_id(PlatformID::CPU) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform() : platform_id(PlatformID::CPU) {} + + /// Checks if this platform is a default platform. + bool is_default_platform(); + + /// Checks if this platform is a CPU. + bool is_cpu(); + + /// Checks if this platform is a CPU with SIMD support. + bool is_cpu_with_simd(); + + /// Checks if this platform is a GPU. + bool is_gpu(); + + bool is_single_precision(); + + std::string get_name() const; + + std::string get_math_library() const; + + int get_instruction_width() const; + + int get_precision() const; +}; + +} // namespace codegen +} // namespace nmodl diff --git a/src/main.cpp b/src/main.cpp index 8db5376251..cb3588793d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -711,21 +711,29 @@ int main(int argc, const char* argv[]) { // information and not in LLVM visitor. int llvm_opt_level = llvm_benchmark ? 0 : llvm_opt_level_ir; - if (llvm_gpu_name != "default") { - logger->warn("GPU code generation is not supported, targeting " - "CPU instead"); - } + // Create platform abstraction. + PlatformID pid = llvm_gpu_name == "default" ? PlatformID::CPU + : PlatformID::GPU; + const std::string name = + llvm_gpu_name == "default" ? 
llvm_cpu_name : llvm_gpu_name; + Platform platform(pid, name, llvm_math_library, llvm_float_type, + llvm_vector_width); logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, output_dir, llvm_opt_level, - llvm_float_type, llvm_vector_width, - llvm_math_library, !llvm_no_debug, - llvm_fast_math_flags, true); + CodegenLLVMVisitor visitor(modfile, output_dir, platform, + llvm_opt_level, !llvm_no_debug, + llvm_fast_math_flags); visitor.visit_program(*ast); ast_to_nmodl(*ast, filepath("llvm", "mod")); ast_to_json(*ast, filepath("llvm", "json")); if (llvm_benchmark) { + // \todo integrate Platform class here + if (llvm_gpu_name != "default") { + logger->warn("GPU benchmarking is not supported, targeting " + "CPU instead"); + } + logger->info("Running LLVM benchmark"); benchmark::LLVMBenchmark benchmark( visitor, modfile, output_dir, shared_lib_paths, diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 312a4d4d28..c10cf0fdc0 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -125,8 +125,12 @@ SCENARIO("Arithmetic expression", "[llvm][runner]") { const auto& ast = driver.parse_string(nmodl_text); SymtabVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", + cpu_platform, /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); @@ -227,8 +231,12 @@ SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { const auto& ast = driver.parse_string(nmodl_text); SymtabVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", + cpu_platform, /*opt_level_ir=*/3); llvm_visitor.visit_program(*ast); @@ -300,11 
+308,13 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_level_ir=*/0, - /*use_single_precision=*/false, - /*vector_width=*/1); + cpu_platform, + /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); llvm_visitor.wrap_kernel_functions(); @@ -382,11 +392,13 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/4); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_level_ir=*/3, - /*use_single_precision=*/false, - /*vector_width=*/4); + simd_cpu_platform, + /*opt_level_ir=*/3); llvm_visitor.visit_program(*ast); llvm_visitor.wrap_kernel_functions(); @@ -464,11 +476,13 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_level_ir=*/0, - /*use_single_precision=*/false, - /*vector_width=*/2); + simd_cpu_platform, + /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); llvm_visitor.wrap_kernel_functions(); @@ -555,11 +569,13 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { SymtabVisitor().visit_program(*ast); NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); + + codegen::Platform 
simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", - /*opt_level_ir=*/0, - /*use_single_precision=*/false, - /*vector_width=*/2); + simd_cpu_platform, + /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); llvm_visitor.wrap_kernel_functions(); diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 6042aecfc8..fbb07dfbcd 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -39,11 +39,11 @@ codegen::CodegenInstanceData generate_instance_data(const std::string& text, SymtabVisitor().visit_program(*ast); NeuronSolveVisitor().visit_program(*ast); + codegen::Platform cpu_platform(use_single_precision, vector_width); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"test", /*output_dir=*/".", - opt_level, - use_single_precision, - vector_width); + cpu_platform, + opt_level); llvm_visitor.visit_program(*ast); llvm_visitor.dump_module(); const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 397ff46111..d2e94acc70 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -51,14 +51,12 @@ std::string run_llvm_visitor(const std::string& text, NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", - /*output_dir=*/".", - opt_level, - use_single_precision, - vector_width, - vec_lib, - /*add_debug_information=*/false, - fast_math_flags); + codegen::Platform cpu_platform(codegen::PlatformID::CPU, /*name=*/"default", + vec_lib, use_single_precision, vector_width); + codegen::CodegenLLVMVisitor llvm_visitor( + /*mod_filename=*/"unknown", + /*output_dir=*/".", 
cpu_platform, opt_level, + /*add_debug_information=*/false, fast_math_flags); llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); From 2d05aaa6c2354ed54bf6f3ce48dac27d8eabdc8e Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 22 Mar 2022 12:04:42 +0100 Subject: [PATCH 078/105] [LLVM][GPU] Added GPU-specific AST transformations (#819) This commit adds a new AST node: `CodegenThreadId` that represents thread id used in GPU computation. Thanks to the new platform class abstraction, the code to generate compute body of NEURON block was readapted to support AST transformations needed for GPU. Example of the transformation: ``` GPU_ID id INTEGER node_id DOUBLE v for(id = THREAD_ID; id < mech->node_count; id = id+GRID_STRIDE) { node_id = mech->node_index[id] v = mech->voltage[node_id] mech->m[id] = mech->y[id]+2 } ``` Co-authored-by: Pramod Kumbhar Co-authored-by: Ioannis Magkanaris --- .../llvm/codegen_llvm_helper_visitor.cpp | 261 +++++++++--------- .../llvm/codegen_llvm_helper_visitor.hpp | 37 ++- src/codegen/llvm/codegen_llvm_visitor.cpp | 8 +- src/language/code_generator.cmake | 2 + src/language/codegen.yaml | 21 ++ test/unit/codegen/codegen_llvm_ir.cpp | 58 +++- 6 files changed, 250 insertions(+), 137 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 9e3d9eedef..a06f59812b 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -445,7 +445,7 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, * @param node Ast node under which variables to be converted to instance type */ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, - std::string& index_var) { + const std::string& index_var) { /// collect all variables in the node of type ast::VarName auto variables = collect_nodes(node, {ast::AstNodeType::VAR_NAME}); for (const auto& v: variables) { @@ -551,48 +551,64 @@ 
void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { create_function_for_node(node); } -/** - * Create loop increment expression `id = id + width` - * \todo : same as int_initialization_expression() - */ -static std::shared_ptr<ast::Expression> loop_increment_expression(const std::string& induction_var, - int vector_width) { - // first create id + x +std::shared_ptr<ast::Expression> +CodegenLLVMHelperVisitor::loop_initialization_expression(const std::string& induction_var, + bool is_remainder_loop) { + if (platform.is_gpu()) { + const auto& id = create_varname(induction_var); + const auto& tid = new ast::CodegenThreadId(); + return std::make_shared<ast::BinaryExpression>(id, ast::BinaryOperator(ast::BOP_ASSIGN), tid); + } + + // Otherwise, platform is CPU. Since the loop can be a remainder loop, check if + // we need to initialize at all. + if (is_remainder_loop) + return nullptr; + return int_initialization_expression(induction_var); +} + +std::shared_ptr<ast::Expression> +CodegenLLVMHelperVisitor::loop_increment_expression(const std::string& induction_var, + bool is_remainder_loop) { const auto& id = create_varname(induction_var); - const auto& inc = new ast::Integer(vector_width, nullptr); + + // For GPU platforms, increment by grid stride. + if (platform.is_gpu()) { + const auto& stride = new ast::CodegenGridStride(); + const auto& inc_expr = + new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), stride); + return std::make_shared<ast::BinaryExpression>(id->clone(), + ast::BinaryOperator(ast::BOP_ASSIGN), + inc_expr); + } + + // Otherwise, proceed with increment for CPU loop. + const int width = is_remainder_loop ?
1 : platform.get_instruction_width(); + const auto& inc = new ast::Integer(width, nullptr); const auto& inc_expr = new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), inc); - // now create id = id + x return std::make_shared(id->clone(), ast::BinaryOperator(ast::BOP_ASSIGN), inc_expr); } -/** - * Create loop count comparison expression - * - * Based on if loop is vectorised or not, the condition for loop - * is different. For example: - * - serial loop : `id < node_count` - * - vector loop : `id < (node_count - vector_width + 1)` - * - * \todo : same as int_initialization_expression() - */ -static std::shared_ptr loop_count_expression(const std::string& induction_var, - const std::string& node_count, - int vector_width) { +std::shared_ptr +CodegenLLVMHelperVisitor::loop_count_expression(const std::string& induction_var, + const std::string& node_count, + bool is_remainder_loop) { + const int width = is_remainder_loop ? 1 : platform.get_instruction_width(); const auto& id = create_varname(induction_var); const auto& mech_node_count = create_varname(node_count); // For non-vectorised loop, the condition is id < mech->node_count - if (vector_width == 1) { + if (width == 1) { return std::make_shared(id->clone(), ast::BinaryOperator(ast::BOP_LESS), mech_node_count); } - // For vectorised loop, the condition is id < mech->node_count - vector_width + 1 - const auto& remainder = new ast::Integer(vector_width - 1, /*macro=*/nullptr); + // For vectorised loop, the condition is id < mech->node_count - width + 1 + const auto& remainder = new ast::Integer(width - 1, /*macro=*/nullptr); const auto& count = new ast::BinaryExpression(mech_node_count, ast::BinaryOperator(ast::BOP_SUBTRACTION), remainder); @@ -614,35 +630,29 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// statements for new function to be generated ast::StatementVector function_statements; - /// create variable definition for loop index and insert at the 
beginning - std::string loop_index_var = "id"; - std::vector induction_variables{"id"}; - function_statements.push_back( - create_local_variable_statement(induction_variables, INTEGER_TYPE)); - /// create vectors of local variables that would be used in compute part std::vector int_variables{"node_id"}; std::vector double_variables{"v"}; - /// create now main compute part : for loop over channel instances + /// create now main compute part - /// loop body : initialization + solve blocks - ast::StatementVector loop_def_statements; - ast::StatementVector loop_index_statements; - ast::StatementVector loop_body_statements; + /// compute body : initialization + solve blocks + ast::StatementVector def_statements; + ast::StatementVector index_statements; + ast::StatementVector body_statements; { /// access node index and corresponding voltage - loop_index_statements.push_back( + index_statements.push_back( visitor::create_statement("node_id = node_index[{}]"_format(INDUCTION_VAR))); - loop_body_statements.push_back( + body_statements.push_back( visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); /// read ion variables ion_read_statements(BlockType::State, int_variables, double_variables, - loop_index_statements, - loop_body_statements); + index_statements, + body_statements); /// main compute node : extract solution expressions from the derivative block const auto& solutions = collect_nodes(node, {ast::AstNodeType::SOLUTION_EXPRESSION}); @@ -650,109 +660,39 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { const auto& solution = std::dynamic_pointer_cast(statement); const auto& block = std::dynamic_pointer_cast( solution->get_node_to_solve()); - append_statements_from_block(loop_body_statements, block); + append_statements_from_block(body_statements, block); } /// add breakpoint block if no current if (info.currents.empty() && info.breakpoint_node != nullptr) { auto block = info.breakpoint_node->get_statement_block(); - 
append_statements_from_block(loop_body_statements, block); + append_statements_from_block(body_statements, block); } /// write ion statements ion_write_statements(BlockType::State, int_variables, double_variables, - loop_index_statements, - loop_body_statements); + index_statements, + body_statements); // \todo handle process_shadow_update_statement and wrote_conc_call yet } - ast::StatementVector loop_body; - loop_body.insert(loop_body.end(), loop_def_statements.begin(), loop_def_statements.end()); - loop_body.insert(loop_body.end(), loop_index_statements.begin(), loop_index_statements.end()); - loop_body.insert(loop_body.end(), loop_body_statements.begin(), loop_body_statements.end()); - - /// now construct a new code block which will become the body of the loop - auto loop_block = std::make_shared(loop_body); - - /// declare main FOR loop local variables - function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); - function_statements.push_back(create_local_variable_statement(double_variables, FLOAT_TYPE)); - - /// main loop possibly vectorized on vector_width - { - /// loop constructs : initialization, condition and increment - const auto& initialization = int_initialization_expression(INDUCTION_VAR); - const auto& condition = loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, vector_width); - const auto& increment = loop_increment_expression(INDUCTION_VAR, vector_width); - - /// clone it - auto local_loop_block = std::shared_ptr(loop_block->clone()); - - /// convert local statement to codegenvar statement - convert_local_statement(*local_loop_block); - - auto for_loop_statement_main = std::make_shared(initialization, - condition, - increment, - local_loop_block); - - /// convert all variables inside loop body to instance variables - convert_to_instance_variable(*for_loop_statement_main, loop_index_var); - - /// loop itself becomes one of the statement in the function - function_statements.push_back(for_loop_statement_main); 
- } - - /// vectors containing renamed FOR loop local variables - std::vector renamed_int_variables; - std::vector renamed_double_variables; - - /// remainder loop possibly vectorized on vector_width - if (vector_width > 1) { - /// loop constructs : initialization, condition and increment - const auto& condition = - loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, /*vector_width=*/1); - const auto& increment = loop_increment_expression(INDUCTION_VAR, /*vector_width=*/1); - - /// rename local variables to avoid conflict with main loop - rename_local_variables(*loop_block); - - /// convert local statement to codegenvar statement - convert_local_statement(*loop_block); - - auto for_loop_statement_remainder = - std::make_shared(nullptr, condition, increment, loop_block); + /// create target-specific compute body + ast::StatementVector compute_body; + compute_body.insert(compute_body.end(), def_statements.begin(), def_statements.end()); + compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); + compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); - const auto& loop_statements = for_loop_statement_remainder->get_statement_block(); - // \todo: Change RenameVisitor to take a vector of names to which it would append a single - // prefix. 
- for (const auto& name: int_variables) { - std::string new_name = epilogue_variable_prefix + name; - renamed_int_variables.push_back(new_name); - visitor::RenameVisitor v(name, new_name); - loop_statements->accept(v); - } - for (const auto& name: double_variables) { - std::string new_name = epilogue_variable_prefix + name; - renamed_double_variables.push_back(new_name); - visitor::RenameVisitor v(name, epilogue_variable_prefix + name); - loop_statements->accept(v); - } - - /// declare remainder FOR loop local variables - function_statements.push_back( - create_local_variable_statement(renamed_int_variables, INTEGER_TYPE)); - function_statements.push_back( - create_local_variable_statement(renamed_double_variables, FLOAT_TYPE)); - - /// convert all variables inside loop body to instance variables - convert_to_instance_variable(*for_loop_statement_remainder, loop_index_var); + std::vector induction_variables{INDUCTION_VAR}; + function_statements.push_back( + create_local_variable_statement(induction_variables, INTEGER_TYPE)); - /// loop itself becomes one of the statement in the function - function_statements.push_back(for_loop_statement_remainder); + if (platform.is_gpu()) { + create_gpu_compute_body(compute_body, function_statements, int_variables, double_variables); + } else { + create_cpu_compute_body(compute_body, function_statements, int_variables, double_variables); } /// new block for the function @@ -779,6 +719,73 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::cout << nmodl::to_nmodl(function) << std::endl; } +void CodegenLLVMHelperVisitor::create_gpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables) { + auto kernel_block = std::make_shared(body); + + // dispatch loop creation with right parameters + create_compute_body_loop(kernel_block, function_statements, int_variables, double_variables); +} + +void 
CodegenLLVMHelperVisitor::create_cpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables) { + auto loop_block = std::make_shared(body); + create_compute_body_loop(loop_block, function_statements, int_variables, double_variables); + if (platform.is_cpu_with_simd()) + create_compute_body_loop(loop_block, function_statements, int_variables, double_variables, /*is_remainder_loop=*/true); +} + +void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr& block, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables, + bool is_remainder_loop) { + const auto& initialization = loop_initialization_expression(INDUCTION_VAR, is_remainder_loop); + const auto& condition = loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); + const auto& increment = loop_increment_expression(INDUCTION_VAR, is_remainder_loop); + + // Clone the statement block if needed since it can be used by the remainder loop. + auto loop_block = (is_remainder_loop || !platform.is_cpu_with_simd()) ? block : std::shared_ptr(block->clone()); + + // Convert local statement to use CodegenVar statements and create a FOR loop node. Also, if creating + // a remainder loop then rename variables to avoid conflicts. + if (is_remainder_loop) + rename_local_variables(*loop_block); + convert_local_statement(*loop_block); + auto for_loop = std::make_shared(initialization, + condition, + increment, + loop_block); + + // Convert all variables inside loop body to be instance variables. + convert_to_instance_variable(*for_loop, INDUCTION_VAR); + + // Rename variables if processing remainder loop. 
+ if (is_remainder_loop) { + const auto& loop_statements = for_loop->get_statement_block(); + auto rename = [&](std::vector& vars) { + for (int i = 0; i < vars.size(); ++i) { + std::string old_name = vars[i]; + std::string new_name = epilogue_variable_prefix + vars[i]; + vars[i] = new_name; + visitor::RenameVisitor v(old_name, new_name); + loop_statements->accept(v); + } + }; + rename(int_variables); + rename(double_variables); + } + + // Push variables and the loop to the function statements vector. + function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + function_statements.push_back(create_local_variable_statement(double_variables, FLOAT_TYPE)); + function_statements.push_back(for_loop); +} + void CodegenLLVMHelperVisitor::remove_inlined_nodes(ast::Program& node) { auto program_symtab = node.get_model_symbol_table(); const auto& func_proc_nodes = diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index 21aff4a92d..2aa7f2fe03 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -16,6 +16,7 @@ #include "ast/instance_struct.hpp" #include "codegen/codegen_info.hpp" +#include "codegen/llvm/target_platform.hpp" #include "symtab/symbol_table.hpp" #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" @@ -101,8 +102,8 @@ struct InstanceVarHelper { * these will be common across all backends. 
*/ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { - /// explicit vectorisation width - int vector_width; + /// target platform + Platform platform; /// newly generated code generation specific functions CodegenFunctionVector codegen_functions; @@ -135,8 +136,8 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { static const std::string VOLTAGE_VAR; static const std::string NODE_INDEX_VAR; - CodegenLLVMHelperVisitor(int vector_width) - : vector_width(vector_width) {} + CodegenLLVMHelperVisitor(Platform& platform) + : platform(platform) {} const InstanceVarHelper& get_instance_var_helper() { return instance_var_helper; @@ -161,7 +162,7 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { ast::StatementVector& index_statements, ast::StatementVector& body_statements); - void convert_to_instance_variable(ast::Node& node, std::string& index_var); + void convert_to_instance_variable(ast::Node& node, const std::string& index_var); void convert_local_statement(ast::StatementBlock& node); void rename_local_variables(ast::StatementBlock& node); @@ -173,6 +174,32 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { void visit_function_block(ast::FunctionBlock& node) override; void visit_nrn_state_block(ast::NrnStateBlock& node) override; void visit_program(ast::Program& node) override; + + private: + /// Methods to create target-specific loop constructs. + std::shared_ptr loop_initialization_expression(const std::string& induction_var, + bool is_remainder_loop); + std::shared_ptr loop_count_expression(const std::string& induction_var, + const std::string& node_count, + bool is_remainder_loop); + std::shared_ptr loop_increment_expression(const std::string& induction_var, + bool is_remainder_loop); + + /// Methods to populate`function_statements` with necessary AST constructs to form + /// a kernel for a specific target. 
+ void create_gpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables); + void create_cpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables); + void create_compute_body_loop(std::shared_ptr& block, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables, + bool is_remainder_loop = false); }; /** @} */ // end of llvm_codegen_details diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 0fa81de691..2f677cfbec 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -815,12 +815,18 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // - convert function and procedure blocks into CodegenFunctions // - gather information about AST. For now, information about functions // and procedures is used only. - CodegenLLVMHelperVisitor v{platform.get_instruction_width()}; + CodegenLLVMHelperVisitor v{platform}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); sym_tab = node.get_symbol_table(); std::string kernel_id = v.get_kernel_id(); + // \todo: implement GPU codegen functionality. + if (platform.is_gpu()) { + logger->warn("GPU code generation is not supported yet, aborting!"); + return; + } + // Initialize the builder for this NMODL program. 
ir_builder.initialize(*sym_tab, kernel_id); diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index 1667437217..c1aaf733e6 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -68,9 +68,11 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/codegen_atomic_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_for_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_function.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_grid_stride.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_instance_var.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_return_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_struct.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_thread_id.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var_list_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/codegen_var_type.hpp diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index 01593035ac..02adca49f4 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -199,6 +199,27 @@ brief: "member functions of the class/struct" type: CodegenFunction vector: true + - CodegenThreadId: + brief: "Represents thread id expression for GPU code generation" + description: | + For GPU code generation, we use a special AST node to encode the initial + thread id calculation. In NMODL, this expression is usually of the form: + \code{.cpp} + tid = blockId.x * blockDim.x + threadId.x + \endcode + To be able to support multiple GPU backends, we choose to have a custom AST + node. Therefore, the code generation for this node is kept very simple, + mapping expression to target-specific GPU intrinsics. + nmodl: "THREAD_ID" + - CodegenGridStride: + brief: "Represents grid stride for GPU code generation" + description: | + For GPU code generation, we use a special AST node to encode the loop + increment expression.
In NMODL, this expression is usually of the form: + \code{.cpp} + for (int i = tid; i < n; i += blockDim.x * gridDim.x) + \endcode + nmodl: "GRID_STRIDE" - Statement: brief: "Statement base class" children: diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index d2e94acc70..bf18760349 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -68,14 +68,14 @@ std::string run_llvm_visitor(const std::string& text, std::vector> run_llvm_visitor_helper( const std::string& text, - int vector_width, + codegen::Platform& platform, const std::vector& nodes_to_collect) { NmodlDriver driver; const auto& ast = driver.parse_string(text); SymtabVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - CodegenLLVMHelperVisitor(vector_width).visit_program(*ast); + CodegenLLVMHelperVisitor(platform).visit_program(*ast); const auto& nodes = collect_nodes(*ast, nodes_to_collect); @@ -1228,8 +1228,9 @@ SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { })"; THEN("a single scalar loops is constructed") { + codegen::Platform default_platform; auto result = run_llvm_visitor_helper(nmodl_text, - /*vector_width=*/1, + default_platform, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); REQUIRE(result.size() == 1); @@ -1279,8 +1280,9 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { THEN("vector and epilogue scalar loops are constructed") { + codegen::Platform simd_platform(/*use_single_precision=*/false, /*instruction_width=*/8); auto result = run_llvm_visitor_helper(nmodl_text, - /*vector_width=*/8, + simd_platform, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); REQUIRE(result.size() == 2); @@ -1523,3 +1525,51 @@ SCENARIO("Removal of inlined functions and procedures", "[visitor][llvm][inline] } } } + +//============================================================================= +// Basic GPU kernel AST generation 
+//============================================================================= + +SCENARIO("GPU kernel body", "[visitor][llvm][gpu]") { + GIVEN("For GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + + std::string expected_loop = R"( + for(id = THREAD_ID; idnode_count; id = id+GRID_STRIDE) { + node_id = mech->node_index[id] + v = mech->voltage[node_id] + mech->m[id] = mech->y[id]+2 + })"; + + THEN("a loop with GPU-specific AST nodes is constructed") { + std::string name = "default"; + std::string math_library = "none"; + codegen::Platform gpu_platform(codegen::PlatformID::GPU, name, math_library); + auto result = run_llvm_visitor_helper(nmodl_text, + gpu_platform, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + REQUIRE(result.size() == 1); + + auto loop = reindent_text(to_nmodl(result[0])); + REQUIRE(loop == reindent_text(expected_loop)); + } + } +} From 2e5e149345ddbf9089d5e43919d4fbf5a107e167 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 22 Mar 2022 14:38:14 +0100 Subject: [PATCH 079/105] [LLVM][GPU] Basic code generation for NVPTX backend (#820) * Kernel annotations are now generated when targeting GPU platforms * Lowering of `CodegenThreadId` and `CodegenGridStride` was implemented using NVVM intrinsics to get thread/block id/dimensions and grid stride * Adapted code generation for GPU expressions * Added tests for annotations/intrinsics * GPU code generation is now enabled by this PR Co-authored-by: Pramod Kumbhar Co-authored-by: Pramod S Kumbhar --- src/codegen/llvm/codegen_llvm_visitor.cpp | 42 +++++++++---- src/codegen/llvm/codegen_llvm_visitor.hpp | 5 ++ src/codegen/llvm/llvm_ir_builder.cpp | 34 ++++++++++ src/codegen/llvm/llvm_ir_builder.hpp | 6 ++ test/unit/codegen/codegen_llvm_ir.cpp | 76 +++++++++++++++++++++++ 5 files changed, 152 insertions(+), 11 deletions(-) 
diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 2f677cfbec..25fb173842 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -64,6 +64,16 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym return unsupported.empty() && supported.size() <= 1; } +void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel) { + llvm::Metadata* metadata[] = { + llvm::ValueAsMetadata::get(kernel), + llvm::MDString::get(*context, "kernel"), + llvm::ValueAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 1))}; + llvm::MDNode* node = llvm::MDNode::get(*context, metadata); + module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); +} + #if LLVM_VERSION_MAJOR >= 13 void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, llvm::Triple& triple) { @@ -665,11 +675,19 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node ir_builder.allocate_function_arguments(func, arguments); // Process function or procedure body. If the function is a compute kernel, enable - // vectorization. If so, the return statement is handled in a separate visitor. - if (platform.is_cpu_with_simd() && is_kernel_function(name)) { - ir_builder.generate_vector_ir(); - block->accept(*this); - ir_builder.generate_scalar_ir(); + // vectorization or add NVVM annotations. If this is the case, the return statement is + // handled in a separate visitor. 
+ if (is_kernel_function(name)) { + if (platform.is_cpu_with_simd()) { + ir_builder.generate_vector_ir(); + block->accept(*this); + ir_builder.generate_scalar_ir(); + } else if (platform.is_gpu()) { + block->accept(*this); + annotate_kernel_with_nvvm(func); + } else { // scalar + block->accept(*this); + } } else { block->accept(*this); } @@ -685,6 +703,10 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node ir_builder.clear_function(); } +void CodegenLLVMVisitor::visit_codegen_grid_stride(const ast::CodegenGridStride& node) { + ir_builder.create_grid_stride(); +} + void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturnStatement& node) { if (!node.get_statement()->is_name()) throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); @@ -694,6 +716,10 @@ void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturn ir_builder.create_return(ret_value); } +void CodegenLLVMVisitor::visit_codegen_thread_id(const ast::CodegenThreadId& node) { + ir_builder.create_thread_id(); +} + void CodegenLLVMVisitor::visit_codegen_var_list_statement( const ast::CodegenVarListStatement& node) { llvm::Type* scalar_type = get_codegen_var_type(*node.get_var_type()); @@ -821,12 +847,6 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { sym_tab = node.get_symbol_table(); std::string kernel_id = v.get_kernel_id(); - // \todo: implement GPU codegen functionality. - if (platform.is_gpu()) { - logger->warn("GPU code generation is not supported yet, aborting!"); - return; - } - // Initialize the builder for this NMODL program. 
ir_builder.initialize(*sym_tab, kernel_id); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 396d8cbb67..27150ff296 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -137,7 +137,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) override; void visit_codegen_for_statement(const ast::CodegenForStatement& node) override; void visit_codegen_function(const ast::CodegenFunction& node) override; + void visit_codegen_grid_stride(const ast::CodegenGridStride& node) override; void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; + void visit_codegen_thread_id(const ast::CodegenThreadId& node) override; void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; void visit_double(const ast::Double& node) override; void visit_function_block(const ast::FunctionBlock& node) override; @@ -156,6 +158,9 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void wrap_kernel_functions(); private: + // Annotates kernel function with NVVM metadata. + void annotate_kernel_with_nvvm(llvm::Function* kernel); + #if LLVM_VERSION_MAJOR >= 13 /// Populates target library info with the vector library definitions. 
void add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index e7a6a4a60b..c851f02970 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/ValueSymbolTable.h" namespace nmodl { @@ -554,6 +555,39 @@ void IRBuilder::maybe_replicate_value(llvm::Value* value) { } } +void IRBuilder::create_grid_stride() { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + auto create_call = [&](llvm::Intrinsic::ID id) { + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); + }; + + llvm::Value* block_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + llvm::Value* grid_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_nctaid_x); + llvm::Value* stride = builder.CreateMul(block_dim, grid_dim); + + value_stack.push_back(stride); +} + +void IRBuilder::create_thread_id() { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + auto create_call = [&](llvm::Intrinsic::ID id) { + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); + }; + + // For now, this function only supports NVPTX backend, however it can be easily + // adjusted to generate thread id variable for any other platform. 
+ llvm::Value* block_id = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x); + llvm::Value* block_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + llvm::Value* tmp = builder.CreateMul(block_id, block_dim); + + llvm::Value* tid = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); + llvm::Value* id = builder.CreateAdd(tmp, tid); + + value_stack.push_back(id); +} + /****************************************************************************************/ /* LLVM block utilities */ diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index cf9e7f936d..1b144afcfd 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -230,6 +230,12 @@ class IRBuilder { void create_scalar_or_vector_alloca(const std::string& name, llvm::Type* element_or_scalar_type); + /// Creates an expression of the form: blockDim.x * gridDim.x + void create_grid_stride(); + + /// Creates an expression of the form: blockIdx.x * blockDim.x + threadIdx.x + void create_thread_id(); + /// Generates LLVM IR for the given unary operator. 
void create_unary_op(llvm::Value* value, ast::UnaryOp op); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index bf18760349..b010bdf61c 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -34,6 +34,32 @@ using nmodl::parser::NmodlDriver; // Utility to get LLVM module as a string //============================================================================= +std::string run_gpu_llvm_visitor(const std::string& text, + int opt_level = 0, + bool use_single_precision = false, + std::string math_library = "none", + bool nmodl_inline = false) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + if (nmodl_inline) { + InlineVisitor().visit_program(*ast); + } + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform gpu_platform(codegen::PlatformID::GPU, /*name=*/"nvidia", + math_library, use_single_precision, 1); + codegen::CodegenLLVMVisitor llvm_visitor( + /*mod_filename=*/"unknown", + /*output_dir=*/".", gpu_platform, opt_level, + /*add_debug_information=*/false); + + llvm_visitor.visit_program(*ast); + return llvm_visitor.dump_module(); +} + std::string run_llvm_visitor(const std::string& text, int opt_level = 0, bool use_single_precision = false, @@ -1573,3 +1599,53 @@ SCENARIO("GPU kernel body", "[visitor][llvm][gpu]") { } } } + +//============================================================================= +// Basic NVVM/LLVM IR generation for GPU platforms +//============================================================================= + +SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { + GIVEN("For GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + THEN("kernel annotations 
are added and thread id intrinsics generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check kernel annotations are correclty created. + std::regex annotations(R"(!nvvm\.annotations = !\{!0\})"); + std::regex kernel_data(R"(!0 = !\{void \(%.*__instance_var__type\*\)\* @nrn_state_.*, !\"kernel\", i32 1\})"); + REQUIRE(std::regex_search(module_string, m, annotations)); + REQUIRE(std::regex_search(module_string, m, kernel_data)); + + // Check thread/block id/dim instrinsics are created. + std::regex block_id(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.ctaid\.x\(\))"); + std::regex block_dim(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.ntid\.x\(\))"); + std::regex tid(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.tid\.x\(\))"); + std::regex grid_dim(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.nctaid\.x\(\))"); + REQUIRE(std::regex_search(module_string, m, block_id)); + REQUIRE(std::regex_search(module_string, m, block_dim)); + REQUIRE(std::regex_search(module_string, m, tid)); + REQUIRE(std::regex_search(module_string, m, grid_dim)); + } + } +} From 6b9df33eee98795e695c8ca23ce40eecd1cffceb Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Wed, 23 Mar 2022 18:37:41 +0100 Subject: [PATCH 080/105] Print kernel wrappers and nrn_init based on Instance Struct (#551) * Setup all the parameters of `__instance_var__type` in `setup_instance` * Generate wrapper functions for `nrn_cur_`, `nrn_init_` and `nrn_state_` * Print LLVM IR code to `.ll` * Only assign the correct pointer to the index variables * Added unit test and integration tests for oacc and ispc backend Co-authored-by: Pramod Kumbhar --- src/codegen/codegen_c_visitor.cpp | 42 +++- src/codegen/codegen_c_visitor.hpp | 50 +++- src/codegen/codegen_info.cpp | 16 +- src/codegen/codegen_info.hpp | 2 +- src/codegen/codegen_ispc_visitor.hpp | 2 +- src/codegen/codegen_naming.hpp | 6 + .../llvm/codegen_llvm_helper_visitor.cpp | 18 
+- .../llvm/codegen_llvm_helper_visitor.hpp | 8 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 228 ++++++++++++++++- src/codegen/llvm/codegen_llvm_visitor.hpp | 156 +++++++++++- src/printer/code_printer.hpp | 2 + test/integration/CMakeLists.txt | 3 + test/unit/CMakeLists.txt | 11 +- test/unit/codegen/codegen_llvm_visitor.cpp | 231 ++++++++++++++++++ 14 files changed, 719 insertions(+), 56 deletions(-) create mode 100644 test/unit/codegen/codegen_llvm_visitor.cpp diff --git a/src/codegen/codegen_c_visitor.cpp b/src/codegen/codegen_c_visitor.cpp index af464f3957..3ae0772752 100644 --- a/src/codegen/codegen_c_visitor.cpp +++ b/src/codegen/codegen_c_visitor.cpp @@ -709,6 +709,22 @@ bool CodegenCVisitor::is_constant_variable(const std::string& name) const { is_constant = true; } } + // Check whether the variable exists in the codegen_int_variables of the CodegenInfo struct + // which hold information whether the variables are const or not + const auto& int_variable_it = std::find_if(info.codegen_int_variables.begin(), + info.codegen_int_variables.end(), + [&name](const IndexVariableInfo& var) { + return var.symbol->get_name() == name; + }); + const auto& const_variable_it = std::find_if(info.constant_variables.begin(), + info.constant_variables.end(), + [&name](const IndexVariableInfo& var) { + return var.symbol->get_name() == name; + }); + is_constant = is_constant || + (int_variable_it != info.codegen_int_variables.end() && + int_variable_it->is_constant) || + const_variable_it != info.constant_variables.end(); return is_constant; } @@ -802,6 +818,9 @@ std::string CodegenCVisitor::get_parameter_str(const ParamVector& params) { return param; } +void CodegenCVisitor::print_backend_compute_routine_decl() { + // backend specific, do nothing +} void CodegenCVisitor::print_channel_iteration_tiling_block_begin(BlockType /* type */) { // no tiling for cpu backend, just get loop bounds @@ -897,13 +916,19 @@ bool CodegenCVisitor::nrn_cur_reduction_loop_required() { } +void 
CodegenCVisitor::print_channel_iteration_loop(const std::string& start = "start", + const std::string& end = "end") { + printer->start_block("for (int id = {}; id < {}; id++)"_format(start, end)); +} + + /** * \details For CPU backend we iterate over all node counts. For cuda we use thread * index to check if block needs to be executed or not. */ void CodegenCVisitor::print_channel_iteration_block_begin(BlockType type) { print_channel_iteration_block_parallel_hint(type); - printer->start_block("for (int id = start; id < end; id++)"); + print_channel_iteration_loop(); } @@ -954,7 +979,7 @@ void CodegenCVisitor::print_atomic_reduction_pragma() { void CodegenCVisitor::print_shadow_reduction_block_begin() { - printer->start_block("for (int id = start; id < end; id++)"); + print_channel_iteration_loop(); } @@ -4374,11 +4399,13 @@ void CodegenCVisitor::print_g_unused() const { void CodegenCVisitor::print_compute_functions() { print_top_verbatim_blocks(); print_function_prototypes(); - for (const auto& procedure: info.procedures) { - print_procedure(*procedure); - } - for (const auto& function: info.functions) { - print_function(*function); + if (print_procedures_and_functions) { + for (const auto& procedure: info.procedures) { + print_procedure(*procedure); + } + for (const auto& function: info.functions) { + print_function(*function); + } } for (size_t i = 0; i < info.before_after_blocks.size(); i++) { print_before_after_block(info.before_after_blocks[i], i); @@ -4387,6 +4414,7 @@ void CodegenCVisitor::print_compute_functions() { auto block = callback->get_node_to_solve().get(); print_derivimplicit_kernel(block); } + print_backend_compute_routine_decl(); print_net_send_buffering(); print_net_init(); print_watch_activate(); diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 0d08da907f..b945554ba6 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -164,6 +164,11 @@ class CodegenCVisitor: public 
visitor::ConstAstVisitor { */ int current_watch_statement = 0; + /** + * Bool to select whether procedures and functions should be printed in the generated file + */ + bool print_procedures_and_functions = true; + /** * Data type of floating point variables */ @@ -261,6 +266,10 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { return codegen::naming::DEFAULT_INTEGER_TYPE; } + /** + * Instance Struct type name suffix + */ + std::string instance_struct_type_suffix = "Instance"; /** * Checks if given function name is \c net_send @@ -294,7 +303,7 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * Name of structure that wraps range variables */ std::string instance_struct() const { - return fmt::format("{}_Instance", info.mod_suffix); + return fmt::format("{}_{}", info.mod_suffix, instance_struct_type_suffix); } @@ -1083,6 +1092,35 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { void print_net_event_call(const ast::FunctionCall& node); + /** + * Print the for loop statement going through all the mechanism instances + */ + void print_channel_iteration_loop(const std::string& start, const std::string& end); + + + /** + * Print backend compute routines declaration for various backends + */ + virtual void print_backend_compute_routine_decl(); + + + /** + * Print channel iterations from which tasks are created + * + * \note This is not used for the C backend + * \param type + */ + virtual void print_channel_iteration_task_begin(BlockType type); + + + /** + * Print end of channel iteration for task + * + * \note This is not used for the C backend + */ + virtual void print_channel_iteration_task_end(); + + /** * Print block start for tiling on channel iteration */ @@ -1612,19 +1650,19 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * \param skip_init_check \c true if we want the generated code to execute the initialization * conditionally */ - void print_nrn_init(bool skip_init_check = true); + virtual void print_nrn_init(bool 
skip_init_check = true); /** * Print nrn_state / state update function definition */ - void print_nrn_state(); + virtual void print_nrn_state(); /** * Print nrn_cur / current update function definition */ - void print_nrn_cur(); + virtual void print_nrn_cur(); /** * Print fast membrane current calculation code @@ -1716,12 +1754,12 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * @param print_initialisers Whether or not default values for variables * be included in the struct declaration. */ - void print_mechanism_range_var_structure(bool print_initialisers); + virtual void print_mechanism_range_var_structure(); /** * Print the function that initialize instance structure */ - void print_instance_variable_setup(); + virtual void print_instance_variable_setup(); void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_binary_operator(const ast::BinaryOperator& node) override; diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index 5538519565..61b6869db3 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -226,7 +226,7 @@ void CodegenInfo::get_int_variables() { // not have doubles between read/write. 
Same // name variables are allowed for (const auto& var: ion.reads) { - const std::string name = "ion_" + var; + const std::string name = naming::ION_VARNAME_PREFIX + var; codegen_int_variables.emplace_back(make_symbol(name)); codegen_int_variables.back().is_constant = true; ion_vars[name] = codegen_int_variables.size() - 1; @@ -236,16 +236,17 @@ void CodegenInfo::get_int_variables() { std::shared_ptr ion_di_dv_var = nullptr; for (const auto& var: ion.writes) { - const std::string name = "ion_" + var; + const std::string name = naming::ION_VARNAME_PREFIX + var; const auto ion_vars_it = ion_vars.find(name); if (ion_vars_it != ion_vars.end()) { codegen_int_variables[ion_vars_it->second].is_constant = false; } else { - codegen_int_variables.emplace_back(make_symbol("ion_" + var)); + codegen_int_variables.emplace_back(make_symbol(naming::ION_VARNAME_PREFIX + var)); } if (ion.is_ionic_current(var)) { - ion_di_dv_var = make_symbol("ion_di" + ion.name + "dv"); + ion_di_dv_var = make_symbol(std::string(naming::ION_VARNAME_PREFIX) + "di" + + ion.name + "dv"); } if (ion.is_intra_cell_conc(var) || ion.is_extra_cell_conc(var)) { need_style = true; @@ -322,10 +323,11 @@ void CodegenInfo::get_int_variables() { void CodegenInfo::get_shadow_variables() { for (const auto& ion: ions) { for (const auto& var: ion.writes) { - codegen_shadow_variables.push_back({make_symbol(shadow_varname("ion_" + var))}); + codegen_shadow_variables.push_back( + {make_symbol(shadow_varname(naming::ION_VARNAME_PREFIX + var))}); if (ion.is_ionic_current(var)) { - codegen_shadow_variables.push_back( - {make_symbol(shadow_varname("ion_di" + ion.name + "dv"))}); + codegen_shadow_variables.push_back({make_symbol(shadow_varname( + std::string(naming::ION_VARNAME_PREFIX) + "di" + ion.name + "dv"))}); } } } diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index 75923b61a3..4d9375f0c0 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -568,7 +568,7 @@ struct 
CodegenInfo { if (artificial_cell) { return false; } - return nrn_state_block != nullptr || currents.empty(); + return nrn_state_block != nullptr || breakpoint_exist(); } /** diff --git a/src/codegen/codegen_ispc_visitor.hpp b/src/codegen/codegen_ispc_visitor.hpp index f97d0085c6..4e35819962 100644 --- a/src/codegen/codegen_ispc_visitor.hpp +++ b/src/codegen/codegen_ispc_visitor.hpp @@ -168,7 +168,7 @@ class CodegenIspcVisitor: public CodegenCVisitor { void print_procedure(const ast::ProcedureBlock& node) override; - void print_backend_compute_routine_decl(); + void print_backend_compute_routine_decl() override; /// print wrapper function that calls ispc kernel diff --git a/src/codegen/codegen_naming.hpp b/src/codegen/codegen_naming.hpp index 98d1003734..9ee2425e3b 100644 --- a/src/codegen/codegen_naming.hpp +++ b/src/codegen/codegen_naming.hpp @@ -167,6 +167,12 @@ static constexpr char THREAD_ARGS_PROTO[] = "_threadargsproto_"; /// prefix for ion variable static constexpr char ION_VARNAME_PREFIX[] = "ion_"; +/// name of the mechanism instance parameter in LLVM IR +static constexpr char MECH_INSTANCE_VAR[] = "mech"; +static constexpr char MECH_NODECOUNT_VAR[] = "node_count"; + +/// name of induction variable used in the kernel. 
+static constexpr char INDUCTION_VAR[] = "id"; /// commonly used variables in verbatim block and how they /// should be mapped to new code generation backends diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index a06f59812b..a23b24fa5b 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -244,7 +244,7 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); - add_var_with_type(NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); return std::make_shared(codegen_vars); } @@ -464,7 +464,7 @@ void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, /// instance_var_helper check of instance variables from mod file as well /// as extra variables like ion index variables added for code generation if (instance_var_helper.is_an_instance_variable(variable_name)) { - auto name = new ast::Name(new ast::String(MECH_INSTANCE_VAR)); + auto name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); auto var = std::make_shared(name, variable->clone()); variable->set_name(var); } @@ -643,7 +643,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { { /// access node index and corresponding voltage index_statements.push_back( - visitor::create_statement("node_id = node_index[{}]"_format(INDUCTION_VAR))); + visitor::create_statement("node_id = node_index[{}]"_format(naming::INDUCTION_VAR))); body_statements.push_back( visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); @@ -685,7 +685,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { 
compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); - std::vector induction_variables{INDUCTION_VAR}; + std::vector induction_variables{naming::INDUCTION_VAR}; function_statements.push_back( create_local_variable_statement(induction_variables, INTEGER_TYPE)); @@ -707,7 +707,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { ast::CodegenVarWithTypeVector code_arguments; auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); - auto instance_var_name = new ast::Name(new ast::String(MECH_INSTANCE_VAR)); + auto instance_var_name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); code_arguments.emplace_back(instance_var); @@ -744,9 +744,9 @@ void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr& int_variables, std::vector& double_variables, bool is_remainder_loop) { - const auto& initialization = loop_initialization_expression(INDUCTION_VAR, is_remainder_loop); - const auto& condition = loop_count_expression(INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); - const auto& increment = loop_increment_expression(INDUCTION_VAR, is_remainder_loop); + const auto& initialization = loop_initialization_expression(naming::INDUCTION_VAR, is_remainder_loop); + const auto& condition = loop_count_expression(naming::INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); + const auto& increment = loop_increment_expression(naming::INDUCTION_VAR, is_remainder_loop); // Clone the statement block if needed since it can be used by the remainder loop. auto loop_block = (is_remainder_loop || !platform.is_cpu_with_simd()) ? 
block : std::shared_ptr(block->clone()); @@ -762,7 +762,7 @@ void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptrget_value(); // Otherwise, the length is taken from the macro. - const auto& macro = sym_tab->lookup(integer->get_macro()->get_node_name()); + const auto& macro = program_symtab->lookup(integer->get_macro()->get_node_name()); return static_cast(*macro->get_value()); } @@ -755,7 +752,7 @@ void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { if (func) { create_function_call(func, name, node.get_arguments()); } else { - auto symbol = sym_tab->lookup(name); + auto symbol = program_symtab->lookup(name); if (symbol && symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) { create_external_function_call(name, node.get_arguments()); } else { @@ -844,11 +841,11 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { CodegenLLVMHelperVisitor v{platform}; const auto& functions = v.get_codegen_functions(node); instance_var_helper = v.get_instance_var_helper(); - sym_tab = node.get_symbol_table(); + program_symtab = node.get_symbol_table(); std::string kernel_id = v.get_kernel_id(); // Initialize the builder for this NMODL program. - ir_builder.initialize(*sym_tab, kernel_id); + ir_builder.initialize(*program_symtab, kernel_id); // Create compile unit if adding debug information to the module. if (add_debug_information) { @@ -861,6 +858,9 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { create_function_declaration(*func); } + // Set the AST symbol table. + program_symtab = node.get_symbol_table(); + // Proceed with code generation. 
Right now, we do not do // node.visit_children(*this); // The reason is that the node may contain AST nodes for which the visitor functions have been @@ -921,6 +921,218 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { } logger->debug("Dumping generated IR...\n" + dump_module()); + // Setup CodegenHelper for C++ wrapper file + setup(node); + print_wrapper_routines(); + print_target_file(); +} + +void CodegenLLVMVisitor::print_mechanism_range_var_structure() { + printer->add_newline(2); + printer->add_line("/** Instance Struct passed as argument to LLVM IR kernels */"); + printer->start_block("struct {} "_format(instance_struct())); + for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { + auto is_pointer = variable->get_is_pointer(); + auto name = to_nmodl(variable->get_name()); + auto qualifier = is_constant_variable(name) ? k_const() : ""; + auto nmodl_type = variable->get_type()->get_type(); + auto pointer = is_pointer ? "*" : ""; + auto var_name = variable->get_node_name(); + switch (nmodl_type) { +#define DISPATCH(type, c_type) \ + case type: \ + printer->add_line("{}{}{} {}{};"_format( \ + qualifier, c_type, pointer, is_pointer ? 
ptr_type_qualifier() : "", var_name)); \ + break; + + DISPATCH(ast::AstNodeType::DOUBLE, "double"); + DISPATCH(ast::AstNodeType::INTEGER, "int"); + +#undef DISPATCH + default: + throw std::runtime_error("Error: unsupported type found in instance struct"); + } + } + printer->end_block(); + printer->add_text(";"); + printer->add_newline(); +} + +void CodegenLLVMVisitor::print_instance_variable_setup() { + if (range_variable_setup_required()) { + print_setup_range_variable(); + } + + if (shadow_vector_setup_required()) { + print_shadow_vector_setup(); + } + printer->add_newline(2); + printer->add_line("/** initialize mechanism instance variables */"); + printer->start_block("static inline void setup_instance(NrnThread* nt, Memb_list* ml) "); + printer->add_line("{0}* inst = ({0}*) mem_alloc(1, sizeof({0}));"_format(instance_struct())); + if (channel_task_dependency_enabled() && !info.codegen_shadow_variables.empty()) { + printer->add_line("setup_shadow_vectors(inst, ml);"); + } + + std::string stride; + printer->add_line("int pnodecount = ml->_nodecount_padded;"); + stride = "*pnodecount"; + + printer->add_line("Datum* indexes = ml->pdata;"); + + std::string float_type = default_float_data_type(); + std::string int_type = default_int_data_type(); + std::string float_type_pointer = float_type + "*"; + std::string int_type_pointer = int_type + "*"; + + int id = 0; + std::vector variables_to_free; + + for (auto& var: info.codegen_float_variables) { + auto name = var->get_name(); + auto range_var_type = get_range_var_float_type(var); + if (float_type == range_var_type) { + auto variable = "ml->data+{}{}"_format(id, stride); + auto device_variable = get_variable_device_pointer(variable, float_type_pointer); + printer->add_line("inst->{} = {};"_format(name, device_variable)); + } else { + printer->add_line("inst->{} = setup_range_variable(ml->data+{}{}, pnodecount);"_format( + name, id, stride)); + variables_to_free.push_back(name); + } + id += var->get_length(); + } + + 
for (auto& var: info.codegen_int_variables) { + auto name = var.symbol->get_name(); + std::string variable = name; + std::string type = ""; + if (var.is_index || var.is_integer) { + variable = "ml->pdata"; + type = int_type_pointer; + } else if (var.is_vdata) { + variable = "nt->_vdata"; + type = "void**"; + } else { + variable = "nt->_data"; + type = info.artificial_cell ? "void*" : float_type_pointer; + } + auto device_variable = get_variable_device_pointer(variable, type); + printer->add_line("inst->{} = {};"_format(name, device_variable)); + } + + int index_id = 0; + // for integer variables, there should be index + for (const auto& int_var: info.codegen_int_variables) { + std::string var_name = int_var.symbol->get_name() + "_index"; + // Create for loop that instantiates the ion__index with + // indexes[*pdnodecount] + printer->add_line("inst->{} = indexes+{}*pnodecount;"_format(var_name, index_id)); + index_id++; + } + + // Pass voltage pointer to the the instance struct + printer->add_line("inst->voltage = nt->_actual_v;"); + + // Pass ml->nodeindices pointer to node_index + printer->add_line("inst->node_index = ml->nodeindices;"); + + // Setup global variables + printer->add_line("inst->{0} = nt->{0};"_format(naming::NTHREAD_T_VARIABLE)); + printer->add_line("inst->{0} = nt->{0};"_format(naming::NTHREAD_DT_VARIABLE)); + printer->add_line("inst->{0} = {0};"_format(naming::CELSIUS_VARIABLE)); + printer->add_line("inst->{0} = {0};"_format(naming::SECOND_ORDER_VARIABLE)); + printer->add_line("inst->{} = ml->nodecount;"_format(naming::MECH_NODECOUNT_VAR)); + + printer->add_line("ml->instance = inst;"); + printer->end_block(3); + + printer->add_line("/** cleanup mechanism instance variables */"); + printer->start_block("static inline void cleanup_instance(Memb_list* ml) "); + printer->add_line("{0}* inst = ({0}*) ml->instance;"_format(instance_struct())); + if (range_variable_setup_required()) { + for (auto& var: variables_to_free) { + 
printer->add_line("mem_free((void*)inst->{});"_format(var)); + } + } + printer->add_line("mem_free((void*)inst);"); + printer->end_block(1); +} + +CodegenLLVMVisitor::ParamVector CodegenLLVMVisitor::get_compute_function_parameter() { + auto params = ParamVector(); + params.emplace_back(param_type_qualifier(), + "{}*"_format(instance_struct()), + ptr_type_qualifier(), + "inst"); + return params; +} + +void CodegenLLVMVisitor::print_backend_compute_routine_decl() { + auto params = get_compute_function_parameter(); + auto compute_function = compute_method_name(BlockType::Initial); + + printer->add_newline(2); + printer->add_line("extern void {}({});"_format(compute_function, get_parameter_str(params))); + + if (info.nrn_cur_required()) { + compute_function = compute_method_name(BlockType::Equation); + printer->add_line( + "extern void {}({});"_format(compute_function, get_parameter_str(params))); + } + + if (info.nrn_state_required()) { + compute_function = compute_method_name(BlockType::State); + printer->add_line( + "extern void {}({});"_format(compute_function, get_parameter_str(params))); + } +} + +// Copied from CodegenIspcVisitor +void CodegenLLVMVisitor::print_wrapper_routine(const std::string& wrapper_function, + BlockType type) { + static const auto args = "NrnThread* nt, Memb_list* ml, int type"; + const auto function_name = method_name(wrapper_function); + auto compute_function = compute_method_name(type); + + printer->add_newline(2); + printer->start_block("void {}({})"_format(function_name, args)); + printer->add_line("int nodecount = ml->nodecount;"); + // clang-format off + printer->add_line("{0}* {1}inst = ({0}*) ml->instance;"_format(instance_struct(), ptr_type_qualifier())); + // clang-format on + + if (type == BlockType::Initial) { + printer->add_newline(); + printer->add_line("setup_instance(nt, ml);"); + printer->add_newline(); + printer->start_block("if (_nrn_skip_initmodel)"); + printer->add_line("return;"); + printer->end_block(); + 
printer->add_newline(); + } + + printer->add_line("{}(inst);"_format(compute_function)); + printer->end_block(); + printer->add_newline(); +} + +void CodegenLLVMVisitor::print_nrn_init(bool skip_init_check) { + print_wrapper_routine(naming::NRN_INIT_METHOD, BlockType::Initial); +} + +void CodegenLLVMVisitor::print_nrn_cur() { + print_wrapper_routine(naming::NRN_CUR_METHOD, BlockType::Equation); +} + +void CodegenLLVMVisitor::print_nrn_state() { + print_wrapper_routine(naming::NRN_STATE_METHOD, BlockType::State); +} + +void CodegenLLVMVisitor::print_wrapper_routines() { + printer = wrapper_printer; + wrapper_codegen = true; + CodegenCVisitor::print_codegen_routines(); } void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 27150ff296..683cc7972a 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -18,6 +18,7 @@ #include #include +#include "codegen/codegen_c_visitor.hpp" #include "codegen/llvm/codegen_llvm_helper_visitor.hpp" #include "codegen/llvm/llvm_debug_builder.hpp" #include "codegen/llvm/llvm_ir_builder.hpp" @@ -49,13 +50,16 @@ namespace codegen { * \class CodegenLLVMVisitor * \brief %Visitor for transforming NMODL AST to LLVM IR */ -class CodegenLLVMVisitor: public visitor::ConstAstVisitor { +class CodegenLLVMVisitor: public CodegenCVisitor { /// Name of mod file (without .mod suffix). std::string mod_filename; /// Output directory for code generation. std::string output_dir; + /// flag to indicate if visitor should print the the wrapper code + bool wrapper_codegen = false; + private: /// Underlying LLVM context. std::unique_ptr context = std::make_unique(); @@ -72,9 +76,6 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { /// Add debug information to the module. bool add_debug_information; - /// Pointer to AST symbol table. 
- symtab::SymbolTable* sym_tab; - /// Instance variable helper. InstanceVarHelper instance_var_helper; @@ -91,13 +92,45 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { int opt_level_ir, bool add_debug_information = false, std::vector fast_math_flags = {}) - : mod_filename(mod_filename) + : CodegenCVisitor(mod_filename, + output_dir, + platform.is_single_precision() ? "float" : "double", + false, + ".ll", + ".cpp") + , mod_filename(mod_filename) , output_dir(output_dir) , platform(platform) , opt_level_ir(opt_level_ir) , add_debug_information(add_debug_information) , ir_builder(*context, platform, fast_math_flags) - , debug_builder(*module) {} + , debug_builder(*module) { + instance_struct_type_suffix = "_instance_var__type"; + print_procedures_and_functions = false; + } + + CodegenLLVMVisitor(const std::string& mod_filename, + std::ostream& stream, + Platform& platform, + int opt_level_ir, + bool add_debug_information = false, + std::vector fast_math_flags = {}) + : CodegenCVisitor(mod_filename, + stream, + platform.is_single_precision() ? "float" : "double", + false, + ".ll", + ".cpp") + , mod_filename(mod_filename) + , output_dir(".") + , platform(platform) + , opt_level_ir(opt_level_ir) + , add_debug_information(add_debug_information) + , ir_builder(*context, platform, fast_math_flags) + , debug_builder(*module) { + instance_struct_type_suffix = "_instance_var__type"; + print_procedures_and_functions = false; + } /// Dumps the generated LLVM IR module to string. std::string dump_module() const { @@ -108,6 +141,10 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { return str; } + void print_target_file() const { + target_printer->add_multi_line(dump_module()); + } + /// Fills the container with the names of kernel functions from the MOD file. 
void find_kernel_names(std::vector& container); @@ -153,6 +190,113 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor { void visit_var_name(const ast::VarName& node) override; void visit_while_statement(const ast::WhileStatement& node) override; + /* + * Override functions from CodegenCVisitor to the ones from visitor::ConstsAstVisitor as it was + * originally for CodegenLLVMVisitor + */ + void visit_binary_operator(const ast::BinaryOperator& node) override { + visitor::ConstAstVisitor::visit_binary_operator(node); + } + void visit_else_if_statement(const ast::ElseIfStatement& node) override { + visitor::ConstAstVisitor::visit_else_if_statement(node); + } + void visit_else_statement(const ast::ElseStatement& node) override { + visitor::ConstAstVisitor::visit_else_statement(node); + } + void visit_float(const ast::Float& node) override { + visitor::ConstAstVisitor::visit_float(node); + } + void visit_from_statement(const ast::FromStatement& node) override { + visitor::ConstAstVisitor::visit_from_statement(node); + } + void visit_eigen_newton_solver_block(const ast::EigenNewtonSolverBlock& node) override { + visitor::ConstAstVisitor::visit_eigen_newton_solver_block(node); + } + void visit_eigen_linear_solver_block(const ast::EigenLinearSolverBlock& node) override { + visitor::ConstAstVisitor::visit_eigen_linear_solver_block(node); + } + void visit_indexed_name(const ast::IndexedName& node) override { + visitor::ConstAstVisitor::visit_indexed_name(node); + } + void visit_local_list_statement(const ast::LocalListStatement& node) override { + visitor::ConstAstVisitor::visit_local_list_statement(node); + } + void visit_name(const ast::Name& node) override { + visitor::ConstAstVisitor::visit_name(node); + } + void visit_paren_expression(const ast::ParenExpression& node) override { + visitor::ConstAstVisitor::visit_paren_expression(node); + } + void visit_prime_name(const ast::PrimeName& node) override { + visitor::ConstAstVisitor::visit_prime_name(node); + } + 
void visit_string(const ast::String& node) override { + visitor::ConstAstVisitor::visit_string(node); + } + void visit_solution_expression(const ast::SolutionExpression& node) override { + visitor::ConstAstVisitor::visit_solution_expression(node); + } + void visit_unary_operator(const ast::UnaryOperator& node) override { + visitor::ConstAstVisitor::visit_unary_operator(node); + } + void visit_unit(const ast::Unit& node) override { + visitor::ConstAstVisitor::visit_unit(node); + } + void visit_verbatim(const ast::Verbatim& node) override { + visitor::ConstAstVisitor::visit_verbatim(node); + } + void visit_watch_statement(const ast::WatchStatement& node) override { + visitor::ConstAstVisitor::visit_watch_statement(node); + } + void visit_derivimplicit_callback(const ast::DerivimplicitCallback& node) override { + visitor::ConstAstVisitor::visit_derivimplicit_callback(node); + } + void visit_for_netcon(const ast::ForNetcon& node) override { + visitor::ConstAstVisitor::visit_for_netcon(node); + } + + /* + * Functions related to printing the wrapper cpp file + */ + void print_wrapper_routines() override; + void print_wrapper_headers_include(); + void print_data_structures(); + void print_mechanism_range_var_structure() override; + void print_instance_variable_setup() override; + + /** + * Print the \c nrn\_init function definition + * \param skip_init_check \c true if we want the generated code to execute the initialization + * conditionally + */ + void print_nrn_init(bool skip_init_check = true) override; + /** + * Print nrn_state / state update function definition + */ + void print_nrn_state() override; + /** + * Print nrn_cur / current update function definition + */ + void print_nrn_cur() override; + /* + * Declare the external compute functions (nrn_init, nrn_cur and nrn_state) + */ + void print_backend_compute_routine_decl() override; + /* + * Define the wrappers for the external compute functions (nrn_init, nrn_cur and nrn_state) + */ + void 
print_backend_compute_routine(); + /* + * Print the wrapper routine based on the parameters given + * \param wrapper_function The name of the function to wrap + * \param type The \c BlockType that this function is based on + */ + void print_wrapper_routine(const std::string& wrapper_function, BlockType type); + /* + * Function that returns a vector of Parameters needed to be passed to the compute routines. + * The first argument should be an object of \c mechanism_instance_struct_type_name + */ + CodegenLLVMVisitor::ParamVector get_compute_function_parameter(); /// Wraps all kernel function calls into wrapper functions that use `void*` to pass the data to /// the kernel. void wrap_kernel_functions(); diff --git a/src/printer/code_printer.hpp b/src/printer/code_printer.hpp index f6b703d3da..4267f5f757 100644 --- a/src/printer/code_printer.hpp +++ b/src/printer/code_printer.hpp @@ -72,6 +72,8 @@ class CodePrinter { /// end a block and immediately start a new one (i.e. "[indent-1]} [expression] {\n") void restart_block(std::string const& expression); + void start_block(const std::string& text); + void add_text(const std::string&); void add_line(const std::string&, int num_new_lines = 1); diff --git a/test/integration/CMakeLists.txt b/test/integration/CMakeLists.txt index 82c72a6a40..e0bb995c30 100644 --- a/test/integration/CMakeLists.txt +++ b/test/integration/CMakeLists.txt @@ -12,4 +12,7 @@ foreach(modfile ${modfiles}) get_filename_component(modfile_name "${modfile}" NAME) add_test(NAME ${modfile_name} COMMAND ${CMAKE_BINARY_DIR}/bin/nmodl ${modfile}) cpp_cc_configure_sanitizers(TEST ${modfile_name}) + add_test(NAME ${modfile_name}_oacc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --c + acc --oacc) + add_test(NAME ${modfile_name}_ispc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --ispc) endforeach() diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index d9c7e2c349..a72f478c6e 100644 --- a/test/unit/CMakeLists.txt +++ 
b/test/unit/CMakeLists.txt @@ -113,8 +113,9 @@ if(NMODL_ENABLE_LLVM) add_library(benchmark_data STATIC codegen/codegen_data_helper.cpp) add_dependencies(benchmark_data lexer) - add_executable(testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp - codegen/codegen_data_helper.cpp codegen/codegen_llvm_instance_struct.cpp) + add_executable( + testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp + codegen/codegen_llvm_instance_struct.cpp codegen/codegen_llvm_visitor.cpp) add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_execution.cpp) if(NMODL_ENABLE_LLVM_CUDA) @@ -160,8 +161,10 @@ set(test_env ${NMODL_SANITIZER_ENABLE_ENVIRONMENT}) set(testvisitor_env "PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}") if(NOT LINK_AGAINST_PYTHON) list(APPEND testvisitor_env "NMODL_PYLIB=$ENV{NMODL_PYLIB}") - list(APPEND testvisitor_env - "NMODL_WRAPLIB=${PROJECT_BINARY_DIR}/lib/nmodl/libpywrapper${CMAKE_SHARED_LIBRARY_SUFFIX}") + list( + APPEND + testvisitor_env + "NMODL_WRAPLIB=${PROJECT_BINARY_DIR}/lib/nmodl/libpywrapper${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() foreach( diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp new file mode 100644 index 0000000000..d2a058b3c5 --- /dev/null +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -0,0 +1,231 @@ +/************************************************************************* + * Copyright (C) 2019-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include + +#include "ast/program.hpp" +#include "codegen/codegen_helper_visitor.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "config/config.h" +#include "parser/nmodl_driver.hpp" +#include "test/unit/utils/test_utils.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "visitors/units_visitor.hpp" + +using namespace nmodl; +using namespace visitor; +using namespace codegen; + +using nmodl::NrnUnitsLib; +using nmodl::parser::NmodlDriver; +using nmodl::test_utils::reindent_text; + +/// Run LLVM codegen visitor and get instance struct declaration and setup of C++ wrapper +std::string get_wrapper_instance_struct(const std::string& nmodl_text) { + const auto& ast = NmodlDriver().parse_string(nmodl_text); + std::stringbuf strbuf; + std::ostream oss(&strbuf); + /// directory where units lib file is located + std::string units_dir(NrnUnitsLib::get_path()); + /// parse units of text + UnitsVisitor(units_dir).visit_program(*ast); + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + /// create LLVM and C++ wrapper code generation visitor + codegen::Platform cpu_platform(/*use_single_precision=*/false, /*instruction_width=*/1); + codegen::CodegenLLVMVisitor llvm_visitor("hh.mod", oss, cpu_platform, 0); + llvm_visitor.visit_program(*ast); + strbuf.str(""); + llvm_visitor.print_mechanism_range_var_structure(); + llvm_visitor.print_instance_variable_setup(); + return strbuf.str(); +} + +SCENARIO("Check instance struct declaration and setup in wrapper", + "[codegen][llvm][instance_struct]") { + GIVEN("hh: simple mod file") { + std::string nmodl_text = R"( + TITLE hh.mod squid sodium, potassium, and leak channels + + UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) + } + + NEURON { + SUFFIX hh + 
USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE : assigned GLOBALs will be per thread + } + + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + + STATE { + m h n + } + + ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + ek (mV) + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf hinf ninf + mtau (ms) htau (ms) ntau (ms) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v - ena) + gk = gkbar*n*n*n*n + ik = gk*(v - ek) + il = gl*(v - el) + } + + DERIVATIVE states { + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau + } + )"; + + std::string generated_instance_struct_declaration = R"( + struct hh__instance_var__type { + const double* __restrict__ gnabar; + const double* __restrict__ gkbar; + const double* __restrict__ gl; + const double* __restrict__ el; + double* __restrict__ gna; + double* __restrict__ gk; + double* __restrict__ il; + double* __restrict__ minf; + double* __restrict__ hinf; + double* __restrict__ ninf; + double* __restrict__ mtau; + double* __restrict__ htau; + double* __restrict__ ntau; + double* __restrict__ m; + double* __restrict__ h; + double* __restrict__ n; + double* __restrict__ Dm; + double* __restrict__ Dh; + double* __restrict__ Dn; + double* __restrict__ ena; + double* __restrict__ ek; + double* __restrict__ ina; + double* __restrict__ ik; + double* __restrict__ v_unused; + double* __restrict__ g_unused; + const double* __restrict__ ion_ena; + double* __restrict__ ion_ina; + double* __restrict__ ion_dinadv; + const double* __restrict__ ion_ek; + double* __restrict__ ion_ik; + double* __restrict__ ion_dikdv; + int* __restrict__ ion_ena_index; + int* __restrict__ ion_ina_index; + int* __restrict__ ion_dinadv_index; + int* __restrict__ ion_ek_index; + int* __restrict__ 
ion_ik_index; + int* __restrict__ ion_dikdv_index; + double* __restrict__ voltage; + int* __restrict__ node_index; + double t; + double dt; + double celsius; + int secondorder; + int node_count; + }; + )"; + std::string generated_instance_struct_setup = R"( + static inline void setup_instance(NrnThread* nt, Memb_list* ml) { + hh__instance_var__type* inst = (hh__instance_var__type*) mem_alloc(1, sizeof(hh__instance_var__type)); + int pnodecount = ml->_nodecount_padded; + Datum* indexes = ml->pdata; + inst->gnabar = ml->data+0*pnodecount; + inst->gkbar = ml->data+1*pnodecount; + inst->gl = ml->data+2*pnodecount; + inst->el = ml->data+3*pnodecount; + inst->gna = ml->data+4*pnodecount; + inst->gk = ml->data+5*pnodecount; + inst->il = ml->data+6*pnodecount; + inst->minf = ml->data+7*pnodecount; + inst->hinf = ml->data+8*pnodecount; + inst->ninf = ml->data+9*pnodecount; + inst->mtau = ml->data+10*pnodecount; + inst->htau = ml->data+11*pnodecount; + inst->ntau = ml->data+12*pnodecount; + inst->m = ml->data+13*pnodecount; + inst->h = ml->data+14*pnodecount; + inst->n = ml->data+15*pnodecount; + inst->Dm = ml->data+16*pnodecount; + inst->Dh = ml->data+17*pnodecount; + inst->Dn = ml->data+18*pnodecount; + inst->ena = ml->data+19*pnodecount; + inst->ek = ml->data+20*pnodecount; + inst->ina = ml->data+21*pnodecount; + inst->ik = ml->data+22*pnodecount; + inst->v_unused = ml->data+23*pnodecount; + inst->g_unused = ml->data+24*pnodecount; + inst->ion_ena = nt->_data; + inst->ion_ina = nt->_data; + inst->ion_dinadv = nt->_data; + inst->ion_ek = nt->_data; + inst->ion_ik = nt->_data; + inst->ion_dikdv = nt->_data; + inst->ion_ena_index = indexes+0*pnodecount; + inst->ion_ina_index = indexes+1*pnodecount; + inst->ion_dinadv_index = indexes+2*pnodecount; + inst->ion_ek_index = indexes+3*pnodecount; + inst->ion_ik_index = indexes+4*pnodecount; + inst->ion_dikdv_index = indexes+5*pnodecount; + inst->voltage = nt->_actual_v; + inst->node_index = ml->nodeindices; + inst->t = nt->t; + 
inst->dt = nt->dt; + inst->celsius = celsius; + inst->secondorder = secondorder; + inst->node_count = ml->nodecount; + ml->instance = inst; + } + )"; + + THEN("index and nt variables") { + auto result_instance_struct_declaration_setup = reindent_text( + get_wrapper_instance_struct(nmodl_text)); + std::cout << "Result\n" << result_instance_struct_declaration_setup << std::endl; + + auto expected_instance_struct_declaration = reindent_text( + generated_instance_struct_declaration); + auto expected_instance_struct_setup = reindent_text(generated_instance_struct_setup); + + REQUIRE(result_instance_struct_declaration_setup.find( + expected_instance_struct_declaration) != std::string::npos); + REQUIRE(result_instance_struct_declaration_setup.find(expected_instance_struct_setup) != + std::string::npos); + } + } +} From 84a85d4d702de2a57834e9ada6d15c88e0310b54 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 28 Mar 2022 13:08:09 +0200 Subject: [PATCH 081/105] [LLVM][GPU] NVPTX specific passes for code generation (#833) * Added NVPTX-specific optimization passes for PTX generation * Added tests * Added CLI options to select 32- or 64-bit targets and target compute architecture (e.g. 
sm_70) Co-authored-by: Ioannis Magkanaris --- cmake/LLVMHelper.cmake | 3 + src/codegen/llvm/codegen_llvm_visitor.cpp | 24 ++++++- src/codegen/llvm/llvm_utils.cpp | 81 +++++++++++++++++++++++ src/codegen/llvm/llvm_utils.hpp | 11 +++ src/codegen/llvm/target_platform.cpp | 12 ++++ src/codegen/llvm/target_platform.hpp | 25 ++++++- src/main.cpp | 7 +- test/unit/codegen/codegen_llvm_ir.cpp | 45 ++++++++++++- 8 files changed, 201 insertions(+), 7 deletions(-) diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake index 9e4af5d503..717a597f95 100644 --- a/cmake/LLVMHelper.cmake +++ b/cmake/LLVMHelper.cmake @@ -15,6 +15,9 @@ set(NMODL_LLVM_COMPONENTS ipo mc native + nvptxcodegen + nvptxdesc + nvptxinfo orcjit target transformutils diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 589e069ec4..d906e9bd44 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -883,8 +883,9 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { throw std::runtime_error("Error: incorrect IR has been generated!\n" + ostream.str()); } - if (opt_level_ir) { - logger->info("Running LLVM optimisation passes"); + // Handle optimization passes for GPUs separately. + if (platform.is_cpu() && opt_level_ir) { + logger->info("Running LLVM optimisation passes for CPU platforms"); utils::initialise_optimisation_passes(); utils::optimise_module(*module, opt_level_ir); } @@ -915,12 +916,29 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { #endif } + // Handle GPU optimizations (CUDA platfroms only for now). + if (platform.is_gpu()) { + if (!platform.is_CUDA_gpu()) + throw std::runtime_error("Error: unsupported GPU architecture!\n"); + + // We only support CUDA backends anyway, so this works for now. 
+ utils::initialise_nvptx_passes(); + + std::string target_asm; + utils::optimise_module_for_nvptx(platform, *module, opt_level_ir, target_asm); + + logger->debug("Dumping generated IR...\n" + dump_module()); + logger->debug("Dumping generated PTX...\n" + target_asm); + } else { + // Workaround for debug outputs. + logger->debug("Dumping generated IR...\n" + dump_module()); + } + // If the output directory is specified, save the IR to .ll file. if (output_dir != ".") { utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); } - logger->debug("Dumping generated IR...\n" + dump_module()); // Setup CodegenHelper for C++ wrapper file setup(node); print_wrapper_routines(); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index 59967c59c1..7086275557 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -13,8 +13,10 @@ #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" namespace nmodl { @@ -61,6 +63,85 @@ static void run_optimisation_passes(llvm::Module& module, /* Optimisation utils */ /****************************************************************************************/ +void initialise_nvptx_passes() { + // Register targets. + LLVMInitializeNVPTXTarget(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializeNVPTXAsmPrinter(); + + // Initialize passes. + initialise_optimisation_passes(); +} + +void optimise_module_for_nvptx(codegen::Platform& platform, + llvm::Module& module, + int opt_level, + std::string& target_asm) { + // CUDA target machine we generating code for. + std::unique_ptr tm; + std::string platform_name = platform.get_name(); + + // Target and layout information. 
+ static const std::map triple_str = { + {"nvptx", "nvptx-nvidia-cuda"}, + {"nvptx64", "nvptx64-nvidia-cuda"}}; + static const std::map data_layout_str = { + {"nvptx", "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}, + {"nvptx64", "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}}; + + // Set data layout and target triple information for the module. + auto triple = triple_str.at(platform_name); + module.setDataLayout(data_layout_str.at(platform_name)); + module.setTargetTriple(triple); + + std::string subtarget = platform.get_subtarget_name(); + std::string features = "+ptx70"; + + // Find the specified target in registry. + std::string error_msg; + auto* target = llvm::TargetRegistry::lookupTarget(triple, error_msg); + if (!target) + throw std::runtime_error("Error: " + error_msg + "\n"); + + tm.reset(target->createTargetMachine(triple, subtarget, features, {}, {})); + if (!tm) + throw std::runtime_error("Error: creating target machine failed! Aborting."); + + // Create pass managers. + llvm::legacy::FunctionPassManager func_pm(&module); + llvm::legacy::PassManager module_pm; + llvm::PassManagerBuilder pm_builder; + pm_builder.OptLevel = opt_level; + pm_builder.SizeLevel = 0; + pm_builder.Inliner = llvm::createFunctionInliningPass(); + + // Do not vectorize! + pm_builder.LoopVectorize = false; + + // Adjusting pass manager adds target-specific IR transformations, e.g. + // inferring address spaces. + tm->adjustPassManager(pm_builder); + pm_builder.populateFunctionPassManager(func_pm); + pm_builder.populateModulePassManager(module_pm); + + // This runs target-indepependent optimizations. + run_optimisation_passes(module, func_pm, module_pm); + + // Now, we want to run target-specific (e.g. NVPTX) passes. In LLVM, this + // is done via `addPassesToEmitFile`. 
+ llvm::raw_string_ostream stream(target_asm); + llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegen_pm; + + tm->addPassesToEmitFile(codegen_pm, pstream, nullptr, llvm::CGFT_AssemblyFile); + codegen_pm.run(module); +} + void initialise_optimisation_passes() { auto& registry = *llvm::PassRegistry::getPassRegistry(); llvm::initializeCore(registry); diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp index 8e1e6e48dc..17be5073e2 100644 --- a/src/codegen/llvm/llvm_utils.hpp +++ b/src/codegen/llvm/llvm_utils.hpp @@ -7,6 +7,8 @@ #pragma once +#include "codegen/llvm/target_platform.hpp" + #include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" @@ -16,6 +18,15 @@ namespace utils { /// Initialises some LLVM optimisation passes. void initialise_optimisation_passes(); +/// Initialises NVPTX-specific optimisation passes. +void initialise_nvptx_passes(); + +/// Optimises the given LLVM IR module for NVPTX targets. +void optimise_module_for_nvptx(codegen::Platform& platform, + llvm::Module& module, + int opt_level, + std::string& target_asm); + /// Optimises the given LLVM IR module. 
void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm = nullptr); diff --git a/src/codegen/llvm/target_platform.cpp b/src/codegen/llvm/target_platform.cpp index 6cb8c7bb2b..49a0db9a31 100644 --- a/src/codegen/llvm/target_platform.cpp +++ b/src/codegen/llvm/target_platform.cpp @@ -7,6 +7,8 @@ #include "codegen/llvm/target_platform.hpp" +#include + namespace nmodl { namespace codegen { @@ -30,6 +32,10 @@ bool Platform::is_gpu() { return platform_id == PlatformID::GPU; } +bool Platform::is_CUDA_gpu() { + return platform_id == PlatformID::GPU && (name == "nvptx" || name == "nvptx64"); +} + bool Platform::is_single_precision() { return use_single_precision; } @@ -38,6 +44,12 @@ std::string Platform::get_name() const { return name; } +std::string Platform::get_subtarget_name() const { + if (platform_id != PlatformID::GPU) + throw std::runtime_error("Error: platform must be a GPU to query the subtarget!\n"); + return subtarget_name; +} + std::string Platform::get_math_library() const { return math_library; } diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp index 2eabbb1a4b..282f6943d7 100644 --- a/src/codegen/llvm/target_platform.hpp +++ b/src/codegen/llvm/target_platform.hpp @@ -32,11 +32,16 @@ class Platform { /// Name of the platform. const std::string name = Platform::DEFAULT_PLATFORM_NAME; + /// Target chip for GPUs. + /// TODO: this should only be available to GPUs! If we refactor target + /// classes so that GPUPlatform <: Platform, it will be nicer! + const std::string subtarget_name = "sm_70"; + /// Target-specific id to compare platforms easily. PlatformID platform_id; /// User-provided width that is used to construct LLVM instructions - // and types. + /// and types. int instruction_width = 1; /// Use single-precision floating-point types. 
@@ -46,6 +51,19 @@ class Platform { std::string math_library = Platform::DEFAULT_MATH_LIBRARY; public: + Platform(PlatformID platform_id, + const std::string& name, + const std::string& subtarget_name, + std::string& math_library, + bool use_single_precision = false, + int instruction_width = 1) + : platform_id(platform_id) + , name(name) + , subtarget_name(subtarget_name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + Platform(PlatformID platform_id, const std::string& name, std::string& math_library, @@ -77,10 +95,15 @@ class Platform { /// Checks if this platform is a GPU. bool is_gpu(); + /// Checks if this platform is CUDA platform. + bool is_CUDA_gpu(); + bool is_single_precision(); std::string get_name() const; + std::string get_subtarget_name() const; + std::string get_math_library() const; int get_instruction_width() const; diff --git a/src/main.cpp b/src/main.cpp index cb3588793d..de2e415058 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -365,9 +365,12 @@ int main(int argc, const char* argv[]) { auto gpu_opt = app.add_subcommand("gpu", "LLVM GPU option")->ignore_case(); gpu_opt->needs(llvm_opt); - gpu_opt->add_option("--name", + auto gpu_target_name = gpu_opt->add_option("--name", llvm_gpu_name, "Name of GPU platform to use")->ignore_case(); + gpu_opt->add_option("--target-chip", + llvm_cpu_name, + "Name of target chip to use")->ignore_case(); auto gpu_math_library_opt = gpu_opt->add_option("--math-library", llvm_math_library, "Math library for GPU code generation ({})"_format(llvm_math_library)); @@ -716,7 +719,7 @@ int main(int argc, const char* argv[]) { : PlatformID::GPU; const std::string name = llvm_gpu_name == "default" ? 
llvm_cpu_name : llvm_gpu_name; - Platform platform(pid, name, llvm_math_library, llvm_float_type, + Platform platform(pid, name, llvm_cpu_name, llvm_math_library, llvm_float_type, llvm_vector_width); logger->info("Running LLVM backend code generator"); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index b010bdf61c..7cd425b8d8 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -49,7 +49,7 @@ std::string run_gpu_llvm_visitor(const std::string& text, NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - codegen::Platform gpu_platform(codegen::PlatformID::GPU, /*name=*/"nvidia", + codegen::Platform gpu_platform(codegen::PlatformID::GPU, /*name=*/"nvptx64", math_library, use_single_precision, 1); codegen::CodegenLLVMVisitor llvm_visitor( /*mod_filename=*/"unknown", @@ -1648,4 +1648,47 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { REQUIRE(std::regex_search(module_string, m, grid_dim)); } } + + GIVEN("When optimizing for GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + THEN("address spaces are inferred and target information added") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/3, + /*use_single_precision=*/false); + std::smatch m; + + // Check target information. + // TODO: this may change when more platforms are supported. 
+ std::regex data_layout(R"(target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64\")"); + std::regex triple(R"(nvptx64-nvidia-cuda)"); + REQUIRE(std::regex_search(module_string, m, data_layout)); + REQUIRE(std::regex_search(module_string, m, triple)); + + // Check for address space casts and address spaces in general when loading data. + std::regex as_cast(R"(addrspacecast %.*__instance_var__type\* %.* to %.*__instance_var__type addrspace\(1\)\*)"); + std::regex gep_as1(R"(getelementptr inbounds %.*__instance_var__type, %.*__instance_var__type addrspace\(1\)\* %.*, i64 0, i32 .*)"); + std::regex load_as1(R"(load double\*, double\* addrspace\(1\)\* %.*)"); + REQUIRE(std::regex_search(module_string, m, as_cast)); + REQUIRE(std::regex_search(module_string, m, gep_as1)); + REQUIRE(std::regex_search(module_string, m, load_as1)); + } + } } From afec6c6dddbd92bc34a15399ee597dc85c62a95d Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Tue, 5 Apr 2022 13:32:18 +0200 Subject: [PATCH 082/105] [LLVM] Code formatting changes (#838) * Update hpc-coding-convention (#836) * Run clang-format with clang-format-13 * Fix gitlab ci NMODL spack variants Co-authored-by: Nicolas Cornu --- .gitlab-ci.yml | 2 +- .../llvm/codegen_llvm_helper_visitor.cpp | 71 +++++++++-------- .../llvm/codegen_llvm_helper_visitor.hpp | 5 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 14 ++-- src/codegen/llvm/llvm_ir_builder.cpp | 11 +-- src/codegen/llvm/llvm_utils.cpp | 20 ++--- src/codegen/llvm/main.cpp | 5 +- src/codegen/llvm/target_platform.cpp | 8 +- src/codegen/llvm/target_platform.hpp | 43 +++++------ src/main.cpp | 77 +++++++++++-------- test/unit/codegen/codegen_llvm_ir.cpp | 34 +++++--- 11 files changed, 159 insertions(+), 131 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2687b8ebb0..7af190b392 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,7 +41,7 @@ trigger cvf: 
.spack_nmodl: variables: SPACK_PACKAGE: nmodl - SPACK_PACKAGE_SPEC: ~legacy-unit+python + SPACK_PACKAGE_SPEC: ~legacy-unit+python+llvm SPACK_EXTRA_MODULES: llvm SPACK_INSTALL_EXTRA_FLAGS: -v diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index a23b24fa5b..fcec26f4e9 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -551,25 +551,27 @@ void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { create_function_for_node(node); } -std::shared_ptr -CodegenLLVMHelperVisitor::loop_initialization_expression(const std::string& induction_var, - bool is_remainder_loop) { +std::shared_ptr CodegenLLVMHelperVisitor::loop_initialization_expression( + const std::string& induction_var, + bool is_remainder_loop) { if (platform.is_gpu()) { const auto& id = create_varname(induction_var); const auto& tid = new ast::CodegenThreadId(); - return std::make_shared(id, ast::BinaryOperator(ast::BOP_ASSIGN), tid); + return std::make_shared(id, + ast::BinaryOperator(ast::BOP_ASSIGN), + tid); } - // Otherwise, platfrom is CPU. Since the loop can be a remainder loop, check if - // we need to initialize at all. + // Otherwise, platfrom is CPU. Since the loop can be a remainder loop, check if + // we need to initialize at all. if (is_remainder_loop) return nullptr; return int_initialization_expression(induction_var); } -std::shared_ptr -CodegenLLVMHelperVisitor::loop_increment_expression(const std::string& induction_var, - bool is_remainder_loop) { +std::shared_ptr CodegenLLVMHelperVisitor::loop_increment_expression( + const std::string& induction_var, + bool is_remainder_loop) { const auto& id = create_varname(induction_var); // For GPU platforms, increment by grid stride. 
@@ -578,8 +580,8 @@ CodegenLLVMHelperVisitor::loop_increment_expression(const std::string& induction const auto& inc_expr = new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), stride); return std::make_shared(id->clone(), - ast::BinaryOperator(ast::BOP_ASSIGN), - inc_expr); + ast::BinaryOperator(ast::BOP_ASSIGN), + inc_expr); } // Otherwise, proceed with increment for CPU loop. @@ -592,10 +594,10 @@ CodegenLLVMHelperVisitor::loop_increment_expression(const std::string& induction inc_expr); } -std::shared_ptr -CodegenLLVMHelperVisitor::loop_count_expression(const std::string& induction_var, - const std::string& node_count, - bool is_remainder_loop) { +std::shared_ptr CodegenLLVMHelperVisitor::loop_count_expression( + const std::string& induction_var, + const std::string& node_count, + bool is_remainder_loop) { const int width = is_remainder_loop ? 1 : platform.get_instruction_width(); const auto& id = create_varname(induction_var); const auto& mech_node_count = create_varname(node_count); @@ -644,15 +646,11 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// access node index and corresponding voltage index_statements.push_back( visitor::create_statement("node_id = node_index[{}]"_format(naming::INDUCTION_VAR))); - body_statements.push_back( - visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); + body_statements.push_back(visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); /// read ion variables - ion_read_statements(BlockType::State, - int_variables, - double_variables, - index_statements, - body_statements); + ion_read_statements( + BlockType::State, int_variables, double_variables, index_statements, body_statements); /// main compute node : extract solution expressions from the derivative block const auto& solutions = collect_nodes(node, {ast::AstNodeType::SOLUTION_EXPRESSION}); @@ -670,11 +668,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { } 
/// write ion statements - ion_write_statements(BlockType::State, - int_variables, - double_variables, - index_statements, - body_statements); + ion_write_statements( + BlockType::State, int_variables, double_variables, index_statements, body_statements); // \todo handle process_shadow_update_statement and wrote_conc_call yet } @@ -687,7 +682,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::vector induction_variables{naming::INDUCTION_VAR}; function_statements.push_back( - create_local_variable_statement(induction_variables, INTEGER_TYPE)); + create_local_variable_statement(induction_variables, INTEGER_TYPE)); if (platform.is_gpu()) { create_gpu_compute_body(compute_body, function_statements, int_variables, double_variables); @@ -736,7 +731,11 @@ void CodegenLLVMHelperVisitor::create_cpu_compute_body(ast::StatementVector& bod auto loop_block = std::make_shared(body); create_compute_body_loop(loop_block, function_statements, int_variables, double_variables); if (platform.is_cpu_with_simd()) - create_compute_body_loop(loop_block, function_statements, int_variables, double_variables, /*is_remainder_loop=*/true); + create_compute_body_loop(loop_block, + function_statements, + int_variables, + double_variables, + /*is_remainder_loop=*/true); } void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr& block, @@ -744,15 +743,19 @@ void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr& int_variables, std::vector& double_variables, bool is_remainder_loop) { - const auto& initialization = loop_initialization_expression(naming::INDUCTION_VAR, is_remainder_loop); - const auto& condition = loop_count_expression(naming::INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); + const auto& initialization = loop_initialization_expression(naming::INDUCTION_VAR, + is_remainder_loop); + const auto& condition = + loop_count_expression(naming::INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); const auto& increment = 
loop_increment_expression(naming::INDUCTION_VAR, is_remainder_loop); // Clone the statement block if needed since it can be used by the remainder loop. - auto loop_block = (is_remainder_loop || !platform.is_cpu_with_simd()) ? block : std::shared_ptr(block->clone()); + auto loop_block = (is_remainder_loop || !platform.is_cpu_with_simd()) + ? block + : std::shared_ptr(block->clone()); - // Convert local statement to use CodegenVar statements and create a FOR loop node. Also, if creating - // a remainder loop then rename variables to avoid conflicts. + // Convert local statement to use CodegenVar statements and create a FOR loop node. Also, if + // creating a remainder loop then rename variables to avoid conflicts. if (is_remainder_loop) rename_local_variables(*loop_block); convert_local_statement(*loop_block); diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index c2eb415cb2..aea2f5aea8 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -171,8 +171,9 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { private: /// Methods to create target-specific loop constructs. 
- std::shared_ptr loop_initialization_expression(const std::string& induction_var, - bool is_remainder_loop); + std::shared_ptr loop_initialization_expression( + const std::string& induction_var, + bool is_remainder_loop); std::shared_ptr loop_count_expression(const std::string& induction_var, const std::string& node_count, bool is_remainder_loop); diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index d906e9bd44..42ddc04b64 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -62,11 +62,10 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym } void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel) { - llvm::Metadata* metadata[] = { - llvm::ValueAsMetadata::get(kernel), - llvm::MDString::get(*context, "kernel"), - llvm::ValueAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 1))}; + llvm::Metadata* metadata[] = {llvm::ValueAsMetadata::get(kernel), + llvm::MDString::get(*context, "kernel"), + llvm::ValueAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 1))}; llvm::MDNode* node = llvm::MDNode::get(*context, metadata); module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); } @@ -121,7 +120,8 @@ void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLib {"SVML", VecLib::SVML}}; const auto& library = llvm_supported_vector_libraries.find(platform.get_math_library()); if (library == llvm_supported_vector_libraries.end()) - throw std::runtime_error("Error: unknown vector library - " + platform.get_math_library() + "\n"); + throw std::runtime_error("Error: unknown vector library - " + + platform.get_math_library() + "\n"); // Add vectorizable functions to the target library info. 
switch (library->second) { @@ -682,7 +682,7 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node } else if (platform.is_gpu()) { block->accept(*this); annotate_kernel_with_nvvm(func); - } else { // scalar + } else { // scalar block->accept(*this); } } else { diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index c851f02970..b99cc81817 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -451,7 +451,8 @@ void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, // Even if generating vectorised code, some variables still need to be scalar. Particularly, the // induction variable "id" and remainder loop variables (that start with "epilogue" prefix). llvm::Type* type; - if (platform.is_cpu_with_simd() && vectorize && name != kernel_id && name.rfind("epilogue", 0)) { + if (platform.is_cpu_with_simd() && vectorize && name != kernel_id && + name.rfind("epilogue", 0)) { int vector_width = platform.get_instruction_width(); type = llvm::FixedVectorType::get(element_or_scalar_type, vector_width); } else { @@ -558,8 +559,8 @@ void IRBuilder::maybe_replicate_value(llvm::Value* value) { void IRBuilder::create_grid_stride() { llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); auto create_call = [&](llvm::Intrinsic::ID id) { - llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); - return builder.CreateCall(intrinsic, {}); + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); }; llvm::Value* block_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); @@ -572,8 +573,8 @@ void IRBuilder::create_grid_stride() { void IRBuilder::create_thread_id() { llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); auto create_call = [&](llvm::Intrinsic::ID id) { - llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); - return 
builder.CreateCall(intrinsic, {}); + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); }; // For now, this function only supports NVPTX backend, however it can be easily diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index 7086275557..4168612790 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -83,16 +83,18 @@ void optimise_module_for_nvptx(codegen::Platform& platform, std::string platform_name = platform.get_name(); // Target and layout information. - static const std::map triple_str = { - {"nvptx", "nvptx-nvidia-cuda"}, - {"nvptx64", "nvptx64-nvidia-cuda"}}; + static const std::map triple_str = {{"nvptx", "nvptx-nvidia-cuda"}, + {"nvptx64", + "nvptx64-nvidia-cuda"}}; static const std::map data_layout_str = { - {"nvptx", "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32" - "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" - "-v64:64:64-v128:128:128-n16:32:64"}, - {"nvptx64", "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32" - "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" - "-v64:64:64-v128:128:128-n16:32:64"}}; + {"nvptx", + "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}, + {"nvptx64", + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}}; // Set data layout and target triple information for the module. 
auto triple = triple_str.at(platform_name); diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp index 92d8a486c1..cd2ec2cb12 100644 --- a/src/codegen/llvm/main.cpp +++ b/src/codegen/llvm/main.cpp @@ -51,7 +51,10 @@ int main(int argc, const char* argv[]) { codegen::Platform platform; logger->info("Running LLVM Visitor"); - codegen::CodegenLLVMVisitor llvm_visitor(filename, /*output_dir=*/".", platform, /*opt_level_ir=*/0); + codegen::CodegenLLVMVisitor llvm_visitor(filename, + /*output_dir=*/".", + platform, + /*opt_level_ir=*/0); llvm_visitor.visit_program(*ast); std::unique_ptr module = llvm_visitor.get_module(); diff --git a/src/codegen/llvm/target_platform.cpp b/src/codegen/llvm/target_platform.cpp index 49a0db9a31..fff195d6b8 100644 --- a/src/codegen/llvm/target_platform.cpp +++ b/src/codegen/llvm/target_platform.cpp @@ -17,7 +17,7 @@ const std::string Platform::DEFAULT_MATH_LIBRARY = "none"; bool Platform::is_default_platform() { // Default platform is a CPU. - return platform_id == PlatformID::CPU && name == Platform::DEFAULT_PLATFORM_NAME; + return platform_id == PlatformID::CPU && name == Platform::DEFAULT_PLATFORM_NAME; } bool Platform::is_cpu() { @@ -33,11 +33,11 @@ bool Platform::is_gpu() { } bool Platform::is_CUDA_gpu() { - return platform_id == PlatformID::GPU && (name == "nvptx" || name == "nvptx64"); + return platform_id == PlatformID::GPU && (name == "nvptx" || name == "nvptx64"); } bool Platform::is_single_precision() { - return use_single_precision; + return use_single_precision; } std::string Platform::get_name() const { @@ -59,7 +59,7 @@ int Platform::get_instruction_width() const { } int Platform::get_precision() const { - return use_single_precision? 32 : 64; + return use_single_precision ? 
32 : 64; } } // namespace codegen diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp index 282f6943d7..bed9e8923f 100644 --- a/src/codegen/llvm/target_platform.hpp +++ b/src/codegen/llvm/target_platform.hpp @@ -12,10 +12,7 @@ namespace nmodl { namespace codegen { -enum PlatformID { - CPU, - GPU -}; +enum PlatformID { CPU, GPU }; /** * \class Platform @@ -57,31 +54,31 @@ class Platform { std::string& math_library, bool use_single_precision = false, int instruction_width = 1) - : platform_id(platform_id) - , name(name) - , subtarget_name(subtarget_name) - , math_library(math_library) - , use_single_precision(use_single_precision) - , instruction_width(instruction_width) {} + : platform_id(platform_id) + , name(name) + , subtarget_name(subtarget_name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} Platform(PlatformID platform_id, const std::string& name, std::string& math_library, bool use_single_precision = false, int instruction_width = 1) - : platform_id(platform_id) - , name(name) - , math_library(math_library) - , use_single_precision(use_single_precision) - , instruction_width(instruction_width) {} - - Platform(bool use_single_precision, - int instruction_width) - : platform_id(PlatformID::CPU) - , use_single_precision(use_single_precision) - , instruction_width(instruction_width) {} - - Platform() : platform_id(PlatformID::CPU) {} + : platform_id(platform_id) + , name(name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform(bool use_single_precision, int instruction_width) + : platform_id(PlatformID::CPU) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform() + : platform_id(PlatformID::CPU) {} /// Checks if this platform is a default platform. 
bool is_default_platform(); diff --git a/src/main.cpp b/src/main.cpp index de2e415058..1aa1de992e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -710,40 +710,51 @@ int main(int argc, const char* argv[]) { #ifdef NMODL_LLVM_BACKEND if (llvm_ir || llvm_benchmark) { - // If benchmarking, we want to optimize the IR with target - // information and not in LLVM visitor. - int llvm_opt_level = llvm_benchmark ? 0 : llvm_opt_level_ir; - - // Create platform abstraction. - PlatformID pid = llvm_gpu_name == "default" ? PlatformID::CPU - : PlatformID::GPU; - const std::string name = - llvm_gpu_name == "default" ? llvm_cpu_name : llvm_gpu_name; - Platform platform(pid, name, llvm_cpu_name, llvm_math_library, llvm_float_type, - llvm_vector_width); - - logger->info("Running LLVM backend code generator"); - CodegenLLVMVisitor visitor(modfile, output_dir, platform, - llvm_opt_level, !llvm_no_debug, - llvm_fast_math_flags); - visitor.visit_program(*ast); - ast_to_nmodl(*ast, filepath("llvm", "mod")); - ast_to_json(*ast, filepath("llvm", "json")); - - if (llvm_benchmark) { - // \todo integrate Platform class here - if (llvm_gpu_name != "default") { - logger->warn("GPU benchmarking is not supported, targeting " - "CPU instead"); + // If benchmarking, we want to optimize the IR with target + // information and not in LLVM visitor. + int llvm_opt_level = llvm_benchmark ? 0 : llvm_opt_level_ir; + + // Create platform abstraction. + PlatformID pid = llvm_gpu_name == "default" ? PlatformID::CPU : PlatformID::GPU; + const std::string name = llvm_gpu_name == "default" ? 
llvm_cpu_name : llvm_gpu_name; + Platform platform(pid, + name, + llvm_cpu_name, + llvm_math_library, + llvm_float_type, + llvm_vector_width); + + logger->info("Running LLVM backend code generator"); + CodegenLLVMVisitor visitor(modfile, + output_dir, + platform, + llvm_opt_level, + !llvm_no_debug, + llvm_fast_math_flags); + visitor.visit_program(*ast); + ast_to_nmodl(*ast, filepath("llvm", "mod")); + ast_to_json(*ast, filepath("llvm", "json")); + + if (llvm_benchmark) { + // \todo integrate Platform class here + if (llvm_gpu_name != "default") { + logger->warn( + "GPU benchmarking is not supported, targeting " + "CPU instead"); + } + + logger->info("Running LLVM benchmark"); + benchmark::LLVMBenchmark benchmark(visitor, + modfile, + output_dir, + shared_lib_paths, + num_experiments, + instance_size, + llvm_cpu_name, + llvm_opt_level_ir, + llvm_opt_level_codegen); + benchmark.run(ast); } - - logger->info("Running LLVM benchmark"); - benchmark::LLVMBenchmark benchmark( - visitor, modfile, output_dir, shared_lib_paths, - num_experiments, instance_size, llvm_cpu_name, - llvm_opt_level_ir, llvm_opt_level_codegen); - benchmark.run(ast); - } } #endif } diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 7cd425b8d8..5eb4132800 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -49,11 +49,13 @@ std::string run_gpu_llvm_visitor(const std::string& text, NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - codegen::Platform gpu_platform(codegen::PlatformID::GPU, /*name=*/"nvptx64", - math_library, use_single_precision, 1); + codegen::Platform gpu_platform( + codegen::PlatformID::GPU, /*name=*/"nvptx64", math_library, use_single_precision, 1); codegen::CodegenLLVMVisitor llvm_visitor( /*mod_filename=*/"unknown", - /*output_dir=*/".", gpu_platform, opt_level, + /*output_dir=*/".", + gpu_platform, + opt_level, /*add_debug_information=*/false); 
llvm_visitor.visit_program(*ast); @@ -77,12 +79,15 @@ std::string run_llvm_visitor(const std::string& text, NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - codegen::Platform cpu_platform(codegen::PlatformID::CPU, /*name=*/"default", - vec_lib, use_single_precision, vector_width); + codegen::Platform cpu_platform( + codegen::PlatformID::CPU, /*name=*/"default", vec_lib, use_single_precision, vector_width); codegen::CodegenLLVMVisitor llvm_visitor( /*mod_filename=*/"unknown", - /*output_dir=*/".", cpu_platform, opt_level, - /*add_debug_information=*/false, fast_math_flags); + /*output_dir=*/".", + cpu_platform, + opt_level, + /*add_debug_information=*/false, + fast_math_flags); llvm_visitor.visit_program(*ast); return llvm_visitor.dump_module(); @@ -1306,7 +1311,8 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { THEN("vector and epilogue scalar loops are constructed") { - codegen::Platform simd_platform(/*use_single_precision=*/false, /*instruction_width=*/8); + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/8); auto result = run_llvm_visitor_helper(nmodl_text, simd_platform, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); @@ -1633,7 +1639,8 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { // Check kernel annotations are correclty created. std::regex annotations(R"(!nvvm\.annotations = !\{!0\})"); - std::regex kernel_data(R"(!0 = !\{void \(%.*__instance_var__type\*\)\* @nrn_state_.*, !\"kernel\", i32 1\})"); + std::regex kernel_data( + R"(!0 = !\{void \(%.*__instance_var__type\*\)\* @nrn_state_.*, !\"kernel\", i32 1\})"); REQUIRE(std::regex_search(module_string, m, annotations)); REQUIRE(std::regex_search(module_string, m, kernel_data)); @@ -1677,14 +1684,17 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { // Check target information. // TODO: this may change when more platforms are supported. 
- std::regex data_layout(R"(target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64\")"); + std::regex data_layout( + R"(target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64\")"); std::regex triple(R"(nvptx64-nvidia-cuda)"); REQUIRE(std::regex_search(module_string, m, data_layout)); REQUIRE(std::regex_search(module_string, m, triple)); // Check for address space casts and address spaces in general when loading data. - std::regex as_cast(R"(addrspacecast %.*__instance_var__type\* %.* to %.*__instance_var__type addrspace\(1\)\*)"); - std::regex gep_as1(R"(getelementptr inbounds %.*__instance_var__type, %.*__instance_var__type addrspace\(1\)\* %.*, i64 0, i32 .*)"); + std::regex as_cast( + R"(addrspacecast %.*__instance_var__type\* %.* to %.*__instance_var__type addrspace\(1\)\*)"); + std::regex gep_as1( + R"(getelementptr inbounds %.*__instance_var__type, %.*__instance_var__type addrspace\(1\)\* %.*, i64 0, i32 .*)"); std::regex load_as1(R"(load double\*, double\* addrspace\(1\)\* %.*)"); REQUIRE(std::regex_search(module_string, m, as_cast)); REQUIRE(std::regex_search(module_string, m, gep_as1)); From ac6d731a57ff3c5b3e50a99f7fea583a6e595d95 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Fri, 8 Apr 2022 09:30:56 +0200 Subject: [PATCH 083/105] [LLVM][GPU][+refactoring] Replacement of math intrinsics with library calls (#835) Added an LLVM pass that replaces math intrinsics with calls to math library. In particular: * Functionality of replacement with SIMD functions is factored out into a separate file and LLVM version dependencies are dropped (LLVM 13 is already used anyway). * A pass to replace intrinsics with libdevice calls when targeting CUDA platforms has been added. So far only `exp` and `pow` are supported (single and double precision). 
* Added a test to check the replacement Co-authored-by: Ioannis Magkanaris --- src/codegen/llvm/CMakeLists.txt | 2 + src/codegen/llvm/codegen_llvm_visitor.cpp | 100 +-------- src/codegen/llvm/codegen_llvm_visitor.hpp | 7 - src/codegen/llvm/llvm_utils.cpp | 7 + src/codegen/llvm/llvm_utils.hpp | 5 +- .../llvm/replace_with_lib_functions.cpp | 210 ++++++++++++++++++ .../llvm/replace_with_lib_functions.hpp | 65 ++++++ src/codegen/llvm/target_platform.cpp | 10 +- src/codegen/llvm/target_platform.hpp | 10 +- test/unit/codegen/codegen_llvm_ir.cpp | 45 +++- 10 files changed, 343 insertions(+), 118 deletions(-) create mode 100644 src/codegen/llvm/replace_with_lib_functions.cpp create mode 100644 src/codegen/llvm/replace_with_lib_functions.hpp diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 792591c447..44f42a5313 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -12,6 +12,8 @@ set(LLVM_CODEGEN_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/replace_with_lib_functions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/replace_with_lib_functions.hpp ${CMAKE_CURRENT_SOURCE_DIR}/target_platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/target_platform.hpp) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 42ddc04b64..ca3b405be3 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -12,16 +12,9 @@ #include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" -#include "llvm/Support/Host.h" - -#if LLVM_VERSION_MAJOR >= 13 -#include "llvm/CodeGen/ReplaceWithVeclib.h" -#endif namespace nmodl { namespace codegen { @@ 
-70,72 +63,6 @@ void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel) { module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); } -#if LLVM_VERSION_MAJOR >= 13 -void CodegenLLVMVisitor::add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, - llvm::Triple& triple) { - // Since LLVM does not support SLEEF as a vector library yet, process it separately. - if (platform.get_math_library() == "SLEEF") { -// clang-format off -#define FIXED(w) llvm::ElementCount::getFixed(w) -// clang-format on -#define DISPATCH(func, vec_func, width) {func, vec_func, width}, - - // Populate function definitions of only exp and pow (for now) - const llvm::VecDesc aarch64_functions[] = { - // clang-format off - DISPATCH("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4)) - DISPATCH("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2)) - DISPATCH("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4)) - DISPATCH("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2)) - // clang-format on - }; - const llvm::VecDesc x86_functions[] = { - // clang-format off - DISPATCH("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2)) - DISPATCH("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4)) - DISPATCH("llvm.exp.f64", "_ZGVeN8v_exp", FIXED(8)) - DISPATCH("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2)) - DISPATCH("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4)) - DISPATCH("llvm.pow.f64", "_ZGVeN8vv_pow", FIXED(8)) - // clang-format on - }; -#undef DISPATCH - - if (triple.isAArch64()) { - tli.addVectorizableFunctions(aarch64_functions); - } - if (triple.isX86() && triple.isArch64Bit()) { - tli.addVectorizableFunctions(x86_functions); - } - - } else { - // A map to query vector library by its string value. 
- using VecLib = llvm::TargetLibraryInfoImpl::VectorLibrary; - static const std::map llvm_supported_vector_libraries = { - {"Accelerate", VecLib::Accelerate}, - {"libmvec", VecLib::LIBMVEC_X86}, - {"libsystem_m", VecLib ::DarwinLibSystemM}, - {"MASSV", VecLib::MASSV}, - {"none", VecLib::NoLibrary}, - {"SVML", VecLib::SVML}}; - const auto& library = llvm_supported_vector_libraries.find(platform.get_math_library()); - if (library == llvm_supported_vector_libraries.end()) - throw std::runtime_error("Error: unknown vector library - " + - platform.get_math_library() + "\n"); - - // Add vectorizable functions to the target library info. - switch (library->second) { - case VecLib::LIBMVEC_X86: - if (!triple.isX86() || !triple.isArch64Bit()) - break; - default: - tli.addVectorizableFunctionsFromVecLib(library->second); - break; - } - } -} -#endif - llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { node->accept(*this); return ir_builder.pop_last_value(); @@ -890,31 +817,8 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { utils::optimise_module(*module, opt_level_ir); } - // Optionally, replace LLVM math intrinsics with vector library calls. - if (platform.is_cpu_with_simd()) { -#if LLVM_VERSION_MAJOR < 13 - logger->warn( - "This version of LLVM does not support replacement of LLVM intrinsics with vector " - "library calls"); -#else - // First, get the target library information and add vectorizable functions for the - // specified vector library. - llvm::Triple triple(llvm::sys::getDefaultTargetTriple()); - llvm::TargetLibraryInfoImpl target_lib_info = llvm::TargetLibraryInfoImpl(triple); - add_vectorizable_functions_from_vec_lib(target_lib_info, triple); - - // Run passes that replace math intrinsics. 
- llvm::legacy::FunctionPassManager fpm(module.get()); - fpm.add(new llvm::TargetLibraryInfoWrapperPass(target_lib_info)); - fpm.add(new llvm::ReplaceWithVeclibLegacy); - fpm.doInitialization(); - for (auto& function: module->getFunctionList()) { - if (!function.isDeclaration()) - fpm.run(function); - } - fpm.doFinalization(); -#endif - } + // Optionally, replace LLVM math intrinsics with library calls. + utils::replace_with_lib_functions(platform, *module); // Handle GPU optimizations (CUDA platfroms only for now). if (platform.is_gpu()) { diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 683cc7972a..299071ae80 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -26,7 +26,6 @@ #include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -305,12 +304,6 @@ class CodegenLLVMVisitor: public CodegenCVisitor { // Annotates kernel function with NVVM metadata. void annotate_kernel_with_nvvm(llvm::Function* kernel); -#if LLVM_VERSION_MAJOR >= 13 - /// Populates target library info with the vector library definitions. - void add_vectorizable_functions_from_vec_lib(llvm::TargetLibraryInfoImpl& tli, - llvm::Triple& triple); -#endif - /// Accepts the given AST node and returns the processed value. 
llvm::Value* accept_and_get(const std::shared_ptr& node); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index 4168612790..bd4feee32f 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -6,6 +6,7 @@ *************************************************************************/ #include "codegen/llvm/llvm_utils.hpp" +#include "codegen/llvm/replace_with_lib_functions.hpp" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -162,6 +163,12 @@ void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* t run_optimisation_passes(module, func_pm, module_pm); } +void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module) { + llvm::legacy::PassManager pm; + pm.add(new llvm::ReplaceMathFunctions(platform)); + pm.run(module); +} + /****************************************************************************************/ /* File utils */ /****************************************************************************************/ diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp index 17be5073e2..3394463317 100644 --- a/src/codegen/llvm/llvm_utils.hpp +++ b/src/codegen/llvm/llvm_utils.hpp @@ -21,6 +21,9 @@ void initialise_optimisation_passes(); /// Initialises NVPTX-specific optimisation passes. void initialise_nvptx_passes(); +/// Replaces calls to LLVM intrinsics with appropriate library calls. +void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module); + /// Optimises the given LLVM IR module for NVPTX targets. void optimise_module_for_nvptx(codegen::Platform& platform, llvm::Module& module, @@ -30,7 +33,7 @@ void optimise_module_for_nvptx(codegen::Platform& platform, /// Optimises the given LLVM IR module. void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm = nullptr); -/// +/// Saves generated IR module to .ll file. 
void save_ir_to_ll_file(llvm::Module& module, const std::string& filename); } // namespace utils diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp b/src/codegen/llvm/replace_with_lib_functions.cpp new file mode 100644 index 0000000000..6d98dd3eb0 --- /dev/null +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -0,0 +1,210 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/replace_with_lib_functions.hpp" + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/LegacyPassManager.h" + +namespace llvm { + +char ReplaceMathFunctions::ID = 0; + +bool ReplaceMathFunctions::runOnModule(Module& module) { + legacy::FunctionPassManager fpm(&module); + bool modified = false; + + // If the platform supports SIMD, replace math intrinsics with library + // functions. + if (platform->is_cpu_with_simd()) { + // First, get the target library information and add vectorizable functions for the + // specified vector library. + Triple triple(sys::getDefaultTargetTriple()); + TargetLibraryInfoImpl tli = TargetLibraryInfoImpl(triple); + add_vectorizable_functions_from_vec_lib(tli, triple); + + // Add passes that replace math intrinsics with calls. + fpm.add(new TargetLibraryInfoWrapperPass(tli)); + fpm.add(new ReplaceWithVeclibLegacy); + } + + // For CUDA GPUs, replace with calls to libdevice. 
+ if (platform->is_CUDA_gpu()) { + fpm.add(new ReplaceWithLibdevice); + } + + // Run passes. + fpm.doInitialization(); + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration()) + modified |= fpm.run(function); + } + fpm.doFinalization(); + + return modified; +} + +void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, + Triple& triple) { + // Since LLVM does not support SLEEF as a vector library yet, process it separately. + if (platform->get_math_library() == "SLEEF") { +// clang-format off +#define FIXED(w) ElementCount::getFixed(w) +// clang-format on +#define DISPATCH(func, vec_func, width) {func, vec_func, width}, + + // Populate function definitions of only exp and pow (for now). + const VecDesc aarch64_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2)) + DISPATCH("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2)) + // clang-format on + }; + const VecDesc x86_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2)) + DISPATCH("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVeN8v_exp", FIXED(8)) + DISPATCH("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2)) + DISPATCH("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVeN8vv_pow", FIXED(8)) + // clang-format on + }; +#undef DISPATCH +#undef FIXED + + if (triple.isAArch64()) { + tli.addVectorizableFunctions(aarch64_functions); + } + if (triple.isX86() && triple.isArch64Bit()) { + tli.addVectorizableFunctions(x86_functions); + } + + } else { + // A map to query vector library by its string value. 
+ using VecLib = TargetLibraryInfoImpl::VectorLibrary; + static const std::map llvm_supported_vector_libraries = { + {"Accelerate", VecLib::Accelerate}, + {"libmvec", VecLib::LIBMVEC_X86}, + {"libsystem_m", VecLib ::DarwinLibSystemM}, + {"MASSV", VecLib::MASSV}, + {"none", VecLib::NoLibrary}, + {"SVML", VecLib::SVML}}; + + const auto& library = llvm_supported_vector_libraries.find(platform->get_math_library()); + if (library == llvm_supported_vector_libraries.end()) + throw std::runtime_error("Error: unknown vector library - " + + platform->get_math_library() + "\n"); + + // Add vectorizable functions to the target library info. + if (library->second != VecLib::LIBMVEC_X86 || (triple.isX86() && triple.isArch64Bit())) { + tli.addVectorizableFunctionsFromVecLib(library->second); + } + } +} + +void ReplaceWithLibdevice::getAnalysisUsage(AnalysisUsage& au) const { + au.setPreservesCFG(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); +} + +bool ReplaceWithLibdevice::runOnFunction(Function& function) { + bool modified = false; + + // Try to replace math intrinsics. + std::vector replaced_calls; + for (auto& instruction: instructions(function)) { + if (auto* call_inst = dyn_cast(&instruction)) { + if (replace_call(*call_inst)) { + replaced_calls.push_back(call_inst); + modified = true; + } + } + } + + // Remove calls to replaced intrinsics. + for (auto* call_inst: replaced_calls) { + call_inst->eraseFromParent(); + } + + return modified; +} + +bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { + Module* m = call_inst.getModule(); + Function* function = call_inst.getCalledFunction(); + + // Replace math intrinsics only! 
+ auto id = function->getIntrinsicID(); + bool is_nvvm_intrinsic = id == Intrinsic::nvvm_read_ptx_sreg_ntid_x || + id == Intrinsic::nvvm_read_ptx_sreg_nctaid_x || + id == Intrinsic::nvvm_read_ptx_sreg_ctaid_x || + id == Intrinsic::nvvm_read_ptx_sreg_tid_x; + if (id == Intrinsic::not_intrinsic || is_nvvm_intrinsic) + return false; + + // Map of supported replacements. For now it is only exp and pow. + static const std::map libdevice_name = {{"llvm.exp.f32", "__nv_expf"}, + {"llvm.exp.f64", "__nv_exp"}, + {"llvm.pow.f32", "__nv_powf"}, + {"llvm.pow.f64", "__nv_pow"}}; + + // If replacement is not supported, abort. + std::string old_name = function->getName().str(); + auto it = libdevice_name.find(old_name); + if (it == libdevice_name.end()) + throw std::runtime_error("Error: replacements for " + old_name + " are not supported!\n"); + + // Get (or create) libdevice function. + Function* libdevice_func = m->getFunction(it->second); + if (!libdevice_func) { + libdevice_func = Function::Create(function->getFunctionType(), + Function::ExternalLinkage, + it->second, + *m); + libdevice_func->copyAttributesFrom(function); + } + + // Create a call to libdevice function with the same operands. + IRBuilder<> builder(&call_inst); + std::vector args(call_inst.arg_operands().begin(), call_inst.arg_operands().end()); + SmallVector op_bundles; + call_inst.getOperandBundlesAsDefs(op_bundles); + CallInst* new_call = builder.CreateCall(libdevice_func, args, op_bundles); + + // Replace all uses of old instruction with the new one. Also, copy + // fast math flags if necessary. 
+ call_inst.replaceAllUsesWith(new_call); + if (isa(new_call)) { + new_call->copyFastMathFlags(&call_inst); + } + + return true; +} + +char ReplaceWithLibdevice::ID = 0; +static RegisterPass X("libdevice-replacement", + "Pass replacing math functions with calls to libdevice", + false, + false); + +} // namespace llvm diff --git a/src/codegen/llvm/replace_with_lib_functions.hpp b/src/codegen/llvm/replace_with_lib_functions.hpp new file mode 100644 index 0000000000..5bf38ba85f --- /dev/null +++ b/src/codegen/llvm/replace_with_lib_functions.hpp @@ -0,0 +1,65 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include "codegen/llvm/target_platform.hpp" + +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Host.h" + +using nmodl::codegen::Platform; + +namespace llvm { + +/** + * \class ReplaceMathFunctions + * \brief A module LLVM pass that replaces math intrinsics with + * SIMD or libdevice library calls. + */ +class ReplaceMathFunctions: public ModulePass { + private: + const Platform* platform; + + public: + static char ID; + + ReplaceMathFunctions(const Platform& platform) + : ModulePass(ID) + , platform(&platform) {} + + bool runOnModule(Module& module) override; + + private: + /// Populates `tli` with vectorizable function definitions. + void add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, Triple& triple); +}; + +/** + * \class ReplaceWithLibdevice + * \brief A function LLVM pass that replaces math intrinsics with + * libdevice library calls. 
+ */ +class ReplaceWithLibdevice: public FunctionPass { + public: + static char ID; + + ReplaceWithLibdevice() + : llvm::FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage& au) const override; + + bool runOnFunction(Function& function) override; + + private: + /// Replaces call instruction to intrinsic with libdevice call. + bool replace_call(CallInst& call_inst); +}; + +} // namespace llvm diff --git a/src/codegen/llvm/target_platform.cpp b/src/codegen/llvm/target_platform.cpp index fff195d6b8..bcab739fb3 100644 --- a/src/codegen/llvm/target_platform.cpp +++ b/src/codegen/llvm/target_platform.cpp @@ -15,24 +15,24 @@ namespace codegen { const std::string Platform::DEFAULT_PLATFORM_NAME = "default"; const std::string Platform::DEFAULT_MATH_LIBRARY = "none"; -bool Platform::is_default_platform() { +bool Platform::is_default_platform() const { // Default platform is a CPU. return platform_id == PlatformID::CPU && name == Platform::DEFAULT_PLATFORM_NAME; } -bool Platform::is_cpu() { +bool Platform::is_cpu() const { return platform_id == PlatformID::CPU; } -bool Platform::is_cpu_with_simd() { +bool Platform::is_cpu_with_simd() const { return platform_id == PlatformID::CPU && instruction_width > 1; } -bool Platform::is_gpu() { +bool Platform::is_gpu() const { return platform_id == PlatformID::GPU; } -bool Platform::is_CUDA_gpu() { +bool Platform::is_CUDA_gpu() const { return platform_id == PlatformID::GPU && (name == "nvptx" || name == "nvptx64"); } diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp index bed9e8923f..8676f176b4 100644 --- a/src/codegen/llvm/target_platform.hpp +++ b/src/codegen/llvm/target_platform.hpp @@ -81,19 +81,19 @@ class Platform { : platform_id(PlatformID::CPU) {} /// Checks if this platform is a default platform. - bool is_default_platform(); + bool is_default_platform() const; /// Checks if this platform is a CPU. 
- bool is_cpu(); + bool is_cpu() const; /// Checks if this platform is a CPU with SIMD support. - bool is_cpu_with_simd(); + bool is_cpu_with_simd() const; /// Checks if this platform is a GPU. - bool is_gpu(); + bool is_gpu() const; /// Checks if this platform is CUDA platform. - bool is_CUDA_gpu(); + bool is_CUDA_gpu() const; bool is_single_precision(); diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 5eb4132800..3edc6c198f 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1366,7 +1366,6 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { REQUIRE(std::regex_search(no_library_module_str, m, exp_decl)); REQUIRE(std::regex_search(no_library_module_str, m, exp_call)); -#if LLVM_VERSION_MAJOR >= 13 // Check exponential calls are replaced with calls to SVML library. std::string svml_library_module_str = run_llvm_visitor(nmodl_text, /*opt_level=*/0, @@ -1444,7 +1443,6 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_decl)); REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_call)); REQUIRE(!std::regex_search(libsystem_m_library_module_str, m, fexp_call)); -#endif } } } @@ -1701,4 +1699,47 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { REQUIRE(std::regex_search(module_string, m, load_as1)); } } + + GIVEN("When using math functions") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = exp(y) + x ^ 2 + } + )"; + + THEN("calls to libdevice are created") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/3, + /*use_single_precision=*/false, + /*math_library=*/"libdevice"); + std::smatch m; + + // Check if exp and pow intrinsics have been replaced. 
+ std::regex exp_declaration(R"(declare double @__nv_exp\(double\))"); + std::regex exp_new_call(R"(call double @__nv_exp\(double %.*\))"); + std::regex exp_old_call(R"(call double @llvm\.exp\.f64\(double %.*\))"); + std::regex pow_declaration(R"(declare double @__nv_pow\(double, double\))"); + std::regex pow_new_call(R"(call double @__nv_pow\(double %.*, double .*\))"); + std::regex pow_old_call(R"(call double @llvm\.pow\.f64\(double %.*, double .*\))"); + REQUIRE(std::regex_search(module_string, m, exp_declaration)); + REQUIRE(std::regex_search(module_string, m, exp_new_call)); + REQUIRE(!std::regex_search(module_string, m, exp_old_call)); + REQUIRE(std::regex_search(module_string, m, pow_declaration)); + REQUIRE(std::regex_search(module_string, m, pow_new_call)); + REQUIRE(!std::regex_search(module_string, m, pow_old_call)); + } + } } From 5ffc590fb8d15770cc7e507c1a85d0051ee6fc00 Mon Sep 17 00:00:00 2001 From: Omar Awile Date: Wed, 27 Apr 2022 14:32:20 +0200 Subject: [PATCH 084/105] JIT invocation from python for benchmarks (#832) - Created CodegenDriver class to factor out ast preparation - Created pybind wrappers for Jit and Codegen configuration options - Updated benchmark runner to return runtime stats - Return benchmark results to python - Addressed @iomaganaris' comments. 
- Add a PyJIT integration test * Re-enable python bindings by default --- CMakeLists.txt | 2 +- src/codegen/CMakeLists.txt | 3 +- src/codegen/codegen_c_visitor.hpp | 1 + src/codegen/codegen_driver.cpp | 278 ++++++++ src/codegen/codegen_driver.hpp | 168 +++++ src/codegen/llvm/codegen_llvm_visitor.cpp | 1 + src/codegen/llvm/codegen_llvm_visitor.hpp | 1 - src/codegen/llvm/target_platform.hpp | 4 +- src/main.cpp | 605 ++++-------------- src/pybind/CMakeLists.txt | 6 + src/pybind/pynmodl.cpp | 111 ++++ test/benchmark/CMakeLists.txt | 17 + test/benchmark/benchmark.py | 25 + test/benchmark/llvm_benchmark.cpp | 50 +- test/benchmark/llvm_benchmark.hpp | 14 +- test/unit/CMakeLists.txt | 6 +- test/unit/codegen/codegen_data_helper.hpp | 7 +- test/unit/codegen/codegen_llvm_execution.cpp | 8 +- .../codegen/codegen_llvm_instance_struct.cpp | 2 +- 19 files changed, 788 insertions(+), 521 deletions(-) create mode 100644 src/codegen/codegen_driver.cpp create mode 100644 src/codegen/codegen_driver.hpp create mode 100644 test/benchmark/benchmark.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e4221ac17..550034d098 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) # ============================================================================= # Build options for NMODL # ============================================================================= -option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" OFF) +option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" ON) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. 
instead of 2019 nist constants" OFF) option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON) option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" ON) diff --git a/src/codegen/CMakeLists.txt b/src/codegen/CMakeLists.txt index 214d38c077..8ae5e8b140 100644 --- a/src/codegen/CMakeLists.txt +++ b/src/codegen/CMakeLists.txt @@ -11,7 +11,8 @@ add_library( codegen_helper_visitor.cpp codegen_info.cpp codegen_ispc_visitor.cpp - codegen_utils.cpp) + codegen_utils.cpp + codegen_driver.cpp) add_dependencies(codegen lexer util visitor) target_link_libraries(codegen PRIVATE util) diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index b945554ba6..bf336819f3 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -36,6 +36,7 @@ namespace nmodl { /// encapsulates code generation backend implementations namespace codegen { +using namespace fmt::literals; /** * @defgroup codegen Code Generation Implementation * @brief Implementations of code generation backends diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp new file mode 100644 index 0000000000..542c880557 --- /dev/null +++ b/src/codegen/codegen_driver.cpp @@ -0,0 +1,278 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include + +#include "codegen/codegen_driver.hpp" +#include "codegen_compatibility_visitor.hpp" +#include "utils/logger.hpp" +#include "visitors/after_cvode_to_cnexp_visitor.hpp" +#include "visitors/ast_visitor.hpp" +#include "visitors/constant_folder_visitor.hpp" +#include "visitors/global_var_visitor.hpp" +#include "visitors/inline_visitor.hpp" +#include "visitors/ispc_rename_visitor.hpp" +#include "visitors/kinetic_block_visitor.hpp" +#include "visitors/local_to_assigned_visitor.hpp" +#include "visitors/local_var_rename_visitor.hpp" +#include "visitors/localize_visitor.hpp" +#include "visitors/loop_unroll_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/nmodl_visitor.hpp" +#include "visitors/perf_visitor.hpp" +#include "visitors/semantic_analysis_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/steadystate_visitor.hpp" +#include "visitors/sympy_conductance_visitor.hpp" +#include "visitors/sympy_solver_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "visitors/units_visitor.hpp" +#include "visitors/verbatim_var_rename_visitor.hpp" + +using namespace nmodl; +using namespace codegen; +using namespace visitor; + +bool CodegenDriver::prepare_mod(std::shared_ptr node) { + /// whether to update existing symbol table or create new + /// one whenever we run symtab visitor. 
+ bool update_symtab = false; + + std::string modfile; + std::string scratch_dir = "tmp"; + auto filepath = [scratch_dir, modfile](const std::string& suffix, const std::string& ext) { + static int count = 0; + return fmt::format( + "{}/{}.{}.{}.{}", scratch_dir, modfile, std::to_string(count++), suffix, ext); + }; + + /// just visit the ast + AstVisitor().visit_program(*node); + + /// construct symbol table + { + logger->info("Running symtab visitor"); + SymtabVisitor(update_symtab).visit_program(*node); + } + + /// Check some rules that ast should follow + { + logger->info("Running semantic analysis visitor"); + if (SemanticAnalysisVisitor().check(*node)) { + return false; + } + } + + /// use cnexp instead of after_cvode solve method + { + logger->info("Running CVode to cnexp visitor"); + AfterCVodeToCnexpVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("after_cvode_to_cnexp", "mod")); + } + + /// Rename variables that match ISPC compiler double constants + if (cfg.ispc_backend) { + logger->info("Running ISPC variables rename visitor"); + IspcRenameVisitor(node).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("ispc_double_rename", "mod")); + } + + /// GLOBAL to RANGE rename visitor + if (cfg.nmodl_global_to_range) { + // make sure to run perf visitor because code generator + // looks for read/write counts const/non-const declaration + PerfVisitor().visit_program(*node); + // make sure to run the GlobalToRange visitor after all the + // reinitializations of Symtab + logger->info("Running GlobalToRange visitor"); + GlobalToRangeVisitor(*node).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("ispc_double_rename", "mod")); + } + + /// LOCAL to ASSIGNED visitor + if (cfg.nmodl_local_to_range) { + logger->info("Running LOCAL to ASSIGNED visitor"); + PerfVisitor().visit_program(*node); + LocalToAssignedVisitor().visit_program(*node); + 
SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("global_to_range", "mod")); + } + + { + // Compatibility Checking + logger->info("Running code compatibility checker"); + // run perfvisitor to update read/write counts + PerfVisitor().visit_program(*node); + + auto ast_has_unhandled_nodes = CodegenCompatibilityVisitor().find_unhandled_ast_nodes( + *node); + // If we want to just check compatibility we return the result + if (cfg.only_check_compatibility) { + return !ast_has_unhandled_nodes; // negate since this function returns false on failure + } + + // If there is an incompatible construct and code generation is not forced exit NMODL + if (ast_has_unhandled_nodes && !cfg.force_codegen) { + return false; + } + } + + ast_to_nmodl(*node, filepath("ast", "mod")); + ast_to_json(*node, filepath("ast", "json")); + + if (cfg.verbatim_rename) { + logger->info("Running verbatim rename visitor"); + VerbatimVarRenameVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("verbatim_rename", "mod")); + } + + if (cfg.nmodl_const_folding) { + logger->info("Running nmodl constant folding visitor"); + ConstantFolderVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("constfold", "mod")); + } + + if (cfg.nmodl_unroll) { + logger->info("Running nmodl loop unroll visitor"); + LoopUnrollVisitor().visit_program(*node); + ConstantFolderVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("unroll", "mod")); + SymtabVisitor(update_symtab).visit_program(*node); + } + + /// note that we can not symtab visitor in update mode as we + /// replace kinetic block with derivative block of same name + /// in global scope + { + logger->info("Running KINETIC block visitor"); + auto kineticBlockVisitor = KineticBlockVisitor(); + kineticBlockVisitor.visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + const auto filename = filepath("kinetic", "mod"); + ast_to_nmodl(*node, filename); + if (cfg.nmodl_ast && 
kineticBlockVisitor.get_conserve_statement_count()) { + logger->warn( + fmt::format("{} presents non-standard CONSERVE statements in DERIVATIVE blocks. Use it only for debugging/developing", + filename)); + } + } + + { + logger->info("Running STEADYSTATE visitor"); + SteadystateVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("steadystate", "mod")); + } + + /// Parsing units from "nrnunits.lib" and mod files + { + logger->info("Parsing Units"); + UnitsVisitor(cfg.units_dir).visit_program(*node); + } + + /// once we start modifying (especially removing) older constructs + /// from ast then we should run symtab visitor in update mode so + /// that old symbols (e.g. prime variables) are not lost + update_symtab = true; + + if (cfg.nmodl_inline) { + logger->info("Running nmodl inline visitor"); + InlineVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("inline", "mod")); + } + + if (cfg.local_rename) { + logger->info("Running local variable rename visitor"); + LocalVarRenameVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("local_rename", "mod")); + } + + if (cfg.nmodl_localize) { + // localize pass must follow rename pass to avoid conflict + logger->info("Running localize visitor"); + LocalizeVisitor(cfg.localize_verbatim).visit_program(*node); + LocalVarRenameVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("localize", "mod")); + } + + if (cfg.sympy_conductance) { + logger->info("Running sympy conductance visitor"); + SympyConductanceVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("sympy_conductance", "mod")); + } + + if (cfg.sympy_analytic || sparse_solver_exists(*node)) { + if (!cfg.sympy_analytic) { + logger->info( + "Automatically enable sympy_analytic because it exists solver of type
sparse"); + } + logger->info("Running sympy solve visitor"); + SympySolverVisitor(cfg.sympy_pade, cfg.sympy_cse).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("sympy_solve", "mod")); + } + + { + logger->info("Running cnexp visitor"); + NeuronSolveVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("cnexp", "mod")); + } + + { + SolveBlockVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("solveblock", "mod")); + } + + if (json_perfstat) { + std::string file{scratch_dir}; + file.append("/"); + file.append(modfile); + file.append(".perf.json"); + logger->info("Writing performance statistics to {}", file); + PerfVisitor(file).visit_program(*ast); + } + + { + // Add implicit arguments (like celsius, nt) to NEURON functions (like + // nrn_ghk, at_time) whose signatures we have to massage. + ImplicitArgumentVisitor{}.visit_program(*ast); + SymtabVisitor(update_symtab).visit_program(*ast); + } + + { + // make sure to run perf visitor because code generator + // looks for read/write counts const/non-const declaration + PerfVisitor().visit_program(*node); + } + + { + CodegenTransformVisitor{}.visit_program(*ast); + ast_to_nmodl(*ast, filepath("TransformVisitor")); + SymtabVisitor(update_symtab).visit_program(*ast); + } + return true; +} + +void CodegenDriver::ast_to_nmodl(Program& ast, const std::string& filepath) const { + if (cfg.nmodl_ast) { + NmodlPrintVisitor(filepath).visit_program(ast); + logger->info("AST to NMODL transformation written to {}", filepath); + } +}; + +void CodegenDriver::ast_to_json(ast::Program& ast, const std::string& filepath) const { + if (cfg.json_ast) { + JSONVisitor(filepath).write(ast); + logger->info("AST to JSON transformation written to {}", filepath); + } +}; diff --git a/src/codegen/codegen_driver.hpp b/src/codegen/codegen_driver.hpp new file mode 100644 index 0000000000..aa2f9921d0 --- /dev/null +++ 
b/src/codegen/codegen_driver.hpp @@ -0,0 +1,168 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + + +#pragma once + +#include +#include + +#include "ast/program.hpp" +#include "config/config.h" + +namespace nmodl { +namespace codegen { + +struct CodeGenConfig { + /// true if serial c code to be generated + bool c_backend = true; + + /// true if c code with openmp to be generated + bool omp_backend = false; + + /// true if ispc code to be generated + bool ispc_backend = false; + + /// true if c code with openacc to be generated + bool oacc_backend = false; + + /// true if cuda code to be generated + bool cuda_backend = false; + + /// true if llvm code to be generated + bool llvm_backend = false; + + /// true if sympy should be used for solving ODEs analytically + bool sympy_analytic = false; + + /// true if Pade approximation to be used + bool sympy_pade = false; + + /// true if CSE (temp variables) to be used + bool sympy_cse = false; + + /// true if conductance keyword can be added to breakpoint + bool sympy_conductance = false; + + /// true if inlining at nmodl level to be done + bool nmodl_inline = false; + + /// true if unroll at nmodl level to be done + bool nmodl_unroll = false; + + /// true if perform constant folding at nmodl level to be done + bool nmodl_const_folding = false; + + /// true if range variables to be converted to local + bool nmodl_localize = false; + + /// true if global variables to be converted to range + bool nmodl_global_to_range = false; + + /// true if top level local variables to be converted to range + bool nmodl_local_to_range = false; + + /// true if localize variables even if verbatim block is used + bool localize_verbatim = false; 
+ + /// true if local variables to be renamed + bool local_rename = true; + + /// true if inline even if verbatim block exists + bool verbatim_inline = false; + + /// true if verbatim blocks to be renamed + bool verbatim_rename = true; + + /// true if code generation is forced to happen even if there + /// is any incompatibility + bool force_codegen = false; + + /// true if we want to only check compatibility without generating code + bool only_check_compatibility = false; + + /// true if ion variable copies should be avoided + bool optimize_ionvar_copies_codegen = false; + + /// directory where code will be generated + std::string output_dir = "."; + + /// directory where intermediate file will be generated + std::string scratch_dir = "tmp"; + + /// directory where units lib file is located + std::string units_dir = NrnUnitsLib::get_path(); + + /// floating point data type + std::string data_type = "double"; + + /// true if ast should be converted to nmodl + bool nmodl_ast = false; + + /// true if ast should be converted to json + bool json_ast = false; + + /// true if performance stats should be converted to json + bool json_perfstat = false; + +#ifdef NMODL_LLVM_BACKEND + /// generate llvm IR + bool llvm_ir = false; + + /// use single precision floating-point types + bool llvm_float_type = false; + + /// optimisation level for IR generation + int llvm_opt_level_ir = 0; + + /// math library name + std::string llvm_math_library = "none"; + + /// disable debug information generation for the IR + bool llvm_no_debug = false; + + /// fast math flags for LLVM backend + std::vector llvm_fast_math_flags; + + /// target CPU platform name + std::string llvm_cpu_name = "default"; + + /// target GPU platform name + std::string llvm_gpu_name = "default"; + + /// GPU target architecture + std::string llvm_gpu_target_architecture = "sm_70"; + + /// llvm vector width if generating code for CPUs + int llvm_vector_width = 1; + + /// optimisation level for machine code generation + int
llvm_opt_level_codegen = 0; + + /// list of shared libraries to link against in JIT + std::vector shared_lib_paths; +#endif +}; + +class CodegenDriver { + public: + explicit CodegenDriver(CodeGenConfig _cfg) + : cfg(std::move(_cfg)) {} + + bool prepare_mod(std::shared_ptr node); + + private: + CodeGenConfig cfg; + + + /// write ast to nmodl + void ast_to_nmodl(ast::Program& ast, const std::string& filepath) const; + void ast_to_json(ast::Program& ast, const std::string& filepath) const; +}; + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index ca3b405be3..8dfb66e9e0 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -9,6 +9,7 @@ #include "codegen/llvm/llvm_utils.hpp" #include "ast/all.hpp" +#include "utils/logger.hpp" #include "visitors/rename_visitor.hpp" #include "visitors/visitor_utils.hpp" diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 299071ae80..a7af83721c 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -23,7 +23,6 @@ #include "codegen/llvm/llvm_debug_builder.hpp" #include "codegen/llvm/llvm_ir_builder.hpp" #include "symtab/symbol_table.hpp" -#include "utils/logger.hpp" #include "visitors/ast_visitor.hpp" #include "llvm/IR/DIBuilder.h" diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp index 8676f176b4..d15f220d1c 100644 --- a/src/codegen/llvm/target_platform.hpp +++ b/src/codegen/llvm/target_platform.hpp @@ -27,12 +27,12 @@ class Platform { private: /// Name of the platform. - const std::string name = Platform::DEFAULT_PLATFORM_NAME; + std::string name = Platform::DEFAULT_PLATFORM_NAME; /// Target chip for GPUs. /// TODO: this should only be available to GPUs! If we refactor target /// classes so that GPUPlatform <: Platform, it will be nicer! 
- const std::string subtarget_name = "sm_70"; + std::string subtarget_name = "sm_70"; /// Target-specific id to compare platforms easily. PlatformID platform_id; diff --git a/src/main.cpp b/src/main.cpp index 1aa1de992e..55cea74673 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,10 +10,8 @@ #include -#include "ast/program.hpp" #include "codegen/codegen_acc_visitor.hpp" #include "codegen/codegen_c_visitor.hpp" -#include "codegen/codegen_compatibility_visitor.hpp" #include "codegen/codegen_cuda_visitor.hpp" #include "codegen/codegen_ispc_visitor.hpp" #include "codegen/codegen_transform_visitor.hpp" @@ -22,37 +20,14 @@ #include "test/benchmark/llvm_benchmark.hpp" #endif +#include "codegen/codegen_driver.hpp" #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pyembed.hpp" #include "utils/common_utils.hpp" #include "utils/logger.hpp" -#include "visitors/after_cvode_to_cnexp_visitor.hpp" -#include "visitors/ast_visitor.hpp" -#include "visitors/constant_folder_visitor.hpp" -#include "visitors/global_var_visitor.hpp" -#include "visitors/implicit_argument_visitor.hpp" -#include "visitors/indexedname_visitor.hpp" -#include "visitors/inline_visitor.hpp" -#include "visitors/ispc_rename_visitor.hpp" #include "visitors/json_visitor.hpp" -#include "visitors/kinetic_block_visitor.hpp" -#include "visitors/local_to_assigned_visitor.hpp" -#include "visitors/local_var_rename_visitor.hpp" -#include "visitors/localize_visitor.hpp" -#include "visitors/loop_unroll_visitor.hpp" -#include "visitors/neuron_solve_visitor.hpp" #include "visitors/nmodl_visitor.hpp" -#include "visitors/perf_visitor.hpp" -#include "visitors/semantic_analysis_visitor.hpp" -#include "visitors/solve_block_visitor.hpp" -#include "visitors/steadystate_visitor.hpp" -#include "visitors/sympy_conductance_visitor.hpp" -#include "visitors/sympy_solver_visitor.hpp" -#include "visitors/symtab_visitor.hpp" -#include "visitors/units_visitor.hpp" -#include 
"visitors/verbatim_var_rename_visitor.hpp" -#include "visitors/verbatim_visitor.hpp" /** * \dir @@ -75,91 +50,6 @@ int main(int argc, const char* argv[]) { /// true if debug logger statements should be shown std::string verbose("info"); - /// true if serial c code to be generated - bool c_backend(true); - - /// true if ispc code to be generated - bool ispc_backend(false); - - /// true if c code with openacc to be generated - bool oacc_backend(false); - - /// true if cuda code to be generated - bool cuda_backend(false); - - /// true if llvm code to be generated - bool llvm_backend(false); - - /// true if sympy should be used for solving ODEs analytically - bool sympy_analytic(false); - - /// true if Pade approximation to be used - bool sympy_pade(false); - - /// true if CSE (temp variables) to be used - bool sympy_cse(false); - - /// true if conductance keyword can be added to breakpoint - bool sympy_conductance(false); - - /// true if inlining at nmodl level to be done - bool nmodl_inline(false); - - /// true if unroll at nmodl level to be done - bool nmodl_unroll(false); - - /// true if perform constant folding at nmodl level to be done - bool nmodl_const_folding(false); - - /// true if range variables to be converted to local - bool nmodl_localize(false); - - /// true if global variables to be converted to range - bool nmodl_global_to_range(false); - - /// true if top level local variables to be converted to range - bool nmodl_local_to_range(false); - - /// true if localize variables even if verbatim block is used - bool localize_verbatim(false); - - /// true if local variables to be renamed - bool local_rename(true); - - /// true if inline even if verbatim block exist - bool verbatim_inline(false); - - /// true if verbatim blocks - bool verbatim_rename(true); - - /// true if code generation is forced to happen even if there - /// is any incompatibility - bool force_codegen(false); - - /// true if we want to only check compatibility without generating code - bool 
only_check_compatibility(false); - - /// true if ion variable copies should be avoided - bool optimize_ionvar_copies_codegen(false); - - /// directory where code will be generated - std::string output_dir("."); - - /// directory where intermediate file will be generated - std::string scratch_dir("tmp"); - - /// directory where units lib file is located - std::string units_dir(NrnUnitsLib::get_path()); - - /// true if ast should be converted to json - bool json_ast(false); - - /// true if ast should be converted to nmodl - bool nmodl_ast(false); - - /// true if performance stats should be converted to json - bool json_perfstat(false); - /// true if symbol table should be printed bool show_symtab(false); @@ -167,42 +57,9 @@ int main(int argc, const char* argv[]) { std::string data_type("double"); #ifdef NMODL_LLVM_BACKEND - /// generate llvm IR - bool llvm_ir(false); - - /// use single precision floating-point types - bool llvm_float_type(false); - - /// optimisation level for IR generation - int llvm_opt_level_ir = 0; - - /// math library name - std::string llvm_math_library("none"); - - /// disable debug information generation for the IR - bool llvm_no_debug(false); - - /// fast math flags for LLVM backend - std::vector llvm_fast_math_flags; - - /// traget CPU platform name - std::string llvm_cpu_name = "default"; - - /// traget GPU platform name - std::string llvm_gpu_name = "default"; - - /// llvm vector width if generating code for CPUs - int llvm_vector_width = 1; - /// run llvm benchmark bool llvm_benchmark(false); - /// optimisation level for machine code generation - int llvm_opt_level_codegen = 0; - - /// list of shared libraries to link against in JIT - std::vector shared_lib_paths; - /// the size of the instance struct for the benchmark int instance_size = 10000; @@ -210,6 +67,8 @@ int main(int argc, const char* argv[]) { int num_experiments = 100; #endif + CodeGenConfig cfg; + // 
NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) app.get_formatter()->column_width(40); app.set_help_all_flag("-H,--help-all", "Print this help message including all sub-commands"); @@ -224,156 +83,156 @@ int main(int argc, const char* argv[]) { ->required() ->check(CLI::ExistingFile); - app.add_option("-o,--output", output_dir, "Directory for backend code output") + app.add_option("-o,--output", cfg.output_dir, "Directory for backend code output") ->capture_default_str() ->ignore_case(); - app.add_option("--scratch", scratch_dir, "Directory for intermediate code output") + app.add_option("--scratch", cfg.scratch_dir, "Directory for intermediate code output") ->capture_default_str() ->ignore_case(); - app.add_option("--units", units_dir, "Directory of units lib file") + app.add_option("--units", cfg.units_dir, "Directory of units lib file") ->capture_default_str() ->ignore_case(); auto host_opt = app.add_subcommand("host", "HOST/CPU code backends")->ignore_case(); - host_opt->add_flag("--c", c_backend, fmt::format("C/C++ backend ({})", c_backend)) + host_opt->add_flag("--c", cfg.c_backend, fmt::format("C/C++ backend ({})", cfg.c_backend)) ->ignore_case(); host_opt ->add_flag("--ispc", - ispc_backend, - fmt::format("C/C++ backend with ISPC ({})", ispc_backend)) + cfg.ispc_backend, + fmt::format("C/C++ backend with ISPC ({})", cfg.ispc_backend)) ->ignore_case(); auto acc_opt = app.add_subcommand("acc", "Accelerator code backends")->ignore_case(); acc_opt ->add_flag("--oacc", - oacc_backend, - fmt::format("C/C++ backend with OpenACC ({})", oacc_backend)) + cfg.oacc_backend, + fmt::format("C/C++ backend with OpenACC ({})", cfg.oacc_backend)) ->ignore_case(); acc_opt ->add_flag("--cuda", - cuda_backend, - fmt::format("C/C++ backend with CUDA ({})", cuda_backend)) + cfg.cuda_backend, + fmt::format("C/C++ backend with CUDA ({})", cfg.cuda_backend)) ->ignore_case(); // clang-format off auto sympy_opt = app.add_subcommand("sympy", "SymPy based 
analysis and optimizations")->ignore_case(); sympy_opt->add_flag("--analytic", - sympy_analytic, - fmt::format("Solve ODEs using SymPy analytic integration ({})", sympy_analytic))->ignore_case(); + cfg.sympy_analytic, + fmt::format("Solve ODEs using SymPy analytic integration ({})", cfg.sympy_analytic))->ignore_case(); sympy_opt->add_flag("--pade", - sympy_pade, - fmt::format("Pade approximation in SymPy analytic integration ({})", sympy_pade))->ignore_case(); + cfg.sympy_pade, + fmt::format("Pade approximation in SymPy analytic integration ({})", cfg.sympy_pade))->ignore_case(); sympy_opt->add_flag("--cse", - sympy_cse, - fmt::format("CSE (Common Subexpression Elimination) in SymPy analytic integration ({})", sympy_cse))->ignore_case(); + cfg.sympy_cse, + fmt::format("CSE (Common Subexpression Elimination) in SymPy analytic integration ({})", cfg.sympy_cse))->ignore_case(); sympy_opt->add_flag("--conductance", - sympy_conductance, - fmt::format("Add CONDUCTANCE keyword in BREAKPOINT ({})", sympy_conductance))->ignore_case(); + cfg.sympy_conductance, + fmt::format("Add CONDUCTANCE keyword in BREAKPOINT ({})", cfg.sympy_conductance))->ignore_case(); auto passes_opt = app.add_subcommand("passes", "Analyse/Optimization passes")->ignore_case(); passes_opt->add_flag("--inline", - nmodl_inline, - fmt::format("Perform inlining at NMODL level ({})", nmodl_inline))->ignore_case(); + cfg.nmodl_inline, + fmt::format("Perform inlining at NMODL level ({})", cfg.nmodl_inline))->ignore_case(); passes_opt->add_flag("--unroll", - nmodl_unroll, - fmt::format("Perform loop unroll at NMODL level ({})", nmodl_unroll))->ignore_case(); + cfg.nmodl_unroll, + fmt::format("Perform loop unroll at NMODL level ({})", cfg.nmodl_unroll))->ignore_case(); passes_opt->add_flag("--const-folding", - nmodl_const_folding, - fmt::format("Perform constant folding at NMODL level ({})", nmodl_const_folding))->ignore_case(); + cfg.nmodl_const_folding, + fmt::format("Perform constant folding at NMODL level 
({})", cfg.nmodl_const_folding))->ignore_case(); passes_opt->add_flag("--localize", - nmodl_localize, - fmt::format("Convert RANGE variables to LOCAL ({})", nmodl_localize))->ignore_case(); + cfg.nmodl_localize, + fmt::format("Convert RANGE variables to LOCAL ({})", cfg.nmodl_localize))->ignore_case(); passes_opt->add_flag("--global-to-range", - nmodl_global_to_range, - fmt::format("Convert GLOBAL variables to RANGE ({})", nmodl_global_to_range))->ignore_case(); + cfg.nmodl_global_to_range, + fmt::format("Convert GLOBAL variables to RANGE ({})", cfg.nmodl_global_to_range))->ignore_case(); passes_opt->add_flag("--local-to-range", - nmodl_local_to_range, - fmt::format("Convert top level LOCAL variables to RANGE ({})", nmodl_local_to_range))->ignore_case(); + cfg.nmodl_local_to_range, + fmt::format("Convert top level LOCAL variables to RANGE ({})", cfg.nmodl_local_to_range))->ignore_case(); passes_opt->add_flag("--localize-verbatim", - localize_verbatim, - fmt::format("Convert RANGE variables to LOCAL even if verbatim block exist ({})", localize_verbatim))->ignore_case(); + cfg.localize_verbatim, + fmt::format("Convert RANGE variables to LOCAL even if verbatim block exist ({})", cfg.localize_verbatim))->ignore_case(); passes_opt->add_flag("--local-rename", - local_rename, - fmt::format("Rename LOCAL variable if variable of same name exist in global scope ({})", local_rename))->ignore_case(); + cfg.local_rename, + fmt::format("Rename LOCAL variable if variable of same name exist in global scope ({})", cfg.local_rename))->ignore_case(); passes_opt->add_flag("--verbatim-inline", - verbatim_inline, - fmt::format("Inline even if verbatim block exist ({})", verbatim_inline))->ignore_case(); + cfg.verbatim_inline, + fmt::format("Inline even if verbatim block exist ({})", cfg.verbatim_inline))->ignore_case(); passes_opt->add_flag("--verbatim-rename", - verbatim_rename, - fmt::format("Rename variables in verbatim block ({})", verbatim_rename))->ignore_case(); + 
cfg.verbatim_rename, + fmt::format("Rename variables in verbatim block ({})", cfg.verbatim_rename))->ignore_case(); passes_opt->add_flag("--json-ast", - json_ast, - fmt::format("Write AST to JSON file ({})", json_ast))->ignore_case(); + cfg.json_ast, + fmt::format("Write AST to JSON file ({})", cfg.json_ast))->ignore_case(); passes_opt->add_flag("--nmodl-ast", - nmodl_ast, - fmt::format("Write AST to NMODL file ({})", nmodl_ast))->ignore_case(); + cfg.nmodl_ast, + fmt::format("Write AST to NMODL file ({})", cfg.nmodl_ast))->ignore_case(); passes_opt->add_flag("--json-perf", - json_perfstat, - fmt::format("Write performance statistics to JSON file ({})", json_perfstat))->ignore_case(); + cfg.json_perfstat, + fmt::format("Write performance statistics to JSON file ({})", cfg.json_perfstat))->ignore_case(); passes_opt->add_flag("--show-symtab", show_symtab, fmt::format("Write symbol table to stdout ({})", show_symtab))->ignore_case(); auto codegen_opt = app.add_subcommand("codegen", "Code generation options")->ignore_case(); codegen_opt->add_option("--datatype", - data_type, + cfg.data_type, "Data type for floating point variables")->capture_default_str()->ignore_case()->check(CLI::IsMember({"float", "double"})); codegen_opt->add_flag("--force", - force_codegen, + cfg.force_codegen, "Force code generation even if there is any incompatibility"); codegen_opt->add_flag("--only-check-compatibility", - only_check_compatibility, + cfg.only_check_compatibility, "Check compatibility and return without generating code"); codegen_opt->add_flag("--opt-ionvar-copy", - optimize_ionvar_copies_codegen, - fmt::format("Optimize copies of ion variables ({})", optimize_ionvar_copies_codegen))->ignore_case(); + cfg.optimize_ionvar_copies_codegen, + fmt::format("Optimize copies of ion variables ({})", cfg.optimize_ionvar_copies_codegen))->ignore_case(); #ifdef NMODL_LLVM_BACKEND // LLVM IR code generation options. 
auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); auto llvm_ir_opt = llvm_opt->add_flag("--ir", - llvm_ir, - fmt::format("Generate LLVM IR ({})", llvm_ir))->ignore_case(); + cfg.llvm_ir, + fmt::format("Generate LLVM IR ({})", cfg.llvm_ir))->ignore_case(); llvm_ir_opt->required(true); llvm_opt->add_flag("--no-debug", - llvm_no_debug, - fmt::format("Disable debug information ({})", llvm_no_debug))->ignore_case(); + cfg.llvm_no_debug, + fmt::format("Disable debug information ({})", cfg.llvm_no_debug))->ignore_case(); llvm_opt->add_option("--opt-level-ir", - llvm_opt_level_ir, - fmt::format("LLVM IR optimisation level (O{})", llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + cfg.llvm_opt_level_ir, + fmt::format("LLVM IR optimisation level (O{})", cfg.llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); llvm_opt->add_flag("--single-precision", - llvm_float_type, - fmt::format("Use single precision floating-point types ({})", llvm_float_type))->ignore_case(); + cfg.llvm_float_type, + fmt::format("Use single precision floating-point types ({})", cfg.llvm_float_type))->ignore_case(); llvm_opt->add_option("--fmf", - llvm_fast_math_flags, + cfg.llvm_fast_math_flags, "Fast math flags for floating-point optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); // Platform options for LLVM code generation. 
auto cpu_opt = app.add_subcommand("cpu", "LLVM CPU option")->ignore_case(); cpu_opt->needs(llvm_opt); cpu_opt->add_option("--name", - llvm_cpu_name, + cfg.llvm_cpu_name, "Name of CPU platform to use")->ignore_case(); auto simd_math_library_opt = cpu_opt->add_option("--math-library", - llvm_math_library, - "Math library for SIMD code generation ({})"_format(llvm_math_library)); + cfg.llvm_math_library, + "Math library for SIMD code generation ({})"_format(cfg.llvm_math_library)); simd_math_library_opt->check(CLI::IsMember({"Accelerate", "libmvec", "libsystem_m", "MASSV", "SLEEF", "SVML", "none"})); cpu_opt->add_option("--vector-width", - llvm_vector_width, - "Explicit vectorization width for IR generation ({})"_format(llvm_vector_width))->ignore_case(); + cfg.llvm_vector_width, + "Explicit vectorization width for IR generation ({})"_format(cfg.llvm_vector_width))->ignore_case(); auto gpu_opt = app.add_subcommand("gpu", "LLVM GPU option")->ignore_case(); gpu_opt->needs(llvm_opt); auto gpu_target_name = gpu_opt->add_option("--name", - llvm_gpu_name, + cfg.llvm_gpu_name, "Name of GPU platform to use")->ignore_case(); gpu_opt->add_option("--target-chip", - llvm_cpu_name, + cfg.llvm_gpu_target_architecture, "Name of target chip to use")->ignore_case(); auto gpu_math_library_opt = gpu_opt->add_option("--math-library", - llvm_math_library, - "Math library for GPU code generation ({})"_format(llvm_math_library)); + cfg.llvm_math_library, + "Math library for GPU code generation ({})"_format(cfg.llvm_math_library)); gpu_math_library_opt->check(CLI::IsMember({"libdevice"})); // Allow only one platform at a time. 
@@ -387,9 +246,9 @@ int main(int argc, const char* argv[]) { llvm_benchmark, fmt::format("Run LLVM benchmark ({})", llvm_benchmark))->ignore_case(); benchmark_opt->add_option("--opt-level-codegen", - llvm_opt_level_codegen, - fmt::format("Machine code optimisation level (O{})", llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); - benchmark_opt->add_option("--libs", shared_lib_paths, "Shared libraries to link IR against") + cfg.llvm_opt_level_codegen, + fmt::format("Machine code optimisation level (O{})", cfg.llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + benchmark_opt->add_option("--libs", cfg.shared_lib_paths, "Shared libraries to link IR against") ->ignore_case() ->check(CLI::ExistingFile); benchmark_opt->add_option("--instance-size", @@ -404,12 +263,12 @@ int main(int argc, const char* argv[]) { CLI11_PARSE(app, argc, argv); // if any of the other backends is used we force the C backend to be off. - if (ispc_backend) { - c_backend = false; + if (cfg.ispc_backend) { + cfg.c_backend = false; } - utils::make_path(output_dir); - utils::make_path(scratch_dir); + utils::make_path(cfg.output_dir); + utils::make_path(cfg.scratch_dir); if (sympy_opt) { nmodl::pybind_wrappers::EmbeddedPythonLoader::get_instance() @@ -419,21 +278,6 @@ int main(int argc, const char* argv[]) { logger->set_level(spdlog::level::from_str(verbose)); - /// write ast to nmodl - const auto ast_to_nmodl = [nmodl_ast](ast::Program& ast, const std::string& filepath) { - if (nmodl_ast) { - NmodlPrintVisitor(filepath).visit_program(ast); - logger->info("AST to NMODL transformation written to {}", filepath); - } - }; - - /// write ast to nmodl - const auto ast_to_json = [json_ast](ast::Program& ast, const std::string& filepath) { - if (json_ast) { - JSONVisitor(filepath).write(ast); - logger->info("AST to JSON transformation written to {}", filepath); - } - }; for (const auto& file: mod_files) { logger->info("Processing {}", file); @@ -441,92 +285,20 @@ int 
main(int argc, const char* argv[]) { const auto modfile = utils::remove_extension(utils::base_name(file)); /// create file path for nmodl file - auto filepath = [scratch_dir, modfile](const std::string& suffix, const std::string& ext) { + auto filepath = [cfg, modfile](const std::string& suffix, const std::string& ext) { static int count = 0; return fmt::format( - "{}/{}.{}.{}.{}", scratch_dir, modfile, std::to_string(count++), suffix, ext); + "{}/{}.{}.{}.{}", cfg.scratch_dir, modfile, std::to_string(count++), suffix, ext); }; - /// driver object creates lexer and parser, just call parser method - NmodlDriver driver; + /// nmodl_driver object creates lexer and parser, just call parser method + NmodlDriver nmodl_driver; /// parse mod file and construct ast - const auto& ast = driver.parse_file(file); - - /// whether to update existing symbol table or create new - /// one whenever we run symtab visitor. - bool update_symtab = false; + const auto& ast = nmodl_driver.parse_file(file); - /// just visit the ast - AstVisitor().visit_program(*ast); - - /// construct symbol table - { - logger->info("Running symtab visitor"); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - /// Check some rules that ast should follow - { - logger->info("Running semantic analysis visitor"); - if (SemanticAnalysisVisitor().check(*ast)) { - return 1; - } - } - - /// use cnexp instead of after_cvode solve method - { - logger->info("Running CVode to cnexp visitor"); - AfterCVodeToCnexpVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("after_cvode_to_cnexp", "mod")); - } - - /// Rename variables that match ISPC compiler double constants - if (ispc_backend) { - logger->info("Running ISPC variables rename visitor"); - IspcRenameVisitor(ast).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("ispc_double_rename", "mod")); - } - - /// GLOBAL to RANGE rename visitor - if (nmodl_global_to_range) { - // make sure to run perf 
visitor because code generator - // looks for read/write counts const/non-const declaration - PerfVisitor().visit_program(*ast); - // make sure to run the GlobalToRange visitor after all the - // reinitializations of Symtab - logger->info("Running GlobalToRange visitor"); - GlobalToRangeVisitor(*ast).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("global_to_range", "mod")); - } - - /// LOCAL to ASSIGNED visitor - if (nmodl_local_to_range) { - logger->info("Running LOCAL to ASSIGNED visitor"); - PerfVisitor().visit_program(*ast); - LocalToAssignedVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_to_assigned", "mod")); - } - - { - // Compatibility Checking - logger->info("Running code compatibility checker"); - // run perfvisitor to update read/write counts - PerfVisitor().visit_program(*ast); - - // If we want to just check compatibility we return the result - if (only_check_compatibility) { - return CodegenCompatibilityVisitor().find_unhandled_ast_nodes(*ast); - } - - // If there is an incompatible construct and code generation is not forced exit NMODL - if (CodegenCompatibilityVisitor().find_unhandled_ast_nodes(*ast) && !force_codegen) { - return 1; - } - } + auto cg_driver = CodegenDriver(cfg); + auto success = cg_driver.prepare_mod(ast); if (show_symtab) { logger->info("Printing symbol table"); @@ -534,210 +306,89 @@ int main(int argc, const char* argv[]) { symtab->print(std::cout); } - ast_to_nmodl(*ast, filepath("ast", "mod")); - ast_to_json(*ast, filepath("ast", "json")); - - if (verbatim_rename) { - logger->info("Running verbatim rename visitor"); - VerbatimVarRenameVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("verbatim_rename", "mod")); - } - - if (nmodl_const_folding) { - logger->info("Running nmodl constant folding visitor"); - ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("constfold", 
"mod")); - } - - if (nmodl_unroll) { - logger->info("Running nmodl loop unroll visitor"); - LoopUnrollVisitor().visit_program(*ast); - ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("unroll", "mod")); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - /// note that we can not symtab visitor in update mode as we - /// replace kinetic block with derivative block of same name - /// in global scope - { - logger->info("Running KINETIC block visitor"); - auto kineticBlockVisitor = KineticBlockVisitor(); - kineticBlockVisitor.visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - const auto filename = filepath("kinetic", "mod"); - ast_to_nmodl(*ast, filename); - if (nmodl_ast && kineticBlockVisitor.get_conserve_statement_count()) { - logger->warn( - fmt::format("{} presents non-standard CONSERVE statements in DERIVATIVE " - "blocks. Use it only for debugging/developing", - filename)); - } - } - - { - logger->info("Running STEADYSTATE visitor"); - SteadystateVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("steadystate", "mod")); - } - - /// Parsing units fron "nrnunits.lib" and mod files - { - logger->info("Parsing Units"); - UnitsVisitor(units_dir).visit_program(*ast); - } - - /// once we start modifying (especially removing) older constructs - /// from ast then we should run symtab visitor in update mode so - /// that old symbols (e.g. 
prime variables) are not lost - update_symtab = true; - - if (nmodl_inline) { - logger->info("Running nmodl inline visitor"); - InlineVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("inline", "mod")); - } - - if (local_rename) { - logger->info("Running local variable rename visitor"); - LocalVarRenameVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_rename", "mod")); - } - - if (nmodl_localize) { - // localize pass must follow rename pass to avoid conflict - logger->info("Running localize visitor"); - LocalizeVisitor(localize_verbatim).visit_program(*ast); - LocalVarRenameVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("localize", "mod")); - } - - if (sympy_conductance) { - logger->info("Running sympy conductance visitor"); - SympyConductanceVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_conductance", "mod")); + if (cfg.only_check_compatibility) { + return !success; } - - if (sympy_analytic || sparse_solver_exists(*ast)) { - if (!sympy_analytic) { - logger->info( - "Automatically enable sympy_analytic because it exists solver of type sparse"); - } - logger->info("Running sympy solve visitor"); - SympySolverVisitor(sympy_pade, sympy_cse).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_solve", "mod")); + if (!success && !cfg.force_codegen) { + return 1; } { - logger->info("Running cnexp visitor"); - NeuronSolveVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("cnexp", "mod")); - } - - { - SolveBlockVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("solveblock", "mod")); - } - - if (json_perfstat) { - std::string file{scratch_dir}; - file.append("/"); - file.append(modfile); - file.append(".perf.json"); - 
logger->info("Writing performance statistics to {}", file); - PerfVisitor(file).visit_program(*ast); - } - - // Add implicit arguments (like celsius, nt) to NEURON functions (like - // nrn_ghk, at_time) whose signatures we have to massage. - ImplicitArgumentVisitor{}.visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - - { - // make sure to run perf visitor because code generator - // looks for read/write counts const/non-const declaration - PerfVisitor().visit_program(*ast); - } - - { - CodegenTransformVisitor{}.visit_program(*ast); - ast_to_nmodl(*ast, filepath("TransformVisitor")); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - { - if (ispc_backend) { + if (cfg.ispc_backend) { logger->info("Running ISPC backend code generator"); CodegenIspcVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - else if (oacc_backend) { + else if (cfg.oacc_backend) { logger->info("Running OpenACC backend code generator"); CodegenAccVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - else if (c_backend) { + else if (cfg.c_backend) { logger->info("Running C backend code generator"); CodegenCVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - if (cuda_backend) { + if (cfg.cuda_backend) { logger->info("Running CUDA backend code generator"); CodegenCudaVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } #ifdef NMODL_LLVM_BACKEND - if (llvm_ir || llvm_benchmark) { + if (cfg.llvm_ir || llvm_benchmark) { // If benchmarking, we want to optimize the IR with target // information 
and not in LLVM visitor. - int llvm_opt_level = llvm_benchmark ? 0 : llvm_opt_level_ir; + int llvm_opt_level = llvm_benchmark ? 0 : cfg.llvm_opt_level_ir; // Create platform abstraction. - PlatformID pid = llvm_gpu_name == "default" ? PlatformID::CPU : PlatformID::GPU; - const std::string name = llvm_gpu_name == "default" ? llvm_cpu_name : llvm_gpu_name; + PlatformID pid = cfg.llvm_gpu_name == "default" ? PlatformID::CPU : PlatformID::GPU; + const std::string name = cfg.llvm_gpu_name == "default" ? cfg.llvm_cpu_name + : cfg.llvm_gpu_name; Platform platform(pid, name, - llvm_cpu_name, - llvm_math_library, - llvm_float_type, - llvm_vector_width); + cfg.llvm_cpu_name, + cfg.llvm_math_library, + cfg.llvm_float_type, + cfg.llvm_vector_width); logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor(modfile, - output_dir, + cfg.output_dir, platform, llvm_opt_level, - !llvm_no_debug, - llvm_fast_math_flags); + !cfg.llvm_no_debug, + cfg.llvm_fast_math_flags); visitor.visit_program(*ast); - ast_to_nmodl(*ast, filepath("llvm", "mod")); - ast_to_json(*ast, filepath("llvm", "json")); + if (cfg.nmodl_ast) { + NmodlPrintVisitor(filepath("llvm", "mod")).visit_program(*ast); + logger->info("AST to NMODL transformation written to {}", + filepath("llvm", "mod")); + } + if (cfg.json_ast) { + JSONVisitor(filepath("llvm", "json")).write(*ast); + logger->info("AST to JSON transformation written to {}", + filepath("llvm", "json")); + } if (llvm_benchmark) { // \todo integrate Platform class here - if (llvm_gpu_name != "default") { + if (cfg.llvm_gpu_name != "default") { logger->warn( "GPU benchmarking is not supported, targeting " "CPU instead"); @@ -746,14 +397,14 @@ int main(int argc, const char* argv[]) { logger->info("Running LLVM benchmark"); benchmark::LLVMBenchmark benchmark(visitor, modfile, - output_dir, - shared_lib_paths, + cfg.output_dir, + cfg.shared_lib_paths, num_experiments, instance_size, - llvm_cpu_name, - llvm_opt_level_ir, - 
llvm_opt_level_codegen); - benchmark.run(ast); + cfg.llvm_cpu_name, + cfg.llvm_opt_level_ir, + cfg.llvm_opt_level_codegen); + benchmark.run(); } } #endif diff --git a/src/pybind/CMakeLists.txt b/src/pybind/CMakeLists.txt index a78174cefd..d89c68ba86 100644 --- a/src/pybind/CMakeLists.txt +++ b/src/pybind/CMakeLists.txt @@ -63,6 +63,12 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) add_dependencies(_nmodl lexer pyastgen util) target_link_libraries(_nmodl PRIVATE printer symtab visitor pyembed) + # Additional options are needed when LLVM JIT functionality is built + if(NMODL_ENABLE_LLVM) + set_property(TARGET codegen llvm_codegen llvm_benchmark benchmark_data PROPERTY POSITION_INDEPENDENT_CODE ON) + target_link_libraries(_nmodl PRIVATE codegen llvm_codegen llvm_benchmark benchmark_data + ${LLVM_LIBS_TO_LINK}) + endif() # in case of wheel, python module shouldn't link to wrapper library if(LINK_AGAINST_PYTHON) target_link_libraries(_nmodl PRIVATE pywrapper) diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index 259df6bb8b..3779dccc33 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -5,9 +5,12 @@ * Lesser General Public License. See top-level LICENSE file for details. 
*************************************************************************/ #include "ast/program.hpp" +#include "codegen/codegen_driver.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pybind_utils.hpp" +#include "test/benchmark/llvm_benchmark.hpp" #include "visitors/visitor_utils.hpp" #include @@ -106,6 +109,10 @@ static const char* const to_json = R"( '{"Program":[{"NeuronBlock":[{"StatementBlock":[]}]}]}' )"; +static const char* jit = R"( + This is the Jit class documentation +)"; + } // namespace docstring /** @@ -128,6 +135,57 @@ class PyNmodlDriver: public nmodl::parser::NmodlDriver { } }; +class JitDriver { + private: + nmodl::codegen::Platform platform; + + nmodl::codegen::CodeGenConfig cfg; + nmodl::codegen::CodegenDriver cg_driver; + + void init_platform() { + // Create platform abstraction. + nmodl::codegen::PlatformID pid = cfg.llvm_gpu_name == "default" + ? nmodl::codegen::PlatformID::CPU + : nmodl::codegen::PlatformID::GPU; + const std::string name = cfg.llvm_gpu_name == "default" ? 
cfg.llvm_cpu_name + : cfg.llvm_gpu_name; + platform = nmodl::codegen::Platform( + pid, name, cfg.llvm_math_library, cfg.llvm_float_type, cfg.llvm_vector_width); + } + + public: + JitDriver() + : cg_driver(cfg) { + init_platform(); + } + + explicit JitDriver(const nmodl::codegen::CodeGenConfig& cfg) + : cfg(cfg) + , cg_driver(cfg) { + init_platform(); + } + + + benchmark::BenchmarkResults run(std::shared_ptr node, + std::string& modname, + int num_experiments, + int instance_size) { + cg_driver.prepare_mod(node); + nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0); + visitor.visit_program(*node); + nmodl::benchmark::LLVMBenchmark benchmark(visitor, + modname, + cfg.output_dir, + cfg.shared_lib_paths, + num_experiments, + instance_size, + cfg.llvm_cpu_name, + cfg.llvm_opt_level_ir, + cfg.llvm_opt_level_codegen); + return benchmark.run(); + } +}; + } // namespace nmodl // forward declaration of submodule init functions @@ -160,6 +218,59 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { nmodl::docstring::driver_parse_stream) .def("get_ast", &nmodl::PyNmodlDriver::get_ast, nmodl::docstring::driver_ast); + py::class_ cfg(m_nmodl, "CodeGenConfig"); + cfg.def(py::init([]() { + auto cfg = std::make_unique(); + // set to more sensible defaults for python binding + cfg->llvm_backend = true; + return cfg; + })) + .def_readwrite("sympy_analytic", &nmodl::codegen::CodeGenConfig::sympy_analytic) + .def_readwrite("sympy_pade", &nmodl::codegen::CodeGenConfig::sympy_pade) + .def_readwrite("sympy_cse", &nmodl::codegen::CodeGenConfig::sympy_cse) + .def_readwrite("sympy_conductance", &nmodl::codegen::CodeGenConfig::sympy_conductance) + .def_readwrite("nmodl_inline", &nmodl::codegen::CodeGenConfig::nmodl_inline) + .def_readwrite("nmodl_unroll", &nmodl::codegen::CodeGenConfig::nmodl_unroll) + .def_readwrite("nmodl_const_folding", &nmodl::codegen::CodeGenConfig::nmodl_const_folding) + .def_readwrite("nmodl_localize", &nmodl::codegen::CodeGenConfig::nmodl_localize) + 
.def_readwrite("nmodl_global_to_range", + &nmodl::codegen::CodeGenConfig::nmodl_global_to_range) + .def_readwrite("nmodl_local_to_range", &nmodl::codegen::CodeGenConfig::nmodl_local_to_range) + .def_readwrite("localize_verbatim", &nmodl::codegen::CodeGenConfig::localize_verbatim) + .def_readwrite("local_rename", &nmodl::codegen::CodeGenConfig::local_rename) + .def_readwrite("verbatim_inline", &nmodl::codegen::CodeGenConfig::verbatim_inline) + .def_readwrite("verbatim_rename", &nmodl::codegen::CodeGenConfig::verbatim_rename) + .def_readwrite("force_codegen", &nmodl::codegen::CodeGenConfig::force_codegen) + .def_readwrite("only_check_compatibility", + &nmodl::codegen::CodeGenConfig::only_check_compatibility) + .def_readwrite("optimize_ionvar_copies_codegen", + &nmodl::codegen::CodeGenConfig::optimize_ionvar_copies_codegen) + .def_readwrite("output_dir", &nmodl::codegen::CodeGenConfig::output_dir) + .def_readwrite("scratch_dir", &nmodl::codegen::CodeGenConfig::scratch_dir) + .def_readwrite("data_type", &nmodl::codegen::CodeGenConfig::data_type) + .def_readwrite("llvm_ir", &nmodl::codegen::CodeGenConfig::llvm_ir) + .def_readwrite("llvm_float_type", &nmodl::codegen::CodeGenConfig::llvm_float_type) + .def_readwrite("llvm_opt_level_ir", &nmodl::codegen::CodeGenConfig::llvm_opt_level_ir) + .def_readwrite("llvm_math_library", &nmodl::codegen::CodeGenConfig::llvm_math_library) + .def_readwrite("llvm_no_debug", &nmodl::codegen::CodeGenConfig::llvm_no_debug) + .def_readwrite("llvm_fast_math_flags", &nmodl::codegen::CodeGenConfig::llvm_fast_math_flags) + .def_readwrite("llvm_cpu_name", &nmodl::codegen::CodeGenConfig::llvm_cpu_name) + .def_readwrite("llvm_gpu_name", &nmodl::codegen::CodeGenConfig::llvm_gpu_name) + .def_readwrite("llvm_vector_width", &nmodl::codegen::CodeGenConfig::llvm_vector_width) + .def_readwrite("llvm_opt_level_codegen", + &nmodl::codegen::CodeGenConfig::llvm_opt_level_codegen) + .def_readwrite("shared_lib_paths", 
&nmodl::codegen::CodeGenConfig::shared_lib_paths); + + py::class_ jit_driver(m_nmodl, "Jit", nmodl::docstring::jit); + jit_driver.def(py::init<>()) + .def(py::init()) + .def("run", + &nmodl::JitDriver::run, + "node"_a, + "modname"_a, + "num_experiments"_a, + "instance_size"_a); + m_nmodl.def("to_nmodl", static_cast&)>( diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index 4441d53251..cc3e26bb35 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -15,3 +15,20 @@ add_dependencies(llvm_benchmark lexer util visitor) if(NMODL_ENABLE_JIT_EVENT_LISTENERS) target_compile_definitions(llvm_benchmark PUBLIC NMODL_HAVE_JIT_EVENT_LISTENERS) endif() + +# ============================================================================= +# LLVM pyjit +# ============================================================================= + +if(NMODL_ENABLE_PYTHON_BINDINGS) + file(GLOB modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/*.mod") + foreach(modfile ${modfiles}) + get_filename_component(modfile_name "${modfile}" NAME) + add_test(NAME "PyJIT/${modfile_name}" + COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py + ${modfile}) + set_tests_properties( + "PyJIT/${modfile_name}" PROPERTIES ENVIRONMENT + PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}) + endforeach() +endif() diff --git a/test/benchmark/benchmark.py b/test/benchmark/benchmark.py new file mode 100644 index 0000000000..c39f97002a --- /dev/null +++ b/test/benchmark/benchmark.py @@ -0,0 +1,25 @@ +import sys + +import nmodl.dsl as nmodl +from nmodl import ast, visitor + +def main(): + driver = nmodl.NmodlDriver() + lookup_visitor = visitor.AstLookupVisitor() + + cfg = nmodl.CodeGenConfig() + cfg.llvm_vector_width = 4 + cfg.llvm_opt_level_ir = 2 + fname = sys.argv[1] + with open(fname) as f: + hh = f.read() + modast = driver.parse_string(hh) + modname = lookup_visitor.lookup(modast, 
ast.AstNodeType.SUFFIX)[0].get_node_name() + jit = nmodl.Jit(cfg) + + res = jit.run(modast, modname, 1000, 1000) + print(res) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index 0e94ae231b..87d7e34512 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -6,10 +6,12 @@ *************************************************************************/ #include +#include +#include -#include "codegen/llvm/codegen_llvm_visitor.hpp" #include "llvm_benchmark.hpp" #include "test/benchmark/jit_driver.hpp" +#include "utils/logger.hpp" #include "llvm/Support/Host.h" #include "test/unit/codegen/codegen_data_helper.hpp" @@ -18,14 +20,14 @@ namespace nmodl { namespace benchmark { -void LLVMBenchmark::run(const std::shared_ptr& node) { +BenchmarkResults LLVMBenchmark::run() { // create functions - generate_llvm(node); + generate_llvm(); // Finally, run the benchmark and log the measurements. - run_benchmark(node); + return run_benchmark(); } -void LLVMBenchmark::generate_llvm(const std::shared_ptr& node) { +void LLVMBenchmark::generate_llvm() { // First, visit the AST to build the LLVM IR module and wrap the kernel function calls. auto start = std::chrono::steady_clock::now(); llvm_visitor.wrap_kernel_functions(); @@ -36,9 +38,9 @@ void LLVMBenchmark::generate_llvm(const std::shared_ptr& node) { logger->info("Created LLVM IR module from NMODL AST in {} sec", diff.count()); } -void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { +BenchmarkResults LLVMBenchmark::run_benchmark() { // Set the codegen data helper and find the kernels. 
- auto codegen_data = codegen::CodegenDataHelper(node, llvm_visitor.get_instance_struct_ptr()); + auto codegen_data = codegen::CodegenDataHelper(llvm_visitor.get_instance_struct_ptr()); std::vector kernel_names; llvm_visitor.find_kernel_names(kernel_names); @@ -55,13 +57,11 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { std::move(m), filename, output_dir, cpu_name, shared_libs, opt_level_ir, opt_level_codegen); runner.initialize_driver(); + BenchmarkResults results{}; // Benchmark every kernel. for (const auto& kernel_name: kernel_names) { - // For every kernel run the benchmark `num_experiments` times. - double time_min = std::numeric_limits::max(); - double time_max = 0.0; - double time_sum = 0.0; - double time_squared_sum = 0.0; + // For every kernel run the benchmark `num_experiments` times and collect runtimes. + auto times = std::vector(num_experiments, 0.0); for (int i = 0; i < num_experiments; ++i) { // Initialise the data. auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); @@ -80,22 +80,30 @@ void LLVMBenchmark::run_benchmark(const std::shared_ptr& node) { std::chrono::duration diff = end - start; // Log the time taken for each run. - logger->info("Experiment {} compute time = {:.6f} sec", i, diff.count()); + logger->debug("Experiment {} compute time = {:.6f} sec", i, diff.count()); - // Update statistics. 
- time_sum += diff.count(); - time_squared_sum += diff.count() * diff.count(); - time_min = std::min(time_min, diff.count()); - time_max = std::max(time_max, diff.count()); + times[i] = diff.count(); } + // Calculate statistics + double time_mean = std::accumulate(times.begin(), times.end(), 0.0) / num_experiments; + double time_var = std::accumulate(times.begin(), + times.end(), + 0.0, + [time_mean](const double& pres, const double& e) { + return (e - time_mean) * (e - time_mean); + }) / + num_experiments; + double time_stdev = std::sqrt(time_var); + double time_min = *std::min_element(times.begin(), times.end()); + double time_max = *std::max_element(times.begin(), times.end()); // Log the average time taken for the kernel. - double time_mean = time_sum / num_experiments; logger->info("Average compute time = {:.6f}", time_mean); - logger->info("Compute time variance = {:g}", - time_squared_sum / num_experiments - time_mean * time_mean); + logger->info("Compute time standard deviation = {:8f}", time_stdev); logger->info("Minimum compute time = {:.6f}", time_min); logger->info("Maximum compute time = {:.6f}\n", time_max); + results[kernel_name] = {time_mean, time_stdev, time_min, time_max}; } + return results; } } // namespace benchmark diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index cc9dd3bcf0..f79cad62e5 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -8,14 +8,20 @@ #pragma once #include +#include #include +#include #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "utils/logger.hpp" namespace nmodl { namespace benchmark { +/** + * map of {name: [avg, stdev, min, max]} + */ +using BenchmarkResults = std::map>; + /** * \class LLVMBenchmark * \brief A wrapper to execute MOD file kernels via LLVM IR backend, and @@ -74,14 +80,14 @@ class LLVMBenchmark { , opt_level_codegen(opt_level_codegen) {} /// Runs the benchmark. 
- void run(const std::shared_ptr& node); + BenchmarkResults run(); private: /// Visits the AST to construct the LLVM IR module. - void generate_llvm(const std::shared_ptr& node); + void generate_llvm(); /// Runs the main body of the benchmark, executing the compute kernels. - void run_benchmark(const std::shared_ptr& node); + BenchmarkResults run_benchmark(); /// Sets the log output stream (file or console). void set_log_output(); diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a72f478c6e..e0d9104b7c 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -161,10 +161,8 @@ set(test_env ${NMODL_SANITIZER_ENABLE_ENVIRONMENT}) set(testvisitor_env "PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}") if(NOT LINK_AGAINST_PYTHON) list(APPEND testvisitor_env "NMODL_PYLIB=$ENV{NMODL_PYLIB}") - list( - APPEND - testvisitor_env - "NMODL_WRAPLIB=${PROJECT_BINARY_DIR}/lib/nmodl/libpywrapper${CMAKE_SHARED_LIBRARY_SUFFIX}") + list(APPEND testvisitor_env + "NMODL_WRAPLIB=${PROJECT_BINARY_DIR}/lib/nmodl/libpywrapper${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() foreach( diff --git a/test/unit/codegen/codegen_data_helper.hpp b/test/unit/codegen/codegen_data_helper.hpp index 76c4f422d9..c356a898ce 100644 --- a/test/unit/codegen/codegen_data_helper.hpp +++ b/test/unit/codegen/codegen_data_helper.hpp @@ -96,15 +96,12 @@ std::vector generate_dummy_data(size_t initial_value, size_t num_elements) { * to the MOD file. 
*/ class CodegenDataHelper { - std::shared_ptr program; std::shared_ptr instance; public: CodegenDataHelper() = delete; - CodegenDataHelper(const std::shared_ptr& program, - const std::shared_ptr& instance) - : program(program) - , instance(instance) {} + CodegenDataHelper(const std::shared_ptr& instance) + : instance(instance) {} CodegenInstanceData create_data(size_t num_elements, size_t seed); }; diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index c10cf0fdc0..f1c30e0a9f 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -321,7 +321,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { // Create the instance struct data. int num_elements = 4; const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); - auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); // Fill the instance struct data with some values. @@ -405,7 +405,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { // Create the instance struct data. int num_elements = 10; const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); - auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); // Fill the instance struct data with some values for unit testing. @@ -489,7 +489,7 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { // Create the instance struct data. 
int num_elements = 5; const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); - auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); // Fill the instance struct data with some values. @@ -582,7 +582,7 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { // Create the instance struct data. int num_elements = 5; const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); - auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); // Fill the instance struct data with some values. diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index fbb07dfbcd..401e0a6c63 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -47,7 +47,7 @@ codegen::CodegenInstanceData generate_instance_data(const std::string& text, llvm_visitor.visit_program(*ast); llvm_visitor.dump_module(); const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); - auto codegen_data = codegen::CodegenDataHelper(ast, generated_instance_struct); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); auto instance_data = codegen_data.create_data(num_elements, seed); return instance_data; } From af1ff70fd244788fd72fff06f01c2072d15762e6 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 29 Apr 2022 22:19:11 +0300 Subject: [PATCH 085/105] Fixes issue with debug printing of visitors (#854) * Fixes issue with debug printing of the various stages of code generation in files * Small fix and comment addition --- 
src/codegen/codegen_driver.cpp | 5 ++--- src/codegen/codegen_driver.hpp | 2 +- src/main.cpp | 2 +- src/pybind/pynmodl.cpp | 10 +++++++++- test/benchmark/benchmark.py | 3 ++- test/benchmark/jit_driver.hpp | 4 ++-- 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp index 542c880557..b264c47720 100644 --- a/src/codegen/codegen_driver.cpp +++ b/src/codegen/codegen_driver.cpp @@ -37,13 +37,12 @@ using namespace nmodl; using namespace codegen; using namespace visitor; -bool CodegenDriver::prepare_mod(std::shared_ptr node) { +bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::string& modfile) { /// whether to update existing symbol table or create new /// one whenever we run symtab visitor. bool update_symtab = false; - std::string modfile; - std::string scratch_dir = "tmp"; + const auto scratch_dir = cfg.scratch_dir; auto filepath = [scratch_dir, modfile](const std::string& suffix, const std::string& ext) { static int count = 0; return fmt::format( diff --git a/src/codegen/codegen_driver.hpp b/src/codegen/codegen_driver.hpp index aa2f9921d0..78c95421da 100644 --- a/src/codegen/codegen_driver.hpp +++ b/src/codegen/codegen_driver.hpp @@ -153,7 +153,7 @@ class CodegenDriver { explicit CodegenDriver(CodeGenConfig _cfg) : cfg(std::move(_cfg)) {} - bool prepare_mod(std::shared_ptr node); + bool prepare_mod(std::shared_ptr node, const std::string& modfile); private: CodeGenConfig cfg; diff --git a/src/main.cpp b/src/main.cpp index 55cea74673..8731077d14 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -298,7 +298,7 @@ int main(int argc, const char* argv[]) { const auto& ast = nmodl_driver.parse_file(file); auto cg_driver = CodegenDriver(cfg); - auto success = cg_driver.prepare_mod(ast); + auto success = cg_driver.prepare_mod(ast, modfile); if (show_symtab) { logger->info("Printing symbol table"); diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index 3779dccc33..9cd350ad5f 
100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -170,7 +170,12 @@ class JitDriver { std::string& modname, int num_experiments, int instance_size) { - cg_driver.prepare_mod(node); + // New directory is needed to be created otherwise the directory cannot be created + // automatically through python + if (cfg.nmodl_ast || cfg.json_ast || cfg.json_perfstat) { + utils::make_path(cfg.scratch_dir); + } + cg_driver.prepare_mod(node, modname); nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0); visitor.visit_program(*node); nmodl::benchmark::LLVMBenchmark benchmark(visitor, @@ -248,6 +253,9 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { .def_readwrite("output_dir", &nmodl::codegen::CodeGenConfig::output_dir) .def_readwrite("scratch_dir", &nmodl::codegen::CodeGenConfig::scratch_dir) .def_readwrite("data_type", &nmodl::codegen::CodeGenConfig::data_type) + .def_readwrite("nmodl_ast", &nmodl::codegen::CodeGenConfig::nmodl_ast) + .def_readwrite("json_ast", &nmodl::codegen::CodeGenConfig::json_ast) + .def_readwrite("json_perfstat", &nmodl::codegen::CodeGenConfig::json_perfstat) .def_readwrite("llvm_ir", &nmodl::codegen::CodeGenConfig::llvm_ir) .def_readwrite("llvm_float_type", &nmodl::codegen::CodeGenConfig::llvm_float_type) .def_readwrite("llvm_opt_level_ir", &nmodl::codegen::CodeGenConfig::llvm_opt_level_ir) diff --git a/test/benchmark/benchmark.py b/test/benchmark/benchmark.py index c39f97002a..c133f8d59c 100644 --- a/test/benchmark/benchmark.py +++ b/test/benchmark/benchmark.py @@ -10,6 +10,7 @@ def main(): cfg = nmodl.CodeGenConfig() cfg.llvm_vector_width = 4 cfg.llvm_opt_level_ir = 2 + cfg.nmodl_ast = True fname = sys.argv[1] with open(fname) as f: hh = f.read() @@ -22,4 +23,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp index 7106311523..ed86684f76 100644 --- a/test/benchmark/jit_driver.hpp +++ 
b/test/benchmark/jit_driver.hpp @@ -76,7 +76,7 @@ class JITDriver { if (!expected_symbol) throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); - auto (*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); + auto(*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); ReturnType result = res(); return result; } @@ -88,7 +88,7 @@ class JITDriver { if (!expected_symbol) throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); - auto (*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); + auto(*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); ReturnType result = res(arg); return result; } From 49a13af8ed1e26bcca9c93031c67fc1f66b81ca9 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Mon, 2 May 2022 11:51:00 +0200 Subject: [PATCH 086/105] Support for Breakpoint block (nrn_cur) for code generation (#645) * Support for Breakpoint block (nrn_cur) for code generation * similar to DERIVATIVE (nrn_state), handle BREAKPOINT (nrn_cur) blocks with AST level transformation * Move common code from CodegenCVisitor to CodegenInfo * Add tests fixes #644 Co-authored-by: George Mitenkov --- src/codegen/codegen_acc_visitor.cpp | 8 +- src/codegen/codegen_c_visitor.cpp | 45 +- src/codegen/codegen_c_visitor.hpp | 17 - src/codegen/codegen_cuda_visitor.cpp | 8 +- src/codegen/codegen_driver.cpp | 2 +- src/codegen/codegen_info.cpp | 29 ++ src/codegen/codegen_info.hpp | 30 ++ src/codegen/codegen_ispc_visitor.cpp | 4 +- src/codegen/codegen_naming.hpp | 6 + .../llvm/codegen_llvm_helper_visitor.cpp | 410 ++++++++++++++++-- .../llvm/codegen_llvm_helper_visitor.hpp | 21 + src/codegen/llvm/codegen_llvm_visitor.cpp | 6 + src/codegen/llvm/codegen_llvm_visitor.hpp | 3 + src/codegen/llvm/llvm_ir_builder.cpp | 11 +- src/language/nmodl.yaml | 21 +- test/unit/codegen/codegen_llvm_execution.cpp | 2 + test/unit/codegen/codegen_llvm_ir.cpp | 89 +++- 
test/unit/codegen/codegen_llvm_visitor.cpp | 404 ++++++++++++++++- 18 files changed, 987 insertions(+), 129 deletions(-) diff --git a/src/codegen/codegen_acc_visitor.cpp b/src/codegen/codegen_acc_visitor.cpp index 0c8838475c..c9cbfe028d 100644 --- a/src/codegen/codegen_acc_visitor.cpp +++ b/src/codegen/codegen_acc_visitor.cpp @@ -185,8 +185,8 @@ void CodegenAccVisitor::print_net_init_acc_serial_annotation_block_end() { } void CodegenAccVisitor::print_nrn_cur_matrix_shadow_update() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { print_atomic_reduction_pragma(); } @@ -202,8 +202,8 @@ void CodegenAccVisitor::print_fast_imem_calculation() { return; } - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); printer->start_block("if (nt->nrn_fast_imem)"); if (info.point_process) { print_atomic_reduction_pragma(); diff --git a/src/codegen/codegen_c_visitor.cpp b/src/codegen/codegen_c_visitor.cpp index 3ae0772752..4a9ff1de97 100644 --- a/src/codegen/codegen_c_visitor.cpp +++ b/src/codegen/codegen_c_visitor.cpp @@ -322,37 +322,6 @@ void CodegenCVisitor::visit_update_dt(const ast::UpdateDt& node) { /* Common helper routines */ /****************************************************************************************/ - -/** - * \details Certain statements like unit, comment, solve can/need to be skipped - * during code generation. Note that solve block is wrapped in expression - * statement and hence we have to check inner expression. It's also true - * for the initial block defined inside net receive block. 
- */ -bool CodegenCVisitor::statement_to_skip(const Statement& node) { - // clang-format off - if (node.is_unit_state() - || node.is_line_comment() - || node.is_block_comment() - || node.is_solve_block() - || node.is_conductance_hint() - || node.is_table_statement()) { - return true; - } - // clang-format on - if (node.is_expression_statement()) { - auto expression = dynamic_cast(&node)->get_expression(); - if (expression->is_solve_block()) { - return true; - } - if (expression->is_initial_block()) { - return true; - } - } - return false; -} - - /** * \details When floating point data type is not default (i.e. double) then we * have to copy old array to new type (for range variables). @@ -950,8 +919,8 @@ void CodegenCVisitor::print_nrn_cur_matrix_shadow_update() { printer->add_line("shadow_rhs[id] = rhs;"); printer->add_line("shadow_d[id] = g;"); } else { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); print_atomic_reduction_pragma(); printer->fmt_line("vec_rhs[node_id] {} rhs;", rhs_op); print_atomic_reduction_pragma(); @@ -961,8 +930,8 @@ void CodegenCVisitor::print_nrn_cur_matrix_shadow_update() { void CodegenCVisitor::print_nrn_cur_matrix_shadow_reduction() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { printer->add_line("int node_id = node_index[id];"); print_atomic_reduction_pragma(); @@ -1116,7 +1085,7 @@ void CodegenCVisitor::print_statement_block(const ast::StatementBlock& node, auto statements = node.get_statements(); for (const auto& statement: statements) { - if (statement_to_skip(*statement)) { + if (info.statement_to_skip(*statement)) { continue; } /// not necessary to add indent for verbatim block (pretty-printing) @@ -4280,8 +4249,8 @@ void CodegenCVisitor::print_fast_imem_calculation() { return; } 
std::string rhs, d; - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { rhs = "shadow_rhs[id]"; d = "shadow_d[id]"; diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index bf336819f3..36d8287e09 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -219,23 +219,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { return "\"" + text + "\""; } - - /** - * Operator for rhs vector update (matrix update) - */ - std::string operator_for_rhs() const noexcept { - return info.electrode_current ? "+=" : "-="; - } - - - /** - * Operator for diagonal vector update (matrix update) - */ - std::string operator_for_d() const noexcept { - return info.electrode_current ? "-=" : "+="; - } - - /** * Data type for the local variables */ diff --git a/src/codegen/codegen_cuda_visitor.cpp b/src/codegen/codegen_cuda_visitor.cpp index 8f58f04917..1c390ab0c9 100644 --- a/src/codegen/codegen_cuda_visitor.cpp +++ b/src/codegen/codegen_cuda_visitor.cpp @@ -94,8 +94,8 @@ void CodegenCudaVisitor::print_device_method_annotation() { void CodegenCudaVisitor::print_nrn_cur_matrix_shadow_update() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + auto rhs_op = info.operator_for_rhs(); + auto d_op = info.operator_for_d(); stringutils::remove_character(rhs_op, '='); stringutils::remove_character(d_op, '='); print_atomic_op("vec_rhs[node_id]", rhs_op, "rhs"); @@ -107,8 +107,8 @@ void CodegenCudaVisitor::print_fast_imem_calculation() { return; } - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + auto rhs_op = info.operator_for_rhs(); + auto d_op = info.operator_for_d(); stringutils::remove_character(rhs_op, '='); stringutils::remove_character(d_op, '='); printer->start_block("if (nt->nrn_fast_imem)"); diff --git a/src/codegen/codegen_driver.cpp 
b/src/codegen/codegen_driver.cpp index b264c47720..5d211fa724 100644 --- a/src/codegen/codegen_driver.cpp +++ b/src/codegen/codegen_driver.cpp @@ -180,7 +180,7 @@ bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::s /// that old symbols (e.g. prime variables) are not lost update_symtab = true; - if (cfg.nmodl_inline) { + if (cfg.nmodl_inline || cfg.llvm_ir) { logger->info("Running nmodl inline visitor"); InlineVisitor().visit_program(*node); ast_to_nmodl(*node, filepath("inline", "mod")); diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index 61b6869db3..a395b0e6de 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -388,5 +388,34 @@ void CodegenInfo::get_float_variables() { } } +/** + * \details Certain statements like unit, comment, solve can/need to be skipped + * during code generation. Note that solve block is wrapped in expression + * statement and hence we have to check inner expression. It's also true + * for the initial block defined inside net receive block. 
+ */ +bool CodegenInfo::statement_to_skip(const ast::Statement& node) const { + // clang-format off + if (node.is_unit_state() + || node.is_line_comment() + || node.is_block_comment() + || node.is_solve_block() + || node.is_conductance_hint() + || node.is_table_statement()) { + return true; + } + // clang-format on + if (node.is_expression_statement()) { + auto expression = dynamic_cast(&node)->get_expression(); + if (expression->is_solve_block()) { + return true; + } + if (expression->is_initial_block()) { + return true; + } + } + return false; +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index 4d9375f0c0..fc96db6e17 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -593,6 +593,29 @@ struct CodegenInfo { } + /** + * Operator for rhs vector update (matrix update) + * + * Note that we only rely on following two syntax for + * increment and decrement. Code generation backends + * are relying on this convention. + */ + std::string operator_for_rhs() const noexcept { + return electrode_current ? "+=" : "-="; + } + + + /** + * Operator for diagonal vector update (matrix update) + * + * Note that we only rely on following two syntax for + * increment and decrement. Code generation backends + * are relying on this convention. + */ + std::string operator_for_d() const noexcept { + return electrode_current ? 
"-=" : "+="; + } + /** * Check if net_receive function is required */ @@ -660,6 +683,13 @@ struct CodegenInfo { * \return A \c vector of \c float variables */ void get_float_variables(); + + /** + * Check if statement should be skipped for code generation + * @param node Statement to be checked for code generation + * @return True if statement should be skipped otherwise false + */ + bool statement_to_skip(const ast::Statement& node) const; }; /** @} */ // end of codegen_backends diff --git a/src/codegen/codegen_ispc_visitor.cpp b/src/codegen/codegen_ispc_visitor.cpp index ce7628691c..e9c482bc66 100644 --- a/src/codegen/codegen_ispc_visitor.cpp +++ b/src/codegen/codegen_ispc_visitor.cpp @@ -245,8 +245,8 @@ void CodegenIspcVisitor::print_atomic_op(const std::string& lhs, void CodegenIspcVisitor::print_nrn_cur_matrix_shadow_reduction() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { printer->add_line("uniform int node_id = node_index[id];"); printer->fmt_line("vec_rhs[node_id] {} shadow_rhs[id];", rhs_op); diff --git a/src/codegen/codegen_naming.hpp b/src/codegen/codegen_naming.hpp index 9ee2425e3b..cdd6f73d6d 100644 --- a/src/codegen/codegen_naming.hpp +++ b/src/codegen/codegen_naming.hpp @@ -95,6 +95,12 @@ static constexpr char CELSIUS_VARIABLE[] = "celsius"; /// instance struct member pointing to the global variable structure static constexpr char INST_GLOBAL_MEMBER[] = "global"; +/// rhs variable in neuron thread structure +static constexpr char NTHREAD_RHS[] = "vec_rhs"; + +/// d variable in neuron thread structure +static constexpr char NTHREAD_D[] = "vec_d"; + /// t variable in neuron thread structure static constexpr char NTHREAD_T_VARIABLE[] = "t"; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index fcec26f4e9..cdf66f0e17 100644 --- 
a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -92,18 +92,18 @@ static std::shared_ptr create_statement_as_expression(const std } /** - * \brief Create expression for given NMODL code expression + * \brief Create an expression for a given NMODL expression in string form * @param code NMODL code expression - * @return Expression representing NMODL code + * @return Expression ast node representing NMODL code */ -std::shared_ptr create_expression(const std::string& code) { +static ast::Expression* create_expression(const std::string& code) { /// as provided code is only expression and not a full statement, create /// a temporary assignment statement const auto& wrapped_expr = create_statement_as_expression("some_var = " + code); /// now extract RHS (representing original code) and return it as expression auto expr = std::dynamic_pointer_cast(wrapped_expr)->get_expression(); auto rhs = std::dynamic_pointer_cast(expr)->get_rhs(); - return std::make_shared(rhs->clone()); + return new ast::WrappedExpression(rhs->clone()); } CodegenFunctionVector CodegenLLVMHelperVisitor::get_codegen_functions(const ast::Program& node) { @@ -246,35 +246,72 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); + // As we do not have `NrnThread` object as an argument, we store points to rhs + // and d to in the instance struct as well. Also need their respective shadow variables + // in case of point process mechanism. + // Note: shadow variables are not used at the moment because reduction will be taken care + // by LLVM backend (even on CPU via sequential add like ISPC). 
+ add_var_with_type(naming::NTHREAD_RHS, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_RHS_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); + return std::make_shared(codegen_vars); } +/** + * Append all code specific statements from StatementBlock to given StatementVector + * @param statements Statement vector to which statements to be added + * @param block Statement block from which statetments should be appended + * @param info CodegenInfo object with necessary data and helper functions + */ static void append_statements_from_block(ast::StatementVector& statements, - const std::shared_ptr& block) { - const auto& block_statements = block->get_statements(); - for (const auto& statement: block_statements) { - const auto& expression_statement = std::dynamic_pointer_cast( - statement); - if (!expression_statement || !expression_statement->get_expression()->is_solve_block()) - statements.push_back(statement); + const std::shared_ptr block, + const codegen::CodegenInfo& info) { + for (const auto& statement: block->get_statements()) { + if (!info.statement_to_skip(*statement)) { + statements.emplace_back(statement->clone()); + } } } +/** + * Create atomic statement for given expression of the form a[i] += expression + * @param var Name of the variable on the LHS (it's an array), e.g. `a` + * @param var_index Name of the index variable to access variable `var` e.g. 
`i` + * @param op_str Operators like += or -= + * @param rhs_str expression that will be added or subtracted from `var[var_index]` + * @return A statement representing atomic operation using `ast::CodegenAtomicStatement` + */ static std::shared_ptr create_atomic_statement( - std::string& ion_varname, - std::string& index_varname, - std::string& op_str, - std::string& rhs_str) { + const std::string& var, + const std::string& var_index, + const std::string& op_str, + const std::string& rhs_str) { // create lhs expression - auto varname = new ast::Name(new ast::String(ion_varname)); - auto index = new ast::Name(new ast::String(index_varname)); - auto lhs = std::make_shared(new ast::IndexedName(varname, index), - /*at=*/nullptr, - /*index=*/nullptr); - - auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); - auto rhs = create_expression(rhs_str); - return std::make_shared(lhs, op, rhs); + auto varname = new ast::Name(new ast::String(var)); + auto index = new ast::Name(new ast::String(var_index)); + auto lhs = new ast::VarName(new ast::IndexedName(varname, index), + /*at=*/nullptr, + /*index=*/nullptr); + + // LLVM IR generation is now only supporting assignment (=) and not += or -= + // So we need to write increment operation a += b as an assignment operation + // a = a + b. 
+ // See https://github.com/BlueBrain/nmodl/issues/851 + + std::string op(op_str); + stringutils::remove_character(op, '='); + + // make sure only + or - operator is used + if (op_str != "-" && op_str != "+") { + throw std::runtime_error("Unsupported binary operator for atomic statement"); + } + + auto* rhs = create_expression("{}[{}] {} {} "_format(var, var_index, op, rhs_str)); + return std::make_shared(lhs, + ast::BinaryOperator{ast::BOP_ASSIGN}, + rhs); } /** @@ -289,7 +326,7 @@ static std::shared_ptr create_atomic_statement( * @param type The type of code block being generated * @param int_variables Index variables to be created * @param double_variables Floating point variables to be created - * @param index_statements Statements for loading indexes (typically for ions) + * @param index_statements Statements for loading indexes (typically for ions, rhs, d) * @param body_statements main compute/update statements * * \todo After looking into mod2c and neuron implementation, it seems like @@ -379,8 +416,24 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, // push index definition, index statement and actual write statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); + // pass ion variable to write and its index - body_statements.push_back(create_atomic_statement(ion_varname, index_varname, op, rhs)); + + // lhs variable + std::string lhs = "{}[{}] "_format(ion_varname, index_varname); + + // lets turn a += b into a = a + b if applicable + // note that this is done in order to facilitate existing implementation in the llvm + // backend which doesn't support += or -= operators. 
+ std::string statement; + if (!op.compare("+=")) { + statement = "{} = {} + {}"_format(lhs, lhs, rhs); + } else if (!op.compare("-=")) { + statement = "{} = {} - {}"_format(lhs, lhs, rhs); + } else { + statement = "{} {} {}"_format(lhs, op, rhs); + } + body_statements.push_back(visitor::create_statement(statement)); }; /// iterate over all ions and create write ion statements for given block type @@ -399,7 +452,7 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, // for synapse type if (info.point_process) { auto area = codegen::naming::NODE_AREA_VARIABLE; - rhs += fmt::format("*(1.e2/{})", area); + rhs += fmt::format("*(1.e2/{0}[{0}_id])", area); } create_write_statements(lhs, op, rhs); } @@ -629,19 +682,17 @@ std::shared_ptr CodegenLLVMHelperVisitor::loop_count_expression * create new code generation function. */ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { - /// statements for new function to be generated - ast::StatementVector function_statements; - - /// create vectors of local variables that would be used in compute part + // create vectors of local variables that would be used in compute part std::vector int_variables{"node_id"}; std::vector double_variables{"v"}; - /// create now main compute part - - /// compute body : initialization + solve blocks - ast::StatementVector def_statements; + // statements to load indexes for gather/scatter like variables ast::StatementVector index_statements; + + // statements for the main body of nrn_state ast::StatementVector body_statements; + + // prepare main body of the compute function { /// access node index and corresponding voltage index_statements.push_back( @@ -658,13 +709,13 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { const auto& solution = std::dynamic_pointer_cast(statement); const auto& block = std::dynamic_pointer_cast( solution->get_node_to_solve()); - append_statements_from_block(body_statements, block); + 
append_statements_from_block(body_statements, block, info); } /// add breakpoint block if no current if (info.currents.empty() && info.breakpoint_node != nullptr) { auto block = info.breakpoint_node->get_statement_block(); - append_statements_from_block(body_statements, block); + append_statements_from_block(body_statements, block, info); } /// write ion statements @@ -676,10 +727,12 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// create target-specific compute body ast::StatementVector compute_body; - compute_body.insert(compute_body.end(), def_statements.begin(), def_statements.end()); compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); + /// statements for new function to be generated + ast::StatementVector function_statements; + std::vector induction_variables{naming::INDUCTION_VAR}; function_statements.push_back( create_local_variable_statement(induction_variables, INTEGER_TYPE)); @@ -698,9 +751,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { auto name = new ast::Name(new ast::String(function_name)); auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); - /// \todo : currently there are no arguments + // argument to function: currently only instance structure ast::CodegenVarWithTypeVector code_arguments; - auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); auto instance_var_name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); @@ -711,7 +763,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { std::make_shared(return_type, name, code_arguments, function_block); codegen_functions.push_back(function); - std::cout << nmodl::to_nmodl(function) << std::endl; + // todo: remove this, 
temporary + std::cout << nmodl::to_nmodl(*function) << std::endl; } void CodegenLLVMHelperVisitor::create_gpu_compute_body(ast::StatementVector& body, @@ -804,6 +857,281 @@ void CodegenLLVMHelperVisitor::remove_inlined_nodes(ast::Program& node) { node.erase_node(nodes_to_erase); } +/** + * Print `nrn_cur` kernel with`CONDUCTANCE` statements in the BREAKPOINT block + * @param node Ast node representing BREAKPOINT block + * @param int_variables Vector of integer variables in the kernel being generated + * @param double_variables Vector of double variables in the kernel being generated + * @param index_statements Statements for loading indexes (typically for ions, rhs, d) + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + */ +void CodegenLLVMHelperVisitor::print_nrn_cur_conductance_kernel( + const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + // TODO: this is not used by default but only with sympy --conductance option. This should be + // implemented later and hence just throw an error for now. 
+ throw std::runtime_error( + "BREAKPOINT block with CONDUCTANCE statements is not supported in the LLVM backend yet"); +} + +/** + * Print `nrn_current` function that is typically generated as part of `nrn_cur()` + * @param node Ast node representing BREAKPOINT block + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + * @param variable Variable to which computed current will be assigned + */ +void CodegenLLVMHelperVisitor::print_nrn_current_body(const ast::BreakpointBlock& node, + ast::StatementVector& body_statements, + const std::string& variable) { + ast::StatementVector statements; + + // starts with current initialized to 0 + statements.emplace_back(visitor::create_statement("current = 0")); + + // append compatible code statements from the breakpoint block + append_statements_from_block(statements, node.get_statement_block(), info); + + // sum now all currents + for (auto& current: info.currents) { + statements.emplace_back( + visitor::create_statement("current = current + {}"_format(current))); + } + + // assign computed current to the given variable + statements.emplace_back(visitor::create_statement("{} = current"_format(variable))); + + // create StatementBlock for better readability of the generated code and add that to the main + // body statements + body_statements.emplace_back(new ast::ExpressionStatement(new ast::StatementBlock(statements))); +} + +/** + * Print `nrn_cur` kernel without `CONDUCTANCE` statements in the BREAKPOINT block + * @param node Ast node representing BREAKPOINT block + * @param int_variables Vector of integer variables in the kernel being generated + * @param double_variables Vector of double variables in the kernel being generated + * @param index_statements Statements for loading indexes (typically for ions, rhs, d) + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + */ +void 
CodegenLLVMHelperVisitor::print_nrn_cur_non_conductance_kernel( + const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + // add double variables needed in the local scope + double_variables.emplace_back("g"); + double_variables.emplace_back("rhs"); + double_variables.emplace_back("v_org"); + double_variables.emplace_back("current"); + + // store original voltage value as we are going to calculate current with v + 0.001 + body_statements.emplace_back(visitor::create_statement("v_org = v")); + + // first current calculation with v+0.001 and assign it to variable g + body_statements.emplace_back(visitor::create_statement("v = v + 0.001")); + print_nrn_current_body(node, body_statements, "g"); + + // now store all ionic currents to local variable + for (const auto& ion: info.ions) { + for (const auto& var: ion.writes) { + if (ion.is_ionic_current(var)) { + // also create local variable + std::string name{"di{}"_format(ion.name)}; + double_variables.emplace_back(name); + body_statements.emplace_back( + visitor::create_statement("{} = {}"_format(name, var))); + } + } + } + + // now restore original v and calculate current and store it in rhs + body_statements.emplace_back(visitor::create_statement("v = v_org")); + print_nrn_current_body(node, body_statements, "rhs"); + + // calculate g + body_statements.emplace_back(visitor::create_statement("g = (g-rhs)/0.001")); + + // in case of point process we need to load area from another vector. 
+ if (info.point_process) { + // create integer variable for index and then load value from area_index vector + int_variables.emplace_back("{}_id"_format(naming::NODE_AREA_VARIABLE)); + index_statements.emplace_back(visitor::create_statement( + " {0}_id = {0}_index[id]"_format(naming::NODE_AREA_VARIABLE))); + } + + // update all ionic currents now + for (const auto& ion: info.ions) { + for (const auto& var: ion.writes) { + if (ion.is_ionic_current(var)) { + // variable on the lhs + std::string lhs{"{}di{}dv"_format(naming::ION_VARNAME_PREFIX, ion.name)}; + + // expression on the rhs + std::string rhs{"(di{}-{})/0.001"_format(ion.name, var)}; + if (info.point_process) { + rhs += "*1.e2/{0}[{0}_id]"_format(naming::NODE_AREA_VARIABLE); + } + + // load the index for lhs variable + int_variables.emplace_back(lhs + "_id"); + std::string index_statement{"{}_id = {}_index[id]"_format(lhs, lhs)}; + index_statements.emplace_back(visitor::create_statement(index_statement)); + + // add statement that actually updates the + body_statements.emplace_back( + visitor::create_statement("{0}[{0}_id] = {0}[{0}_id] + {1}"_format(lhs, rhs))); + } + } + } +} + +/** + * \brief Convert ast::BreakpointBlock to corresponding code generation function nrn_cur + * @param node AST node representing ast::BreakpointBlock + * + * The BREAKPOINT block from MOD file (ast::NrnStateBlock node in the AST) is converted + * to `nrn_cur` function in the generated CPP code via various transformations. Here we + * perform those transformations and create new codegen node in the AST. 
+ */ +void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node) { + // no-op in case there are no currents or breakpoint block doesn't exist + if (!info.nrn_cur_required()) { + return; + } + + /// local variables in the function scope for integer and double variables + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + + /// statements to load indexes for gather/scatter like expressions + ast::StatementVector index_statements; + + /// statements for the rest of compute body + ast::StatementVector body_statements; + + /// prepare all function statements + { + /// access node index and corresponding voltage + index_statements.push_back( + visitor::create_statement("node_id = node_index[{}]"_format(naming::INDUCTION_VAR))); + body_statements.push_back(visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); + + /// read ion variables + ion_read_statements(BlockType::Equation, + int_variables, + double_variables, + index_statements, + body_statements); + + /// print main current kernel based on conductance exist of not + if (info.conductances.empty()) { + print_nrn_cur_non_conductance_kernel( + node, int_variables, double_variables, index_statements, body_statements); + } else { + print_nrn_cur_conductance_kernel( + node, int_variables, double_variables, index_statements, body_statements); + } + + /// add write ion statements + ion_write_statements(BlockType::Equation, + int_variables, + double_variables, + index_statements, + body_statements); + + /// in case of point process, we have to scale values based on the area + if (info.point_process) { + double_variables.emplace_back("mfactor"); + body_statements.emplace_back(visitor::create_statement( + "mfactor = 1.e2/{0}[{0}_id]"_format(naming::NODE_AREA_VARIABLE))); + body_statements.emplace_back(visitor::create_statement("g = g*mfactor")); + body_statements.emplace_back(visitor::create_statement("rhs = rhs*mfactor")); + } + + /// as multiple point processes can 
exist at same node, with simd or gpu execution we have + /// to create atomic statements that will be handled by llvm ir generation + // \todo note that we are not creating rhs and d updates based on the shadow vectors. This + // is because llvm backend for cpu as well as gpu is going to take care for + // reductions. if these codegen functions will be used for C backend then we will need + // to implement separate reduction loop like mod2c or nmodl's c backend. + if (info.point_process && (platform.is_gpu() || platform.is_cpu_with_simd())) { + body_statements.emplace_back(create_atomic_statement( + naming::NTHREAD_RHS, "node_id", info.operator_for_rhs(), "rhs")); + body_statements.emplace_back(create_atomic_statement( + naming::NTHREAD_D, "node_id", info.operator_for_rhs(), "g")); + } else { + auto rhs_op(info.operator_for_rhs()); + auto d_op(info.operator_for_d()); + + // convert a += b to a = a + b, see BlueBrain/nmodl/issues/851 + // hence write update of rhs and de in the form of assignment statements + stringutils::remove_character(rhs_op, '='); + stringutils::remove_character(d_op, '='); + + body_statements.emplace_back(visitor::create_statement( + "vec_rhs[node_id] = vec_rhs[node_id] {} rhs"_format(rhs_op))); + body_statements.emplace_back( + visitor::create_statement("vec_d[node_id] = vec_d[node_id] {} g"_format(d_op))); + } + } + + /// now create codegen function + { + /// compute body, index loading statements at the begining and then compute functions + ast::StatementVector compute_body; + compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); + compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); + + /// statements for new function to be generated + ast::StatementVector function_statements; + + std::vector induction_variables{naming::INDUCTION_VAR}; + function_statements.push_back( + create_local_variable_statement(induction_variables, INTEGER_TYPE)); + + if (platform.is_gpu()) { + 
create_gpu_compute_body(compute_body, + function_statements, + int_variables, + double_variables); + } else { + create_cpu_compute_body(compute_body, + function_statements, + int_variables, + double_variables); + } + + /// new block for the function + auto function_block = new ast::StatementBlock(function_statements); + + /// name of the function and it's return type + std::string function_name = "nrn_cur_" + stringutils::tolower(info.mod_suffix); + auto name = new ast::Name(new ast::String(function_name)); + auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); + + /// only instance struct as an argument for now + ast::CodegenVarWithTypeVector code_arguments; + auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); + auto instance_var_name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); + auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); + code_arguments.emplace_back(instance_var); + + /// finally, create new function + auto function = std::make_shared(return_type, + name, + code_arguments, + function_block); + codegen_functions.push_back(function); + + // todo: remove this, temporary + std::cout << nmodl::to_nmodl(*function) << std::endl; + } +} + void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { /// run codegen helper visitor to collect information CodegenHelperVisitor v; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index aea2f5aea8..ab554521fc 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -167,6 +167,13 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { void visit_procedure_block(ast::ProcedureBlock& node) override; void visit_function_block(ast::FunctionBlock& node) override; void visit_nrn_state_block(ast::NrnStateBlock& node) override; + + /** + * \brief Convert ast::BreakpointBlock to 
corresponding code generation function nrn_cur + * @param node AST node representing ast::BreakpointBlock + */ + void visit_breakpoint_block(ast::BreakpointBlock& node) override; + void visit_program(ast::Program& node) override; private: @@ -195,6 +202,20 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { std::vector& int_variables, std::vector& double_variables, bool is_remainder_loop = false); + + void print_nrn_current_body(const ast::BreakpointBlock& node, + ast::StatementVector& body_statements, + const std::string& variable); + void print_nrn_cur_non_conductance_kernel(const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + void print_nrn_cur_conductance_kernel(const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); }; /** @} */ // end of llvm_codegen_details diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 8dfb66e9e0..9e159f7aff 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -1098,5 +1098,11 @@ void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) ir_builder.set_insertion_point(exit); } +// for the llvm backend we only support breakpoint and derivative blocks +void CodegenLLVMVisitor::print_compute_functions() { + print_nrn_cur(); + print_nrn_state(); +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index a7af83721c..0862307337 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -299,6 +299,9 @@ class CodegenLLVMVisitor: public CodegenCVisitor { /// the kernel. 
void wrap_kernel_functions(); + /// print compute functions relevant for this backend + void print_compute_functions() override; + private: // Annotates kernel function with NVVM metadata. void annotate_kernel_with_nvvm(llvm::Function* kernel); diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index b99cc81817..82cb820049 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -59,9 +59,14 @@ llvm::Type* IRBuilder::get_void_type() { llvm::Type* IRBuilder::get_struct_ptr_type(const std::string& struct_type_name, TypeVector& member_types) { - llvm::StructType* llvm_struct_type = llvm::StructType::create(builder.getContext(), - struct_type_name); - llvm_struct_type->setBody(member_types); + llvm::StructType* llvm_struct_type = llvm::StructType::getTypeByName(builder.getContext(), + struct_type_name); + + if (!llvm_struct_type) { + llvm_struct_type = llvm::StructType::create(builder.getContext(), struct_type_name); + llvm_struct_type->setBody(member_types); + } + return llvm::PointerType::get(llvm_struct_type, /*AddressSpace=*/0); } diff --git a/src/language/nmodl.yaml b/src/language/nmodl.yaml index ef8fbbe49c..4999dc87a1 100644 --- a/src/language/nmodl.yaml +++ b/src/language/nmodl.yaml @@ -1573,11 +1573,28 @@ \sa nmodl::visitor::SympyConductanceVisitor - ExpressionStatement: - brief: "TODO" members: - expression: - brief: "TODO" + brief: "An expression representing a construct in the mod file" type: Expression + brief: "Represent statement encpasulated by underlying expression of ast node typeExpression" + description: | + Certain statements defined in the NMODL are complex than typical "single line" statements. + For example, often SOLVE block is written as: + + SOLVE states METHOD cnexp + + but language allow it to be more complex as: + + SOLVE states METHOD cnexp { + statement_1 + statement_2 + } + + So this type of construct is not really "single line" statement. 
There are other such cases + where they are categorised as "statement" in the bison specification. Also, there are cases + when a binary expression `a = b` is also a full statement.. + In all such cases we wrap underlying expression as statement using ExpressionStatement node. - ProtectStatement: brief: "TODO" diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index f1c30e0a9f..b7b8c2268c 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -288,6 +288,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { v x0 x1 + i (mA/cm2) } BREAKPOINT { @@ -371,6 +372,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { v x0 x1 + i (mA/cm2) } BREAKPOINT { diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 3edc6c198f..818e4104fe 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -951,10 +951,16 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { m } + PARAMETER { + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + ASSIGNED { v (mV) minf mtau (ms) + il (mA/cm2) } BREAKPOINT { @@ -974,7 +980,8 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { // Check the struct type with correct attributes and the kernel declaration. 
std::regex struct_type( "%.*__instance_var__type = type \\{ double\\*, double\\*, double\\*, double\\*, " - "double\\*, double\\*, double\\*, i32\\*, double, double, double, i32, i32 \\}"); + "double\\*, double\\*, double\\*, double\\*, double\\*, double\\*, i32\\*, double, " + "double, double, i32, i32, double\\*, double\\*, double\\*, double\\* \\}"); std::regex kernel_declaration( R"(define void @nrn_state_hh\(%.*__instance_var__type\* noalias nocapture readonly .*\) #0)"); REQUIRE(std::regex_search(module_string, m, struct_type)); @@ -1063,6 +1070,7 @@ SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { ASSIGNED { v (mV) + i (mA/cm2) } BREAKPOINT { @@ -1251,7 +1259,7 @@ SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { } )"; - std::string expected_loop = R"( + std::string expected_state_loop = R"( for(id = 0; idnode_count; id = id+1) { node_id = mech->node_index[id] v = mech->voltage[node_id] @@ -1263,10 +1271,10 @@ SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { auto result = run_llvm_visitor_helper(nmodl_text, default_platform, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); - REQUIRE(result.size() == 1); + REQUIRE(result.size() == 2); - auto main_loop = reindent_text(to_nmodl(result[0])); - REQUIRE(main_loop == reindent_text(expected_loop)); + auto main_state_loop = reindent_text(to_nmodl(result[1])); + REQUIRE(main_state_loop == reindent_text(expected_state_loop)); } } } @@ -1276,39 +1284,86 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { std::string nmodl_text = R"( NEURON { SUFFIX hh + USEION na READ ena WRITE ina NONSPECIFIC_CURRENT il - RANGE minf, mtau + RANGE minf, mtau, gna, gnabar } STATE { - m + m h + } + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> } ASSIGNED { v (mV) minf mtau (ms) + ena (mV) + ina (mA/cm2) + gna (S/cm2) } BREAKPOINT { SOLVE states METHOD cnexp - il = 2 + gna = gnabar*m*m*m*h + ina = gna*(v - ena) } DERIVATIVE states { m = (minf-m)/mtau } )"; - std::string 
expected_main_loop = R"( + std::string expected_state_main_loop = R"( for(id = 0; idnode_count-7; id = id+8) { node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] })"; - std::string expected_epilogue_loop = R"( + + std::string expected_state_epilogue_loop = R"( for(; idnode_count; id = id+1) { epilogue_node_id = mech->node_index[id] + epilogue_ena_id = mech->ion_ena_index[id] epilogue_v = mech->voltage[epilogue_node_id] + mech->ena[id] = mech->ion_ena[epilogue_ena_id] mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] })"; + std::string expected_cur_main_loop = R"( + for(id = 0; idnode_count-7; id = id+8) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + ion_ina_id = mech->ion_ina_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + current = current+il + current = current+mech->ina[id] + g = current + } + dina = mech->ina[id] + v = v_org + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + current = current+il + current = current+mech->ina[id] + rhs = current + } + g = (g-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 + mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+g + })"; THEN("vector and epilogue scalar loops are constructed") { codegen::Platform simd_platform(/*use_single_precision=*/false, @@ -1316,13 +1371,16 @@ SCENARIO("Vectorised derivative block", 
"[visitor][llvm][derivative]") { auto result = run_llvm_visitor_helper(nmodl_text, simd_platform, {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); - REQUIRE(result.size() == 2); + REQUIRE(result.size() == 4); + + auto cur_main_loop = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_main_loop == reindent_text(expected_cur_main_loop)); - auto main_loop = reindent_text(to_nmodl(result[0])); - REQUIRE(main_loop == reindent_text(expected_main_loop)); + auto state_main_loop = reindent_text(to_nmodl(result[2])); + REQUIRE(state_main_loop == reindent_text(expected_state_main_loop)); - auto epilogue_loop = reindent_text(to_nmodl(result[1])); - REQUIRE(epilogue_loop == reindent_text(expected_epilogue_loop)); + auto state_epilogue_loop = reindent_text(to_nmodl(result[3])); + REQUIRE(state_epilogue_loop == reindent_text(expected_state_epilogue_loop)); } } } @@ -1343,6 +1401,7 @@ SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { } ASSIGNED { v (mV) + il (mA/cm2) } BREAKPOINT { SOLVE states METHOD cnexp diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index d2a058b3c5..1906d0d27c 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -13,6 +13,7 @@ #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "test/unit/utils/test_utils.hpp" +#include "visitors/inline_visitor.hpp" #include "visitors/neuron_solve_visitor.hpp" #include "visitors/solve_block_visitor.hpp" #include "visitors/symtab_visitor.hpp" @@ -49,6 +50,23 @@ std::string get_wrapper_instance_struct(const std::string& nmodl_text) { return strbuf.str(); } +// Run LLVM codegen helper visitor with given platform as target +static std::vector> run_llvm_visitor_helper( + const std::string& text, + codegen::Platform& platform, + const std::vector& nodes_to_collect) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + 
InlineVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + CodegenLLVMHelperVisitor(platform).visit_program(*ast); + + return collect_nodes(*ast, nodes_to_collect); +} + SCENARIO("Check instance struct declaration and setup in wrapper", "[codegen][llvm][instance_struct]") { GIVEN("hh: simple mod file") { @@ -158,6 +176,10 @@ SCENARIO("Check instance struct declaration and setup in wrapper", double celsius; int secondorder; int node_count; + double* __restrict__ vec_rhs; + double* __restrict__ vec_d; + double* __restrict__ _shadow_rhs; + double* __restrict__ _shadow_d; }; )"; std::string generated_instance_struct_setup = R"( @@ -213,10 +235,9 @@ SCENARIO("Check instance struct declaration and setup in wrapper", } )"; - THEN("index and nt variables") { + THEN("index and nt variables created correctly") { auto result_instance_struct_declaration_setup = reindent_text( get_wrapper_instance_struct(nmodl_text)); - std::cout << "Result\n" << result_instance_struct_declaration_setup << std::endl; auto expected_instance_struct_declaration = reindent_text( generated_instance_struct_declaration); @@ -229,3 +250,382 @@ SCENARIO("Check instance struct declaration and setup in wrapper", } } } + + +SCENARIO("Channel: Derivative and breakpoint block llvm transformations", + "[visitor][llvm_helper][channel]") { + GIVEN("A hh.mod file with derivative and breakpoint block") { + std::string nmodl_text = R"( + TITLE hh.mod squid sodium, potassium, and leak channels + + UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) + } + + NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE + } + + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + + STATE { + m + h + n + } + + ASSIGNED { + v (mV) 
+ celsius (degC) + ena (mV) + ek (mV) + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf + hinf + ninf + mtau (ms) + htau (ms) + ntau (ms) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v-ena) + gk = gkbar*n*n*n*n + ik = gk*(v-ek) + il = gl*(v-el) + } + + DERIVATIVE states { + rates(v) + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau + } + + PROCEDURE rates(v(mV)) { + LOCAL alpha, beta, sum, q10 + UNITSOFF + q10 = 3^((celsius-6.3)/10) + alpha = .1*vtrap(-(v+40), 10) + beta = 4*exp(-(v+65)/18) + sum = alpha+beta + mtau = 1/(q10*sum) + minf = alpha/sum + alpha = .07*exp(-(v+65)/20) + beta = 1/(exp(-(v+35)/10)+1) + sum = alpha+beta + htau = 1/(q10*sum) + hinf = alpha/sum + alpha = .01*vtrap(-(v+55), 10) + beta = .125*exp(-(v+65)/80) + sum = alpha+beta + ntau = 1/(q10*sum) + ninf = alpha/sum + } + + FUNCTION vtrap(x, y) { + IF (fabs(x/y)<1e-6) { + vtrap = y*(1-x/y/2) + } ELSE { + vtrap = x/(exp(x/y)-1) + } + } + )"; + + std::string expected_state_function = R"( + VOID nrn_state_hh(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, ek_id + DOUBLE v + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + ek_id = mech->ion_ek_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->ek[id] = mech->ion_ek[ek_id] + { + DOUBLE alpha, beta, sum, q10, vtrap_in_0, vtrap_in_1, v_in_0 + v_in_0 = v + UNITSOFF + q10 = 3^((mech->celsius-6.3)/10) + { + DOUBLE x_in_0, y_in_0 + x_in_0 = -(v_in_0+40) + y_in_0 = 10 + IF (fabs(x_in_0/y_in_0)<1e-6) { + vtrap_in_0 = y_in_0*(1-x_in_0/y_in_0/2) + } ELSE { + vtrap_in_0 = x_in_0/(exp(x_in_0/y_in_0)-1) + } + } + alpha = .1*vtrap_in_0 + beta = 4*exp(-(v_in_0+65)/18) + sum = alpha+beta + mech->mtau[id] = 1/(q10*sum) + mech->minf[id] = alpha/sum + alpha = .07*exp(-(v_in_0+65)/20) + beta = 1/(exp(-(v_in_0+35)/10)+1) + sum = alpha+beta + mech->htau[id] = 1/(q10*sum) + 
mech->hinf[id] = alpha/sum + { + DOUBLE x_in_1, y_in_1 + x_in_1 = -(v_in_0+55) + y_in_1 = 10 + IF (fabs(x_in_1/y_in_1)<1e-6) { + vtrap_in_1 = y_in_1*(1-x_in_1/y_in_1/2) + } ELSE { + vtrap_in_1 = x_in_1/(exp(x_in_1/y_in_1)-1) + } + } + alpha = .01*vtrap_in_1 + beta = .125*exp(-(v_in_0+65)/80) + sum = alpha+beta + mech->ntau[id] = 1/(q10*sum) + mech->ninf[id] = alpha/sum + } + mech->m[id] = mech->m[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->mtau[id])))*(-(((mech->minf[id]))/mech->mtau[id])/((((-1.0)))/mech->mtau[id])-mech->m[id]) + mech->h[id] = mech->h[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->htau[id])))*(-(((mech->hinf[id]))/mech->htau[id])/((((-1.0)))/mech->htau[id])-mech->h[id]) + mech->n[id] = mech->n[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->ntau[id])))*(-(((mech->ninf[id]))/mech->ntau[id])/((((-1.0)))/mech->ntau[id])-mech->n[id]) + } + })"; + + std::string expected_cur_function = R"( + VOID nrn_cur_hh(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, ek_id, ion_dinadv_id, ion_dikdv_id, ion_ina_id, ion_ik_id + DOUBLE v, g, rhs, v_org, current, dina, dik + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + ek_id = mech->ion_ek_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + ion_dikdv_id = mech->ion_dikdv_index[id] + ion_ina_id = mech->ion_ina_index[id] + ion_ik_id = mech->ion_ik_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->ek[id] = mech->ion_ek[ek_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->gk[id] = mech->gkbar[id]*mech->n[id]*mech->n[id]*mech->n[id]*mech->n[id] + mech->ik[id] = mech->gk[id]*(v-mech->ek[id]) + mech->il[id] = mech->gl[id]*(v-mech->el[id]) + current = current+mech->il[id] + current = current+mech->ina[id] + current = current+mech->ik[id] + g = current + } + dina = mech->ina[id] + dik = 
mech->ik[id] + v = v_org + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->gk[id] = mech->gkbar[id]*mech->n[id]*mech->n[id]*mech->n[id]*mech->n[id] + mech->ik[id] = mech->gk[id]*(v-mech->ek[id]) + mech->il[id] = mech->gl[id]*(v-mech->el[id]) + current = current+mech->il[id] + current = current+mech->ina[id] + current = current+mech->ik[id] + rhs = current + } + g = (g-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 + mech->ion_dikdv[ion_dikdv_id] = mech->ion_dikdv[ion_dikdv_id]+(dik-mech->ik[id])/0.001 + mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] + mech->ion_ik[ion_ik_id] = mech->ion_ik[ion_ik_id]+mech->ik[id] + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+g + } + })"; + + THEN("codegen functions are constructed correctly for density channel") { + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + auto result = run_llvm_visitor_helper(nmodl_text, + simd_platform, + {ast::AstNodeType::CODEGEN_FUNCTION}); + REQUIRE(result.size() == 2); + + auto cur_function = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_function == reindent_text(expected_cur_function)); + + auto state_function = reindent_text(to_nmodl(result[1])); + REQUIRE(state_function == reindent_text(expected_state_function)); + } + } +} + +SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", + "[visitor][llvm_helper][derivative]") { + GIVEN("A exp2syn.mod file with derivative and breakpoint block") { + // note that USEION statement is added just for better code coverage (ionic current) + std::string nmodl_text = R"( + NEURON { + POINT_PROCESS Exp2Syn + USEION na READ ena WRITE ina + RANGE tau1, tau2, e, i + NONSPECIFIC_CURRENT i + RANGE g, gna + } + + UNITS { + (nA) = (nanoamp) + (mV) = (millivolt) + 
(uS) = (microsiemens) + } + + PARAMETER { + tau1 = 0.1 (ms) <1e-9,1e9> + tau2 = 10 (ms) <1e-9,1e9> + e = 0 (mV) + } + + ASSIGNED { + v (mV) + i (nA) + g (uS) + gna (S/cm2) + factor + } + + STATE { + A (uS) + B (uS) + } + + INITIAL { + LOCAL tp + IF (tau1/tau2>0.9999) { + tau1 = 0.9999*tau2 + } + IF (tau1/tau2<1e-9) { + tau1 = tau2*1e-9 + } + A = 0 + B = 0 + tp = (tau1*tau2)/(tau2-tau1)*log(tau2/tau1) + factor = -exp(-tp/tau1)+exp(-tp/tau2) + factor = 1/factor + } + + BREAKPOINT { + SOLVE state METHOD cnexp + ina = gna*(v-ena) + g = B-A + i = g*(v-e) + } + + DERIVATIVE state { + A' = -A/tau1 + B' = -B/tau2 + } + + NET_RECEIVE (weight(uS)) { + A = A+weight*factor + B = B+weight*factor + })"; + + std::string expected_cur_function = R"( + VOID nrn_cur_exp2syn(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, node_area_id, ion_dinadv_id, ion_ina_id + DOUBLE v, g, rhs, v_org, current, dina, mfactor + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + node_area_id = mech->node_area_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + ion_ina_id = mech->ion_ina_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->g[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g[id]*(v-mech->e[id]) + current = current+mech->i[id] + current = current+mech->ina[id] + mech->g[id] = current + } + dina = mech->ina[id] + v = v_org + { + current = 0 + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->g[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g[id]*(v-mech->e[id]) + current = current+mech->i[id] + current = current+mech->ina[id] + rhs = current + } + mech->g[id] = (mech->g[id]-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001*1.e2/mech->node_area[node_area_id] + mech->ion_ina[ion_ina_id] = 
mech->ion_ina[ion_ina_id]+mech->ina[id]*(1.e2/mech->node_area[node_area_id]) + mfactor = 1.e2/mech->node_area[node_area_id] + mech->g[id] = mech->g[id]*mfactor + rhs = rhs*mfactor + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+mech->g[id] + } + })"; + + std::string expected_state_function = R"( + VOID nrn_state_exp2syn(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id + DOUBLE v + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->A[id] = mech->A[id]+(1.0-exp(mech->dt*((-1.0)/mech->tau1[id])))*(-(0.0)/((-1.0)/mech->tau1[id])-mech->A[id]) + mech->B[id] = mech->B[id]+(1.0-exp(mech->dt*((-1.0)/mech->tau2[id])))*(-(0.0)/((-1.0)/mech->tau2[id])-mech->B[id]) + } + })"; + + THEN("codegen functions are constructed correctly for synapse") { + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + auto result = run_llvm_visitor_helper(nmodl_text, + simd_platform, + {ast::AstNodeType::CODEGEN_FUNCTION}); + REQUIRE(result.size() == 2); + + auto cur_function = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_function == reindent_text(expected_cur_function)); + + auto state_function = reindent_text(to_nmodl(result[1])); + REQUIRE(state_function == reindent_text(expected_state_function)); + } + } +} From 8797f9bd1c7acb696e485e38a307d895ef4dbc41 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 9 May 2022 14:00:15 +0200 Subject: [PATCH 087/105] [LLVM][GPU] Added CUDADriver to execute benchmark on GPU (#829) - Added CUDADriver to compile LLVM IR string generated from CodegenLLVMVisitor to PTX string and then execute it using CUDA API - Ability to select the compilation GPU architecture and then set the proper GPU architecture based on the GPU that is going to be used - Link `libdevice` math library with GPU LLVM module - Handles kernel 
and wrapper functions attributes properly for GPU execution (wrapper function is `kernel` and kernel attribute is `device`) - Small fixes in InstanceStruct declaration and setup to allocate the pointer variables properly, including the shadow variables - Adds tests in the CI that run small benchmarks in CPU and GPU on BB5 - Adds replacement of `log` math function for SLEEF and libdevice, `pow` and `fabs` for libdevice - Adds GPU execution ability in PyJIT - Small improvement in PyJIT benchmark python script to handle arguments and GPU execution - Separated benchmark info from benchmark driver - Added hh and expsyn mod files in benchmarking tests --- .gitlab-ci.yml | 37 ++-- CMakeLists.txt | 2 + INSTALL.md | 25 ++- src/codegen/codegen_driver.hpp | 3 - .../llvm/codegen_llvm_helper_visitor.cpp | 18 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 78 +++++-- src/codegen/llvm/codegen_llvm_visitor.hpp | 12 +- src/codegen/llvm/llvm_utils.cpp | 35 ++- src/codegen/llvm/llvm_utils.hpp | 9 +- .../llvm/replace_with_lib_functions.cpp | 11 +- src/main.cpp | 51 +++-- src/pybind/CMakeLists.txt | 3 +- src/pybind/pynmodl.cpp | 36 +++- test/benchmark/CMakeLists.txt | 30 ++- test/benchmark/benchmark.py | 26 ++- test/benchmark/benchmark_info.hpp | 29 +++ test/benchmark/cuda_driver.cpp | 201 ++++++++++++++++++ test/benchmark/cuda_driver.hpp | 187 ++++++++++++++++ test/benchmark/gpu_parameters.hpp | 27 +++ test/benchmark/jit_driver.hpp | 20 +- test/benchmark/kernels/expsyn.mod | 42 ++++ test/benchmark/kernels/hh.mod | 125 +++++++++++ test/benchmark/llvm_benchmark.cpp | 55 ++++- test/benchmark/llvm_benchmark.hpp | 49 ++++- test/integration/mod/test_math.mod | 16 ++ test/unit/CMakeLists.txt | 1 - .../codegen/codegen_llvm_instance_struct.cpp | 18 +- test/unit/codegen/codegen_llvm_ir.cpp | 12 +- test/unit/codegen/codegen_llvm_visitor.cpp | 12 +- 29 files changed, 1045 insertions(+), 125 deletions(-) create mode 100644 test/benchmark/benchmark_info.hpp create mode 100644 
test/benchmark/cuda_driver.cpp create mode 100644 test/benchmark/cuda_driver.hpp create mode 100644 test/benchmark/gpu_parameters.hpp create mode 100644 test/benchmark/kernels/expsyn.mod create mode 100644 test/benchmark/kernels/hh.mod create mode 100644 test/integration/mod/test_math.mod diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7af190b392..3d75cd928d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,7 +42,6 @@ trigger cvf: variables: SPACK_PACKAGE: nmodl SPACK_PACKAGE_SPEC: ~legacy-unit+python+llvm - SPACK_EXTRA_MODULES: llvm SPACK_INSTALL_EXTRA_FLAGS: -v spack_setup: @@ -67,14 +66,6 @@ build:intel: variables: SPACK_PACKAGE_COMPILER: intel -build:gcc: - extends: - - .spack_build - - .spack_nmodl - variables: - SPACK_PACKAGE_COMPILER: gcc - SPACK_PACKAGE_DEPENDENCIES: ^bison%gcc^flex%gcc^py-jinja2%gcc^py-sympy%gcc^py-pyyaml%gcc - .nmodl_tests: variables: # https://github.com/BlueBrain/nmodl/issues/737 @@ -86,8 +77,30 @@ test:intel: - .nmodl_tests needs: ["build:intel"] -test:gcc: +.benchmark_config: + variables: + bb5_ntasks: 1 + bb5_cpus_per_task: 1 + bb5_memory: 16G + bb5_exclusive: full + bb5_constraint: gpu_32g # CascadeLake CPU & V100 GPU node + +.build_allocation: + variables: + bb5_ntasks: 2 # so we block 16 cores + bb5_cpus_per_task: 8 # ninja -j {this} + bb5_memory: 76G # ~16*384/80 + +build_cuda:gcc: + extends: [.spack_build, .build_allocation] + variables: + SPACK_PACKAGE: nmodl + SPACK_PACKAGE_SPEC: ~legacy-unit+python+llvm+llvm_cuda + SPACK_INSTALL_EXTRA_FLAGS: -v + SPACK_PACKAGE_COMPILER: gcc + +test_benchmark:gcc: extends: + - .benchmark_config - .ctest - - .nmodl_tests - needs: ["build:gcc"] + needs: ["build_cuda:gcc"] diff --git a/CMakeLists.txt b/CMakeLists.txt index 550034d098..6414a16830 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" ON) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. 
instead of 2019 nist constants" OFF)
 option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON)
 option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" ON)
+option(NMODL_ENABLE_LLVM_CUDA "Enable LLVM CUDA backend to run GPU benchmark" OFF)
 option(NMODL_ENABLE_JIT_EVENT_LISTENERS "Enable JITEventListener for Perf and Vtune" OFF)
 
 if(NMODL_ENABLE_LEGACY_UNITS)
@@ -188,6 +189,7 @@ if(NMODL_ENABLE_LLVM)
   if(NMODL_ENABLE_LLVM_CUDA)
     enable_language(CUDA)
     find_package(CUDAToolkit)
+    include_directories(${CUDAToolkit_INCLUDE_DIRS})
     add_definitions(-DNMODL_LLVM_CUDA_BACKEND)
   endif()
 endif()
diff --git a/INSTALL.md b/INSTALL.md
index 20f869f5e2..36c4e047af 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -21,7 +21,7 @@ To build the project from source, a modern C++ compiler with C++14 support is ne
 
 - flex (>=2.6)
 - bison (>=3.0)
-- CMake (>=3.15)
+- CMake (>=3.17)
 - Python (>=3.7)
 - Python packages : jinja2 (>=2.10), pyyaml (>=3.13), pytest (>=4.0.0), sympy (>=1.3), textwrap
 
@@ -141,6 +141,29 @@ export NMODL_WRAPLIB=/opt/nmodl/lib/libpywrapper.so
 **Note**: In order for all unit tests to function correctly when building without linking against libpython we must set `NMODL_PYLIB` before running cmake!
 
+### Using CUDA backend to run benchmarks
+
+`NMODL` supports generating code and compiling it for execution on an `NVIDIA` GPU via its benchmark infrastructure using the `LLVM` backend. To enable the `CUDA` backend to compile and execute the GPU code we need to set the following `CMake` flag during compilation of `NMODL`:
+```
+-DNMODL_ENABLE_LLVM_CUDA=ON
+```
+
+To find the needed `CUDA` libraries (`cudart` and `nvrtc`), you need to have the CUDA Toolkit installed on your system.
+This can be done by installing the CUDA Toolkit from the [CUDA Toolkit website](https://developer.nvidia.com/cuda-downloads) or by installing the `CUDA` spack package and loading the corresponding module. 
+ 
+Then, given a supported MOD file, you can execute the benchmark on your supported NVIDIA GPU by running the following command:
+```
+./bin/nmodl .mod llvm --no-debug --ir --opt-level-ir 3 gpu --target-arch "sm_80" --name "nvptx64" --math-library libdevice benchmark --run --libs "${CUDA_ROOT}/nvvm/libdevice/libdevice.10.bc" --opt-level-codegen 3 --instance-size 10000000 --repeat 2 --grid-dim-x 4096 --block-dim-x 256
+```
+The above command executes the benchmark on a GPU with `Compute Architecture` `sm_80` and links the generated code to the `libdevice` optimized math library provided by `NVIDIA`.
+Using the above command you can also select the optimization level of the generated code, the instance size of the generated data, the number of repetitions and the grid and block dimensions for the GPU execution.
+
+**Note**: In order for the CUDA backend to be able to compile and execute the generated code on GPU, the installed CUDA Toolkit needs to be the same version as the `CUDA` version installed by the NVIDIA driver on the system that will be used to run the benchmark.
+You can find the CUDA Toolkit version by running the following command:
+```
+nvidia-smi
+```
+and noting the `CUDA Version` stated there. For example, if the `CUDA Version` reported by `nvidia-smi` is CUDA 11.4 you need to install the `CUDA Toolkit 11.4.*` to be able to compile and execute the GPU code. 
## Testing the Installed Module diff --git a/src/codegen/codegen_driver.hpp b/src/codegen/codegen_driver.hpp index 78c95421da..14d8ed76ab 100644 --- a/src/codegen/codegen_driver.hpp +++ b/src/codegen/codegen_driver.hpp @@ -33,9 +33,6 @@ struct CodeGenConfig { /// true if cuda code to be generated bool cuda_backend = false; - /// true if llvm code to be generated - bool llvm_backend = false; - /// true if sympy should be used for solving ODEs analytically bool sympy_analytic = false; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index cdf66f0e17..fc0ab1c7b8 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -239,13 +239,6 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s add_var_with_type(VOLTAGE_VAR, FLOAT_TYPE, /*is_pointer=*/1); add_var_with_type(NODE_INDEX_VAR, INTEGER_TYPE, /*is_pointer=*/1); - // add dt, t, celsius - add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); - // As we do not have `NrnThread` object as an argument, we store points to rhs // and d to in the instance struct as well. Also need their respective shadow variables // in case of point process mechanism. 
@@ -256,6 +249,17 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s add_var_with_type(naming::NTHREAD_RHS_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); add_var_with_type(naming::NTHREAD_D_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); + // NOTE: All the pointer variables should be declared before the scalar variables otherwise + // the allocation of memory for the variables in the InstanceStruct and their offsets will be + // wrong + + // add dt, t, celsius + add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); + return std::make_shared(codegen_vars); } diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 9e159f7aff..de6c7ad914 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -25,6 +25,10 @@ namespace codegen { /* Helper routines */ /****************************************************************************************/ +static std::string get_wrapper_name(const std::string& kernel_name) { + return "__" + kernel_name + "_wrapper"; +} + /// A utility to check for supported Statement AST nodes. 
static bool is_supported_statement(const ast::Statement& statement) { return statement.is_codegen_atomic_statement() || statement.is_codegen_for_statement() || @@ -55,15 +59,36 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym return unsupported.empty() && supported.size() <= 1; } -void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel) { +void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel, + const std::string& annotation = "kernel") { llvm::Metadata* metadata[] = {llvm::ValueAsMetadata::get(kernel), - llvm::MDString::get(*context, "kernel"), + llvm::MDString::get(*context, annotation), llvm::ValueAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 1))}; llvm::MDNode* node = llvm::MDNode::get(*context, metadata); module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); } +void CodegenLLVMVisitor::annotate_wrapper_kernels_with_nvvm() { + // First clear all the nvvm annotations from the module + auto module_named_metadata = module->getNamedMetadata("nvvm.annotations"); + module->eraseNamedMetadata(module_named_metadata); + + // Then each kernel should be annotated as "device" function and wrappers should be annotated as + // "kernel" functions + std::vector kernel_names; + find_kernel_names(kernel_names); + + for (const auto& kernel_name: kernel_names) { + // Get the kernel function. + auto kernel = module->getFunction(kernel_name); + // Get the kernel wrapper function. 
+ auto kernel_wrapper = module->getFunction(get_wrapper_name(kernel_name)); + annotate_kernel_with_nvvm(kernel, "device"); + annotate_kernel_with_nvvm(kernel_wrapper, "kernel"); + } +} + llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { node->accept(*this); return ir_builder.pop_last_value(); @@ -402,12 +427,17 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { auto kernel = module->getFunction(kernel_name); // Create a wrapper void function that takes a void pointer as a single argument. - llvm::Type* i32_type = ir_builder.get_i32_type(); + llvm::Type* return_type; + if (platform.is_gpu()) { + return_type = ir_builder.get_void_type(); + } else { + return_type = ir_builder.get_i32_type(); + } llvm::Type* void_ptr_type = ir_builder.get_i8_ptr_type(); llvm::Function* wrapper_func = llvm::Function::Create( - llvm::FunctionType::get(i32_type, {void_ptr_type}, /*isVarArg=*/false), + llvm::FunctionType::get(return_type, {void_ptr_type}, /*isVarArg=*/false), llvm::Function::ExternalLinkage, - "__" + kernel_name + "_wrapper", + get_wrapper_name(kernel_name), *module); // Optionally, add debug information for the wrapper function. @@ -425,9 +455,23 @@ void CodegenLLVMVisitor::wrap_kernel_functions() { args.push_back(bitcasted); ir_builder.create_function_call(kernel, args, /*use_result=*/false); - // Create a 0 return value and a return instruction. - ir_builder.create_i32_constant(0); - ir_builder.create_return(ir_builder.pop_last_value()); + // create return instructions and annotate wrapper with certain attributes depending on + // the backend type + if (platform.is_gpu()) { + // return void + ir_builder.create_return(); + } else { + // Create a 0 return value and a return instruction. 
+ ir_builder.create_i32_constant(0); + ir_builder.create_return(ir_builder.pop_last_value()); + ir_builder.set_function(wrapper_func); + ir_builder.set_kernel_attributes(); + } + ir_builder.clear_function(); + } + // for GPU we need to first clear all the annotations and then reapply them + if (platform.is_gpu()) { + annotate_wrapper_kernels_with_nvvm(); } } @@ -823,9 +867,6 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { // Handle GPU optimizations (CUDA platfroms only for now). if (platform.is_gpu()) { - if (!platform.is_CUDA_gpu()) - throw std::runtime_error("Error: unsupported GPU architecture!\n"); - // We only support CUDA backends anyway, so this works for now. utils::initialise_nvptx_passes(); @@ -839,15 +880,12 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { logger->debug("Dumping generated IR...\n" + dump_module()); } - // If the output directory is specified, save the IR to .ll file. - if (output_dir != ".") { - utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); - } - // Setup CodegenHelper for C++ wrapper file setup(node); + // Print C++ wrapper file print_wrapper_routines(); - print_target_file(); + // Print LLVM IR module to .ll file + utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); } void CodegenLLVMVisitor::print_mechanism_range_var_structure() { @@ -960,6 +998,12 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { // Pass ml->nodeindices pointer to node_index printer->add_line("inst->node_index = ml->nodeindices;"); + // Setup rhs, d and their shadow vectors + printer->add_line(fmt::format("inst->{} = nt->_actual_rhs;", naming::NTHREAD_RHS)); + printer->add_line(fmt::format("inst->{} = nt->_actual_d;", naming::NTHREAD_D)); + printer->add_line(fmt::format("inst->{} = nt->_shadow_rhs;", naming::NTHREAD_RHS_SHADOW)); + printer->add_line(fmt::format("inst->{} = nt->_shadow_d;", naming::NTHREAD_D_SHADOW)); + // Setup global variables printer->add_line("inst->{0} 
= nt->{0};"_format(naming::NTHREAD_T_VARIABLE)); printer->add_line("inst->{0} = nt->{0};"_format(naming::NTHREAD_DT_VARIABLE)); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index 0862307337..a22f698431 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -139,10 +139,6 @@ class CodegenLLVMVisitor: public CodegenCVisitor { return str; } - void print_target_file() const { - target_printer->add_multi_line(dump_module()); - } - /// Fills the container with the names of kernel functions from the MOD file. void find_kernel_names(std::vector& container); @@ -303,8 +299,12 @@ class CodegenLLVMVisitor: public CodegenCVisitor { void print_compute_functions() override; private: - // Annotates kernel function with NVVM metadata. - void annotate_kernel_with_nvvm(llvm::Function* kernel); + /// Annotates kernel function with NVVM metadata. + void annotate_kernel_with_nvvm(llvm::Function* kernel, const std::string& annotation); + + /// Handles NVVM function annotations when we create the wrapper functions. All original kernels + /// should be "device" functions and wrappers "kernel" functions + void annotate_wrapper_kernels_with_nvvm(); /// Accepts the given AST node and returns the processed value. llvm::Value* accept_and_get(const std::shared_ptr& node); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index bd4feee32f..f6590fec5b 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -75,12 +75,9 @@ void initialise_nvptx_passes() { initialise_optimisation_passes(); } -void optimise_module_for_nvptx(codegen::Platform& platform, - llvm::Module& module, - int opt_level, - std::string& target_asm) { +std::unique_ptr create_CUDA_target_machine(const codegen::Platform& platform, + llvm::Module& module) { // CUDA target machine we generating code for. 
- std::unique_ptr tm; std::string platform_name = platform.get_name(); // Target and layout information. @@ -111,9 +108,30 @@ void optimise_module_for_nvptx(codegen::Platform& platform, if (!target) throw std::runtime_error("Error: " + error_msg + "\n"); + std::unique_ptr tm; tm.reset(target->createTargetMachine(triple, subtarget, features, {}, {})); if (!tm) throw std::runtime_error("Error: creating target machine failed! Aborting."); + return tm; +} + +std::string get_module_ptx(llvm::TargetMachine& tm, llvm::Module& module) { + std::string target_asm; + llvm::raw_string_ostream stream(target_asm); + llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegen_pm; + + tm.addPassesToEmitFile(codegen_pm, pstream, nullptr, llvm::CGFT_AssemblyFile); + codegen_pm.run(module); + return target_asm; +} + +void optimise_module_for_nvptx(const codegen::Platform& platform, + llvm::Module& module, + int opt_level, + std::string& target_asm) { + // Create target machine for CUDA GPU + auto tm = create_CUDA_target_machine(platform, module); // Create pass managers. llvm::legacy::FunctionPassManager func_pm(&module); @@ -137,12 +155,7 @@ void optimise_module_for_nvptx(codegen::Platform& platform, // Now, we want to run target-specific (e.g. NVPTX) passes. In LLVM, this // is done via `addPassesToEmitFile`. - llvm::raw_string_ostream stream(target_asm); - llvm::buffer_ostream pstream(stream); - llvm::legacy::PassManager codegen_pm; - - tm->addPassesToEmitFile(codegen_pm, pstream, nullptr, llvm::CGFT_AssemblyFile); - codegen_pm.run(module); + target_asm = get_module_ptx(*tm, module); } void initialise_optimisation_passes() { diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp index 3394463317..9763718ab0 100644 --- a/src/codegen/llvm/llvm_utils.hpp +++ b/src/codegen/llvm/llvm_utils.hpp @@ -21,11 +21,18 @@ void initialise_optimisation_passes(); /// Initialises NVPTX-specific optimisation passes. 
void initialise_nvptx_passes(); +//// Initializes a CUDA target machine +std::unique_ptr create_CUDA_target_machine(const codegen::Platform& platform, + llvm::Module& module); + +/// Generate PTX code given a CUDA target machine and the module +std::string get_module_ptx(llvm::TargetMachine& tm, llvm::Module& module); + /// Replaces calls to LLVM intrinsics with appropriate library calls. void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module); /// Optimises the given LLVM IR module for NVPTX targets. -void optimise_module_for_nvptx(codegen::Platform& platform, +void optimise_module_for_nvptx(const codegen::Platform& platform, llvm::Module& module, int opt_level, std::string& target_asm); diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp b/src/codegen/llvm/replace_with_lib_functions.cpp index 6d98dd3eb0..750e2c2318 100644 --- a/src/codegen/llvm/replace_with_lib_functions.cpp +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -72,6 +72,8 @@ void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibrary DISPATCH("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2)) DISPATCH("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4)) DISPATCH("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2)) + DISPATCH("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4)) + DISPATCH("llvm.log.f64", "_ZGVnN2v_log", FIXED(2)) // clang-format on }; const VecDesc x86_functions[] = { @@ -82,6 +84,9 @@ void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibrary DISPATCH("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2)) DISPATCH("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4)) DISPATCH("llvm.pow.f64", "_ZGVeN8vv_pow", FIXED(8)) + DISPATCH("llvm.log.f64", "_ZGVbN2v_log", FIXED(2)) + DISPATCH("llvm.log.f64", "_ZGVdN4v_log", FIXED(4)) + DISPATCH("llvm.log.f64", "_ZGVeN8v_log", FIXED(8)) // clang-format on }; #undef DISPATCH @@ -166,7 +171,11 @@ bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { static const std::map libdevice_name = {{"llvm.exp.f32", 
"__nv_expf"}, {"llvm.exp.f64", "__nv_exp"}, {"llvm.pow.f32", "__nv_powf"}, - {"llvm.pow.f64", "__nv_pow"}}; + {"llvm.pow.f64", "__nv_pow"}, + {"llvm.log.f32", "__nv_logf"}, + {"llvm.log.f64", "__nv_log"}, + {"llvm.fabs.f32", "__nv_fabsf"}, + {"llvm.fabs.f64", "__nv_fabs"}}; // If replacement is not supported, abort. std::string old_name = function->getName().str(); diff --git a/src/main.cpp b/src/main.cpp index 8731077d14..6095c89be2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -65,6 +65,12 @@ int main(int argc, const char* argv[]) { /// the number of repeated experiments for the benchmarking int num_experiments = 100; + + /// X dimension of grid in blocks for GPU execution + int llvm_cuda_grid_dim_x = 1; + + /// X dimension of block in threads for GPU execution + int llvm_cuda_block_dim_x = 1; #endif CodeGenConfig cfg; @@ -227,9 +233,10 @@ int main(int argc, const char* argv[]) { auto gpu_target_name = gpu_opt->add_option("--name", cfg.llvm_gpu_name, "Name of GPU platform to use")->ignore_case(); - gpu_opt->add_option("--target-chip", + gpu_target_name->check(CLI::IsMember({"nvptx", "nvptx64"})); + gpu_opt->add_option("--target-arch", cfg.llvm_gpu_target_architecture, - "Name of target chip to use")->ignore_case(); + "Name of target architecture to use")->ignore_case(); auto gpu_math_library_opt = gpu_opt->add_option("--math-library", cfg.llvm_math_library, "Math library for GPU code generation ({})"_format(cfg.llvm_math_library)); @@ -257,6 +264,12 @@ int main(int argc, const char* argv[]) { benchmark_opt->add_option("--repeat", num_experiments, fmt::format("Number of experiments for benchmarking ({})", num_experiments))->ignore_case(); + benchmark_opt->add_option("--grid-dim-x", + llvm_cuda_grid_dim_x, + fmt::format("Grid dimension X ({})", llvm_cuda_grid_dim_x))->ignore_case(); + benchmark_opt->add_option("--block-dim-x", + llvm_cuda_block_dim_x, + fmt::format("Block dimension X ({})", llvm_cuda_block_dim_x))->ignore_case(); #endif // clang-format on @@ 
-362,11 +375,18 @@ int main(int argc, const char* argv[]) { : cfg.llvm_gpu_name; Platform platform(pid, name, - cfg.llvm_cpu_name, + cfg.llvm_gpu_target_architecture, cfg.llvm_math_library, cfg.llvm_float_type, cfg.llvm_vector_width); + // GPU code generation doesn't support debug information at the moment so disable it + // in case it's enabled + if (!cfg.llvm_no_debug && platform.is_gpu()) { + logger->warn("Disabling addition of debug symbols in GPU code."); + cfg.llvm_no_debug = true; + } + logger->info("Running LLVM backend code generator"); CodegenLLVMVisitor visitor(modfile, cfg.output_dir, @@ -387,23 +407,30 @@ int main(int argc, const char* argv[]) { } if (llvm_benchmark) { - // \todo integrate Platform class here - if (cfg.llvm_gpu_name != "default") { - logger->warn( - "GPU benchmarking is not supported, targeting " - "CPU instead"); - } - logger->info("Running LLVM benchmark"); + if (platform.is_gpu() && !platform.is_CUDA_gpu()) { + throw std::runtime_error( + "Benchmarking is only supported on CUDA GPUs at the moment"); + } +#ifndef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + throw std::runtime_error( + "GPU benchmarking is not supported if NMODL is not built with CUDA " + "backend enabled."); + } +#endif + const GPUExecutionParameters gpu_execution_parameters{llvm_cuda_grid_dim_x, + llvm_cuda_block_dim_x}; benchmark::LLVMBenchmark benchmark(visitor, modfile, cfg.output_dir, cfg.shared_lib_paths, num_experiments, instance_size, - cfg.llvm_cpu_name, + platform, cfg.llvm_opt_level_ir, - cfg.llvm_opt_level_codegen); + cfg.llvm_opt_level_codegen, + gpu_execution_parameters); benchmark.run(); } } diff --git a/src/pybind/CMakeLists.txt b/src/pybind/CMakeLists.txt index d89c68ba86..eb4d57ea94 100644 --- a/src/pybind/CMakeLists.txt +++ b/src/pybind/CMakeLists.txt @@ -65,7 +65,8 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) # Additional options are needed when LLVM JIT functionality is built if(NMODL_ENABLE_LLVM) - set_property(TARGET codegen llvm_codegen 
llvm_benchmark benchmark_data PROPERTY POSITION_INDEPENDENT_CODE ON) + set_property(TARGET codegen llvm_codegen llvm_benchmark benchmark_data + PROPERTY POSITION_INDEPENDENT_CODE ON) target_link_libraries(_nmodl PRIVATE codegen llvm_codegen llvm_benchmark benchmark_data ${LLVM_LIBS_TO_LINK}) endif() diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index 9cd350ad5f..a3176cc570 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -149,8 +149,22 @@ class JitDriver { : nmodl::codegen::PlatformID::GPU; const std::string name = cfg.llvm_gpu_name == "default" ? cfg.llvm_cpu_name : cfg.llvm_gpu_name; - platform = nmodl::codegen::Platform( - pid, name, cfg.llvm_math_library, cfg.llvm_float_type, cfg.llvm_vector_width); + platform = nmodl::codegen::Platform(pid, + name, + cfg.llvm_gpu_target_architecture, + cfg.llvm_math_library, + cfg.llvm_float_type, + cfg.llvm_vector_width); + if (platform.is_gpu() && !platform.is_CUDA_gpu()) { + throw std::runtime_error("Benchmarking is only supported on CUDA GPUs at the moment"); + } +#ifndef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + throw std::runtime_error( + "GPU benchmarking is not supported if NMODL is not built with CUDA " + "backend enabled."); + } +#endif } public: @@ -169,7 +183,9 @@ class JitDriver { benchmark::BenchmarkResults run(std::shared_ptr node, std::string& modname, int num_experiments, - int instance_size) { + int instance_size, + int cuda_grid_dim_x, + int cuda_block_dim_x) { // New directory is needed to be created otherwise the directory cannot be created // automatically through python if (cfg.nmodl_ast || cfg.json_ast || cfg.json_perfstat) { @@ -178,15 +194,17 @@ class JitDriver { cg_driver.prepare_mod(node, modname); nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0); visitor.visit_program(*node); + const GPUExecutionParameters gpu_execution_parameters{cuda_grid_dim_x, cuda_block_dim_x}; nmodl::benchmark::LLVMBenchmark benchmark(visitor, 
modname, cfg.output_dir, cfg.shared_lib_paths, num_experiments, instance_size, - cfg.llvm_cpu_name, + platform, cfg.llvm_opt_level_ir, - cfg.llvm_opt_level_codegen); + cfg.llvm_opt_level_codegen, + gpu_execution_parameters); return benchmark.run(); } }; @@ -227,7 +245,7 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { cfg.def(py::init([]() { auto cfg = std::make_unique(); // set to more sensible defaults for python binding - cfg->llvm_backend = true; + cfg->llvm_ir = true; return cfg; })) .def_readwrite("sympy_analytic", &nmodl::codegen::CodeGenConfig::sympy_analytic) @@ -264,6 +282,8 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { .def_readwrite("llvm_fast_math_flags", &nmodl::codegen::CodeGenConfig::llvm_fast_math_flags) .def_readwrite("llvm_cpu_name", &nmodl::codegen::CodeGenConfig::llvm_cpu_name) .def_readwrite("llvm_gpu_name", &nmodl::codegen::CodeGenConfig::llvm_gpu_name) + .def_readwrite("llvm_gpu_target_architecture", + &nmodl::codegen::CodeGenConfig::llvm_gpu_target_architecture) .def_readwrite("llvm_vector_width", &nmodl::codegen::CodeGenConfig::llvm_vector_width) .def_readwrite("llvm_opt_level_codegen", &nmodl::codegen::CodeGenConfig::llvm_opt_level_codegen) @@ -277,7 +297,9 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { "node"_a, "modname"_a, "num_experiments"_a, - "instance_size"_a); + "instance_size"_a, + "cuda_grid_dim_x"_a = 1, + "cuda_block_dim_x"_a = 1); m_nmodl.def("to_nmodl", static_cast 1. 
See https://github.com/BlueBrain/nmodl/issues/857 + if(${modfile} STREQUAL "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/expsyn.mod") + set(extra_args "--vec 1") + endif() get_filename_component(modfile_name "${modfile}" NAME) add_test(NAME "PyJIT/${modfile_name}" COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py - ${modfile}) + --file ${modfile} ${extra_args}) set_tests_properties( "PyJIT/${modfile_name}" PROPERTIES ENVIRONMENT PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}) + # Disable running the expsyn.mod on GPU because atomic instructions are not supported yet on GPU + # See https://github.com/BlueBrain/nmodl/issues/834 + if(NMODL_ENABLE_LLVM_CUDA AND NOT ${modfile} STREQUAL "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/expsyn.mod") + add_test(NAME "PyJIT/${modfile_name}_gpu" + COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py + --file ${modfile} --gpu ${extra_args}) + message(STATUS "CUDA_HOME is ${CUDAToolkit_TARGET_DIR}") + set_tests_properties( + "PyJIT/${modfile_name}_gpu" + PROPERTIES + ENVIRONMENT + "PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH};CUDA_HOME=${CUDAToolkit_TARGET_DIR}" + ) + endif() endforeach() endif() diff --git a/test/benchmark/benchmark.py b/test/benchmark/benchmark.py index c133f8d59c..9144fa549d 100644 --- a/test/benchmark/benchmark.py +++ b/test/benchmark/benchmark.py @@ -1,17 +1,39 @@ +import argparse import sys +import os import nmodl.dsl as nmodl from nmodl import ast, visitor +def parse_arguments(): + parser = argparse.ArgumentParser(description='Benchmark test script for NMODL.') + parser.add_argument('--gpu', action='store_true', default=False, + help='Enable GPU JIT execution') + parser.add_argument('--vec', type=int, default=1, + help='Vector width for CPU execution') + parser.add_argument('--file', type=str, + help='NMODL file to benchmark') + args, _ = parser.parse_known_args() + return args + def main(): + args = 
parse_arguments() + driver = nmodl.NmodlDriver() lookup_visitor = visitor.AstLookupVisitor() cfg = nmodl.CodeGenConfig() - cfg.llvm_vector_width = 4 + cfg.llvm_vector_width = args.vec cfg.llvm_opt_level_ir = 2 cfg.nmodl_ast = True - fname = sys.argv[1] + fname = args.file + if args.gpu: # GPU enabled + cfg.llvm_math_library = "libdevice" + cfg.llvm_gpu_name = "nvptx64" + cfg.llvm_gpu_target_architecture = "sm_70" + if not os.environ.get("CUDA_HOME"): + raise RuntimeError("CUDA_HOME environment variable not set") + cfg.shared_lib_paths = [os.getenv("CUDA_HOME") + "/nvvm/libdevice/libdevice.10.bc"] with open(fname) as f: hh = f.read() modast = driver.parse_string(hh) diff --git a/test/benchmark/benchmark_info.hpp b/test/benchmark/benchmark_info.hpp new file mode 100644 index 0000000000..d02d33ce2e --- /dev/null +++ b/test/benchmark/benchmark_info.hpp @@ -0,0 +1,29 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include +#include + +/// A struct to hold the information for benchmarking. +struct BenchmarkInfo { + /// Object or PTX filename to dump. + std::string filename; + + /// Object file output directory. + std::string output_dir; + + /// Shared libraries' paths to link against. + std::vector shared_lib_paths; + + /// Optimisation level for IT. + int opt_level_ir; + + /// Optimisation level for machine code generation. 
+ int opt_level_codegen; +}; diff --git a/test/benchmark/cuda_driver.cpp b/test/benchmark/cuda_driver.cpp new file mode 100644 index 0000000000..cecc97b35d --- /dev/null +++ b/test/benchmark/cuda_driver.cpp @@ -0,0 +1,201 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_utils.hpp" +#include "cuda_driver.hpp" +#include "fmt/format.h" +#include "utils/common_utils.hpp" + +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" + +using fmt::literals::operator""_format; + +namespace nmodl { +namespace runner { + +void CUDADriver::checkCudaErrors(CUresult err) { + if (err != CUDA_SUCCESS) { + const char* ret = NULL; + cuGetErrorName(err, &ret); + throw std::runtime_error("CUDA error: " + std::string(ret)); + } +} + +void CUDADriver::link_libraries(llvm::Module& module, BenchmarkInfo* benchmark_info) { + llvm::Linker linker(module); + for (const auto& lib_path: benchmark_info->shared_lib_paths) { + const auto lib_name = lib_path.substr(lib_path.find_last_of("/\\") + 1); + std::regex libdevice_bitcode_name{"libdevice.*.bc"}; + if (!std::regex_match(lib_name, libdevice_bitcode_name)) { + throw std::runtime_error("Only libdevice is supported for now"); + } + // Load libdevice module to the LLVM Module + auto libdevice_file_memory_buffer = llvm::MemoryBuffer::getFile(lib_path); + llvm::Expected> libdevice_expected_module = + parseBitcodeFile(libdevice_file_memory_buffer->get()->getMemBufferRef(), + module.getContext()); + if (std::error_code 
error = errorToErrorCode(libdevice_expected_module.takeError())) { + throw std::runtime_error("Error reading bitcode: {}"_format(error.message())); + } + linker.linkInModule(std::move(libdevice_expected_module.get()), + llvm::Linker::LinkOnlyNeeded); + } +} + +void print_string_to_file(const std::string& ptx_compiled_module, const std::string& filename) { + std::ofstream ptx_file(filename); + ptx_file << ptx_compiled_module; + ptx_file.close(); +} + +// Converts the CUDA compute version to the CUjit_target enum used by the CUJIT +CUjit_target get_CUjit_target(const int compute_version_major, const int compute_version_minor) { + auto compute_architecture = compute_version_major * 10 + compute_version_minor; + switch (compute_architecture) { + case 20: + return CU_TARGET_COMPUTE_20; + case 21: + return CU_TARGET_COMPUTE_21; + case 30: + return CU_TARGET_COMPUTE_30; + case 32: + return CU_TARGET_COMPUTE_32; + case 35: + return CU_TARGET_COMPUTE_35; + case 37: + return CU_TARGET_COMPUTE_37; + case 50: + return CU_TARGET_COMPUTE_50; + case 52: + return CU_TARGET_COMPUTE_52; + case 53: + return CU_TARGET_COMPUTE_53; + case 60: + return CU_TARGET_COMPUTE_60; + case 61: + return CU_TARGET_COMPUTE_61; + case 62: + return CU_TARGET_COMPUTE_62; + case 70: + return CU_TARGET_COMPUTE_70; + case 72: + return CU_TARGET_COMPUTE_72; + case 75: + return CU_TARGET_COMPUTE_75; + case 80: + return CU_TARGET_COMPUTE_80; + case 86: + return CU_TARGET_COMPUTE_86; + default: + throw std::runtime_error("Unsupported compute architecture"); + } +} + +void CUDADriver::init(const codegen::Platform& platform, BenchmarkInfo* benchmark_info) { + // CUDA initialization + checkCudaErrors(cuInit(0)); + checkCudaErrors(cuDeviceGetCount(&device_info.count)); + checkCudaErrors(cuDeviceGet(&device, 0)); + + char name[128]; + checkCudaErrors(cuDeviceGetName(name, 128, device)); + device_info.name = name; + logger->info("Using CUDA Device [0]: {}"_format(device_info.name)); + + // Get the compute 
capability of the device that is actually going to be used to run the kernel + checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_major, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + device)); + checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_minor, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + device)); + logger->info("Device Compute Capability: {}.{}"_format(device_info.compute_version_major, + device_info.compute_version_minor)); + if (device_info.compute_version_major < 2) { + throw std::runtime_error("ERROR: Device 0 is not SM 2.0 or greater"); + } + + // Load the external libraries modules to the NVVM program + // Currently only libdevice is supported + link_libraries(*module, benchmark_info); + + // Compile the program + logger->info("Compiling the LLVM IR to PTX"); + + // Optimize code for nvptx including the wrapper functions and generate PTX + const auto opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0; + utils::optimise_module_for_nvptx(platform, *module, opt_level_codegen, ptx_compiled_module); + utils::save_ir_to_ll_file(*module, + benchmark_info->output_dir + "/" + benchmark_info->filename + + "_benchmark"); + if (benchmark_info) { + print_string_to_file(ptx_compiled_module, + benchmark_info->output_dir + "/" + benchmark_info->filename + ".ptx"); + } + + // Create driver context + checkCudaErrors(cuCtxCreate(&context, 0, device)); + + // Create module for object + logger->info("Loading PTX to CUDA module"); + const unsigned int jitNumOptions = 5; + CUjit_option* jitOptions = new CUjit_option[jitNumOptions]; + void** jitOptVals = new void*[jitNumOptions]; + + // set up size of compilation log buffer + jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + size_t jitLogBufferSize = 1024 * 1024; + jitOptVals[0] = (void*) jitLogBufferSize; + + // set up pointer to the compilation log buffer + jitOptions[1] = CU_JIT_INFO_LOG_BUFFER; + char* jitLogBuffer = new char[jitLogBufferSize]; + 
jitOptVals[1] = jitLogBuffer; + + // set up size of compilation error log buffer + jitOptions[2] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + size_t jitErrorLogBufferSize = 1024 * 1024; + jitOptVals[2] = (void*) jitErrorLogBufferSize; + + // set up pointer to the compilation error log buffer + jitOptions[3] = CU_JIT_ERROR_LOG_BUFFER; + char* jitErrorLogBuffer = new char[jitErrorLogBufferSize]; + jitOptVals[3] = jitErrorLogBuffer; + + // set the exact CUDA compute target architecture based on the GPU it's going to be actually + // used + jitOptions[4] = CU_JIT_TARGET; + auto target_architecture = get_CUjit_target(device_info.compute_version_major, + device_info.compute_version_minor); + jitOptVals[4] = (void*) target_architecture; + + // load the LLVM module to the CUDA module (CUDA JIT compilation) + auto cuda_jit_ret = cuModuleLoadDataEx( + &cudaModule, ptx_compiled_module.c_str(), jitNumOptions, jitOptions, jitOptVals); + if (!std::string(jitLogBuffer).empty()) { + logger->info("CUDA JIT INFO LOG: {}"_format(std::string(jitLogBuffer))); + } + if (!std::string(jitErrorLogBuffer).empty()) { + logger->info("CUDA JIT ERROR LOG: {}"_format(std::string(jitErrorLogBuffer))); + } + delete[] jitOptions; + delete[] jitOptVals; + delete[] jitLogBuffer; + delete[] jitErrorLogBuffer; + checkCudaErrors(cuda_jit_ret); +} + +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/cuda_driver.hpp b/test/benchmark/cuda_driver.hpp new file mode 100644 index 0000000000..3fd02fd55e --- /dev/null +++ b/test/benchmark/cuda_driver.hpp @@ -0,0 +1,187 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief Implementation of CUDA and NVVM-based execution engine to run functions from MOD files + * + * \file + * \brief \copybrief nmodl::runner::CUDADriver + */ + +#include +#include + +#include "llvm/IR/Module.h" + +#include "benchmark_info.hpp" +#include "cuda.h" +#include "cuda_runtime.h" +#include "gpu_parameters.hpp" + +using nmodl::cuda_details::GPUExecutionParameters; + +namespace nmodl { +namespace runner { + +struct DeviceInfo { + int count; + std::string name; + int compute_version_major; + int compute_version_minor; +}; + +/** + * @brief Throw meaningful error in case CUDA API call fails + * + * Checks whether a call to the CUDA API was succsful and if not it throws a runntime_error with + * the error message from CUDA. + * + * @param err Return value of the CUDA API call + */ +void checkCudaErrors(CUresult err); + +/** + * \class CUDADriver + * \brief Driver to execute a MOD file function via the CUDA JIT backend. + */ +class CUDADriver { + /// LLVM IR module to execute. + std::unique_ptr module; + CUdevice device; + CUmodule cudaModule; + CUcontext context; + CUfunction function; + CUlinkState linker; + DeviceInfo device_info; + std::string ptx_compiled_module; + + void checkCudaErrors(CUresult err); + void link_libraries(llvm::Module& module, BenchmarkInfo* benchmark_info); + + public: + explicit CUDADriver(std::unique_ptr m) + : module(std::move(m)) {} + + /// Initializes the CUDA GPU JIT driver. 
+ void init(const codegen::Platform& platform, BenchmarkInfo* benchmark_info = nullptr); + + void launch_cuda_kernel(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters, + void* kernel_parameters) { + // Get kernel function + checkCudaErrors(cuModuleGetFunction(&function, cudaModule, entry_point.c_str())); + + // Kernel launch + checkCudaErrors(cuLaunchKernel(function, + gpu_execution_parameters.gridDimX, + 1, + 1, + gpu_execution_parameters.blockDimX, + 1, + 1, + 0, + nullptr, + &kernel_parameters, + nullptr)); + auto asyncErr = cudaDeviceSynchronize(); + if (asyncErr != cudaSuccess) { + throw std::runtime_error( + fmt::format("CUDA Execution Error: {}\n", cudaGetErrorString(asyncErr))); + } + } + + /// Lookups the entry-point without arguments in the CUDA module and executes it. + void execute_without_arguments(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters) { + launch_cuda_kernel(entry_point, gpu_execution_parameters, {}); + } + + /// Lookups the entry-point with arguments in the CUDA module and executes it. + template + void execute_with_arguments(const std::string& entry_point, + ArgType arg, + const GPUExecutionParameters& gpu_execution_parameters) { + launch_cuda_kernel(entry_point, gpu_execution_parameters, {&arg}); + } +}; + +/** + * \class BaseGPURunner + * \brief A base runner class that provides functionality to execute an + * entry point in the CUDA module. + */ +class BaseGPURunner { + protected: + std::unique_ptr driver; + + explicit BaseGPURunner(std::unique_ptr m) + : driver(std::make_unique(std::move(m))) {} + + public: + /// Sets up the CUDA driver. + virtual void initialize_driver(const codegen::Platform& platform) = 0; + + /// Runs the entry-point function without arguments. 
+ void run_without_arguments(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters) { + return driver->execute_without_arguments(entry_point, gpu_execution_parameters); + } + + /// Runs the entry-point function with a pointer to the data as an argument. + template + void run_with_argument(const std::string& entry_point, + ArgType arg, + const GPUExecutionParameters& gpu_execution_parameters) { + return driver->template execute_with_arguments(entry_point, arg, gpu_execution_parameters); + } +}; + +/** + * \class TestGPURunner + * \brief A simple runner for testing purposes. + */ +class TestGPURunner: public BaseGPURunner { + public: + explicit TestGPURunner(std::unique_ptr m) + : BaseGPURunner(std::move(m)) {} + + virtual void initialize_driver(const codegen::Platform& platform) { + driver->init(platform); + } +}; + +/** + * \class BenchmarkGPURunner + * \brief A runner with benchmarking functionality. It takes user-specified GPU + * features into account, as well as it can link against shared libraries. + */ +class BenchmarkGPURunner: public BaseGPURunner { + private: + /// Benchmarking information passed to JIT driver. 
+ BenchmarkInfo benchmark_info; + + public: + BenchmarkGPURunner(std::unique_ptr m, + std::string filename, + std::string output_dir, + std::vector lib_paths = {}, + int opt_level_ir = 0, + int opt_level_codegen = 0) + : BaseGPURunner(std::move(m)) + , benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {} + + virtual void initialize_driver(const codegen::Platform& platform) { + driver->init(platform, &benchmark_info); + } +}; + + +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/gpu_parameters.hpp b/test/benchmark/gpu_parameters.hpp new file mode 100644 index 0000000000..5e72edb147 --- /dev/null +++ b/test/benchmark/gpu_parameters.hpp @@ -0,0 +1,27 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief GPU execution parameters struct + * + * \file + * \brief \copybrief nmodl::cuda_details::GPUExecutionParameters + */ + +namespace nmodl { +namespace cuda_details { + +struct GPUExecutionParameters { + int gridDimX; + int blockDimX; +}; + +} // namespace cuda_details +} // namespace nmodl diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp index ed86684f76..3569c4bd4f 100644 --- a/test/benchmark/jit_driver.hpp +++ b/test/benchmark/jit_driver.hpp @@ -15,6 +15,8 @@ * \brief \copybrief nmodl::runner::JITDriver */ +#include "benchmark_info.hpp" + #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/Support/Host.h" @@ -22,24 +24,6 @@ namespace nmodl { namespace runner { -/// A struct to hold the information for benchmarking. -struct BenchmarkInfo { - /// Object filename to dump. 
- std::string filename; - - /// Object file output directory. - std::string output_dir; - - /// Shared libraries' paths to link against. - std::vector shared_lib_paths; - - /// Optimisation level for IT. - int opt_level_ir; - - /// Optimisation level for machine code generation. - int opt_level_codegen; -}; - /** * \class JITDriver * \brief Driver to execute a MOD file function via LLVM IR backend. diff --git a/test/benchmark/kernels/expsyn.mod b/test/benchmark/kernels/expsyn.mod new file mode 100644 index 0000000000..56ddde3b19 --- /dev/null +++ b/test/benchmark/kernels/expsyn.mod @@ -0,0 +1,42 @@ +NEURON { + POINT_PROCESS ExpSyn + RANGE tau, e, i + NONSPECIFIC_CURRENT i +} + +UNITS { + (nA) = (nanoamp) + (mV) = (millivolt) + (uS) = (microsiemens) +} + +PARAMETER { + tau = 0.1 (ms) <1e-9,1e9> + e = 0 (mV) +} + +ASSIGNED { + v (mV) + i (nA) +} + +STATE { + g (uS) +} + +INITIAL { + g=0 +} + +BREAKPOINT { + SOLVE state METHOD cnexp + i = g*(v - e) +} + +DERIVATIVE state { + g' = -g/tau +} + +NET_RECEIVE(weight (uS)) { + g = g + weight +} diff --git a/test/benchmark/kernels/hh.mod b/test/benchmark/kernels/hh.mod new file mode 100644 index 0000000000..053a15f43f --- /dev/null +++ b/test/benchmark/kernels/hh.mod @@ -0,0 +1,125 @@ +TITLE hh.mod squid sodium, potassium, and leak channels + +COMMENT + This is the original Hodgkin-Huxley treatment for the set of sodium, + potassium, and leakage channels found in the squid giant axon membrane. + ("A quantitative description of membrane current and its application + conduction and excitation in nerve" J.Physiol. (Lond.) 117:500-544 (1952).) + Membrane voltage is in absolute mV and has been reversed in polarity + from the original HH convention and shifted to reflect a resting potential + of -65 mV. + Remember to set celsius=6.3 (or whatever) in your HOC file. + See squid.hoc for an example of a simulation using this model. 
+ SW Jaslove 6 March, 1992 +ENDCOMMENT + +UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) +} + +? interface +NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + :GLOBAL minf, hinf, ninf, mtau, htau, ntau + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE : assigned GLOBALs will be per thread +} + +PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) +} + +STATE { + m h n +} + +ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + ek (mV) + + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf hinf ninf + mtau (ms) htau (ms) ntau (ms) +} + +? currents +BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v - ena) + gk = gkbar*n*n*n*n + ik = gk*(v - ek) + il = gl*(v - el) +} + + +INITIAL { + rates(v) + m = minf + h = hinf + n = ninf +} + +? states +DERIVATIVE states { + rates(v) + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau +} + +:LOCAL q10 + + +? rates +PROCEDURE rates(v(mV)) { :Computes rate and other constants at current v. + :Call once from HOC to initialize inf at resting v. + LOCAL alpha, beta, sum, q10 +: TABLE minf, mtau, hinf, htau, ninf, ntau DEPEND celsius FROM -100 TO 100 WITH 200 + +UNITSOFF + q10 = 3^((celsius - 6.3)/10) + :"m" sodium activation system + alpha = .1 * vtrap(-(v+40),10) + beta = 4 * exp(-(v+65)/18) + sum = alpha + beta + mtau = 1/(q10*sum) + minf = alpha/sum + :"h" sodium inactivation system + alpha = .07 * exp(-(v+65)/20) + beta = 1 / (exp(-(v+35)/10) + 1) + sum = alpha + beta + htau = 1/(q10*sum) + hinf = alpha/sum + :"n" potassium activation system + alpha = .01*vtrap(-(v+55),10) + beta = .125*exp(-(v+65)/80) + sum = alpha + beta + ntau = 1/(q10*sum) + ninf = alpha/sum +} + +FUNCTION vtrap(x,y) { :Traps for 0 in denominator of rate eqns. 
+ if (fabs(x/y) < 1e-6) { + vtrap = y*(1 - x/y/2) + }else{ + vtrap = x/(exp(x/y) - 1) + } +} + +UNITSON diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index 87d7e34512..010bc2edf3 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -16,6 +16,9 @@ #include "test/unit/codegen/codegen_data_helper.hpp" +#ifdef NMODL_LLVM_CUDA_BACKEND +#include "test/benchmark/cuda_driver.hpp" +#endif namespace nmodl { namespace benchmark { @@ -45,17 +48,43 @@ BenchmarkResults LLVMBenchmark::run_benchmark() { llvm_visitor.find_kernel_names(kernel_names); // Get feature's string and turn them off depending on the cpu. - std::string cpu_name = cpu == "default" ? llvm::sys::getHostCPUName().str() : cpu; - logger->info("CPU: {}", cpu_name); + std::string backend_name; +#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + backend_name = platform.get_name(); + } else { +#endif + backend_name = platform.get_name() == "default" ? llvm::sys::getHostCPUName().str() + : platform.get_name(); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif + logger->info("Backend: {}", backend_name); std::unique_ptr m = llvm_visitor.get_module(); // Create the benchmark runner and initialize it. 
- std::string filename = "v" + std::to_string(llvm_visitor.get_vector_width()) + "_" + - mod_filename; - runner::BenchmarkRunner runner( - std::move(m), filename, output_dir, cpu_name, shared_libs, opt_level_ir, opt_level_codegen); - runner.initialize_driver(); +#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + std::string filename = "cuda_" + mod_filename; + cuda_runner = std::make_unique( + std::move(m), filename, output_dir, shared_libs, opt_level_ir, opt_level_codegen); + cuda_runner->initialize_driver(platform); + } else { +#endif + std::string filename = "v" + std::to_string(llvm_visitor.get_vector_width()) + "_" + + mod_filename; + cpu_runner = std::make_unique(std::move(m), + filename, + output_dir, + backend_name, + shared_libs, + opt_level_ir, + opt_level_codegen); + cpu_runner->initialize_driver(); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif BenchmarkResults results{}; // Benchmark every kernel. @@ -75,7 +104,17 @@ BenchmarkResults LLVMBenchmark::run_benchmark() { // Record the execution time of the kernel. 
std::string wrapper_name = "__" + kernel_name + "_wrapper"; auto start = std::chrono::steady_clock::now(); - runner.run_with_argument(kernel_name, instance_data.base_ptr); +#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + cuda_runner->run_with_argument(wrapper_name, + instance_data.base_ptr, + gpu_execution_parameters); + } else { +#endif + cpu_runner->run_with_argument(wrapper_name, instance_data.base_ptr); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif auto end = std::chrono::steady_clock::now(); std::chrono::duration diff = end - start; diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index f79cad62e5..f03e9ea52d 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -13,6 +13,16 @@ #include #include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "gpu_parameters.hpp" +#include "test/benchmark/jit_driver.hpp" +#include "utils/logger.hpp" + +#ifdef NMODL_LLVM_CUDA_BACKEND +#include "test/benchmark/cuda_driver.hpp" +#endif + +using nmodl::codegen::Platform; +using nmodl::cuda_details::GPUExecutionParameters; namespace nmodl { namespace benchmark { @@ -47,8 +57,11 @@ class LLVMBenchmark { /// The size of the instance struct for benchmarking. int instance_size; - /// CPU to target. - std::string cpu; + /// Target platform for the code generation. + Platform platform; + + /// The GPU execution parameters needed to configure the kernels' execution. + GPUExecutionParameters gpu_execution_parameters; /// Optimisation level for IR generation. int opt_level_ir; @@ -59,6 +72,14 @@ class LLVMBenchmark { /// Filestream for dumping logs to the file. 
std::ofstream ofs; + /// CPU benchmark runner + std::unique_ptr cpu_runner; + +#ifdef NMODL_LLVM_CUDA_BACKEND + /// CUDA benchmark runner + std::unique_ptr cuda_runner; +#endif + public: LLVMBenchmark(codegen::CodegenLLVMVisitor& llvm_visitor, const std::string& mod_filename, @@ -66,7 +87,7 @@ class LLVMBenchmark { std::vector shared_libs, int num_experiments, int instance_size, - const std::string& cpu, + const Platform& platform, int opt_level_ir, int opt_level_codegen) : llvm_visitor(llvm_visitor) @@ -75,9 +96,29 @@ class LLVMBenchmark { , shared_libs(shared_libs) , num_experiments(num_experiments) , instance_size(instance_size) - , cpu(cpu) + , platform(platform) , opt_level_ir(opt_level_ir) , opt_level_codegen(opt_level_codegen) {} + LLVMBenchmark(codegen::CodegenLLVMVisitor& llvm_visitor, + const std::string& mod_filename, + const std::string& output_dir, + std::vector shared_libs, + int num_experiments, + int instance_size, + const Platform& platform, + int opt_level_ir, + int opt_level_codegen, + const GPUExecutionParameters& gpu_exec_params) + : llvm_visitor(llvm_visitor) + , mod_filename(mod_filename) + , output_dir(output_dir) + , shared_libs(shared_libs) + , num_experiments(num_experiments) + , instance_size(instance_size) + , platform(platform) + , opt_level_ir(opt_level_ir) + , opt_level_codegen(opt_level_codegen) + , gpu_execution_parameters(gpu_exec_params) {} /// Runs the benchmark. 
BenchmarkResults run(); diff --git a/test/integration/mod/test_math.mod b/test/integration/mod/test_math.mod new file mode 100644 index 0000000000..6e3174a846 --- /dev/null +++ b/test/integration/mod/test_math.mod @@ -0,0 +1,16 @@ +NEURON { + SUFFIX test + RANGE x, y +} + +ASSIGNED { x y } + +STATE { m } + +BREAKPOINT { + SOLVE states METHOD cnexp +} + +DERIVATIVE states { + m = exp(y) + x ^ 107 + log(x) +} diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index e0d9104b7c..f293680d73 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -119,7 +119,6 @@ if(NMODL_ENABLE_LLVM) add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_data_helper.cpp codegen/codegen_llvm_execution.cpp) if(NMODL_ENABLE_LLVM_CUDA) - include_directories(${CUDAToolkit_INCLUDE_DIRS}) target_link_libraries(benchmark_data PRIVATE CUDA::cudart) target_link_libraries(testllvm CUDA::cudart) target_link_libraries(test_llvm_runner CUDA::cudart) diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 401e0a6c63..9c22fdda78 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -120,11 +120,15 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { size_t ion_ena_index_index = 8; size_t voltage_index = 9; size_t node_index_index = 10; - size_t t_index = 11; - size_t dt_index = 12; - size_t celsius_index = 13; - size_t secondorder_index = 14; - size_t node_count_index = 15; + size_t rhs_index = 11; + size_t d_index = 12; + size_t rhs_shadow_index = 13; + size_t d_shadow_index = 14; + size_t t_index = 15; + size_t dt_index = 16; + size_t celsius_index = 17; + size_t secondorder_index = 18; + size_t node_count_index = 19; // Check if the various instance struct fields are properly initialized REQUIRE(compare(instance_data.members[minf_index], generate_dummy_data(minf_index, num_elements))); @@ -155,6 
+159,10 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { int* ion_ena_index; double* voltage; int* node_index; + double* vec_rhs; + double* vec_d; + double* _shadow_rhs; + double* _shadow_d; double t; double dt; double celsius; diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 818e4104fe..26e8763341 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -980,8 +980,8 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { // Check the struct type with correct attributes and the kernel declaration. std::regex struct_type( "%.*__instance_var__type = type \\{ double\\*, double\\*, double\\*, double\\*, " - "double\\*, double\\*, double\\*, double\\*, double\\*, double\\*, i32\\*, double, " - "double, double, i32, i32, double\\*, double\\*, double\\*, double\\* \\}"); + "double\\*, double\\*, double\\*, double\\*, double\\*, double\\*, i32\\*, " + "double\\*, double\\*, double\\*, double\\*, double, double, double, i32, i32 \\}"); std::regex kernel_declaration( R"(define void @nrn_state_hh\(%.*__instance_var__type\* noalias nocapture readonly .*\) #0)"); REQUIRE(std::regex_search(module_string, m, struct_type)); @@ -1775,7 +1775,7 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { } DERIVATIVE states { - m = exp(y) + x ^ 2 + m = exp(y) + x ^ 2 + log(x) } )"; @@ -1793,12 +1793,18 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { std::regex pow_declaration(R"(declare double @__nv_pow\(double, double\))"); std::regex pow_new_call(R"(call double @__nv_pow\(double %.*, double .*\))"); std::regex pow_old_call(R"(call double @llvm\.pow\.f64\(double %.*, double .*\))"); + std::regex log_declaration(R"(declare double @__nv_log\(double\))"); + std::regex log_new_call(R"(call double @__nv_log\(double %.*\))"); + std::regex log_old_call(R"(call double @llvm\.log\.f64\(double %.*\))"); 
REQUIRE(std::regex_search(module_string, m, exp_declaration)); REQUIRE(std::regex_search(module_string, m, exp_new_call)); REQUIRE(!std::regex_search(module_string, m, exp_old_call)); REQUIRE(std::regex_search(module_string, m, pow_declaration)); REQUIRE(std::regex_search(module_string, m, pow_new_call)); REQUIRE(!std::regex_search(module_string, m, pow_old_call)); + REQUIRE(std::regex_search(module_string, m, log_declaration)); + REQUIRE(std::regex_search(module_string, m, log_new_call)); + REQUIRE(!std::regex_search(module_string, m, log_old_call)); } } } diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index 1906d0d27c..af9bed5e7c 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -171,15 +171,15 @@ SCENARIO("Check instance struct declaration and setup in wrapper", int* __restrict__ ion_dikdv_index; double* __restrict__ voltage; int* __restrict__ node_index; + double* __restrict__ vec_rhs; + double* __restrict__ vec_d; + double* __restrict__ _shadow_rhs; + double* __restrict__ _shadow_d; double t; double dt; double celsius; int secondorder; int node_count; - double* __restrict__ vec_rhs; - double* __restrict__ vec_d; - double* __restrict__ _shadow_rhs; - double* __restrict__ _shadow_d; }; )"; std::string generated_instance_struct_setup = R"( @@ -226,6 +226,10 @@ SCENARIO("Check instance struct declaration and setup in wrapper", inst->ion_dikdv_index = indexes+5*pnodecount; inst->voltage = nt->_actual_v; inst->node_index = ml->nodeindices; + inst->vec_rhs = nt->_actual_rhs; + inst->vec_d = nt->_actual_d; + inst->_shadow_rhs = nt->_shadow_rhs; + inst->_shadow_d = nt->_shadow_d; inst->t = nt->t; inst->dt = nt->dt; inst->celsius = celsius; From b13ca9097409ee90e2d2d94f0b0f89b687fd19b9 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 12 May 2022 10:19:10 +0200 Subject: [PATCH 088/105] [LLVM][GPU] Atomic updates support (#853) 1. 
Helper visitor generates atomic statements with += and -= once again. 2. LLVM visitor now knows how to lower atomic statements for CPUs (trivially) and GPUs. 3. A corresponding IR test was added. `expsyn` test on GPU enabled Co-authored-by: Ioannis Magkanaris --- .../llvm/codegen_llvm_helper_visitor.cpp | 39 ++------- src/codegen/llvm/codegen_llvm_visitor.cpp | 84 ++++++++++++++----- src/codegen/llvm/llvm_ir_builder.cpp | 22 +++++ src/codegen/llvm/llvm_ir_builder.hpp | 12 ++- test/benchmark/CMakeLists.txt | 5 +- test/unit/codegen/codegen_llvm_ir.cpp | 36 +++++++- test/unit/codegen/codegen_llvm_visitor.cpp | 6 +- 7 files changed, 139 insertions(+), 65 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index fc0ab1c7b8..616d4ef401 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -283,7 +283,7 @@ static void append_statements_from_block(ast::StatementVector& statements, * Create atomic statement for given expression of the form a[i] += expression * @param var Name of the variable on the LHS (it's an array), e.g. `a` * @param var_index Name of the index variable to access variable `var` e.g. `i` - * @param op_str Operators like += or -= + * @param op_str Operators like =, += or -= * @param rhs_str expression that will be added or subtracted from `var[var_index]` * @return A statement representing atomic operation using `ast::CodegenAtomicStatement` */ @@ -299,23 +299,9 @@ static std::shared_ptr create_atomic_statement( /*at=*/nullptr, /*index=*/nullptr); - // LLVM IR generation is now only supporting assignment (=) and not += or -= - // So we need to write increment operation a += b as an assignment operation - // a = a + b. 
- // See https://github.com/BlueBrain/nmodl/issues/851 - - std::string op(op_str); - stringutils::remove_character(op, '='); - - // make sure only + or - operator is used - if (op_str != "-" && op_str != "+") { - throw std::runtime_error("Unsupported binary operator for atomic statement"); - } - - auto* rhs = create_expression("{}[{}] {} {} "_format(var, var_index, op, rhs_str)); - return std::make_shared(lhs, - ast::BinaryOperator{ast::BOP_ASSIGN}, - rhs); + auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); + auto rhs = create_expression(rhs_str); + return std::make_shared(lhs, op, rhs); } /** @@ -422,22 +408,7 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, index_statements.push_back(visitor::create_statement(index_statement)); // pass ion variable to write and its index - - // lhs variable - std::string lhs = "{}[{}] "_format(ion_varname, index_varname); - - // lets turn a += b into a = a + b if applicable - // note that this is done in order to facilitate existing implementation in the llvm - // backend which doesn't support += or -= operators. 
- std::string statement; - if (!op.compare("+=")) { - statement = "{} = {} + {}"_format(lhs, lhs, rhs); - } else if (!op.compare("-=")) { - statement = "{} = {} - {}"_format(lhs, lhs, rhs); - } else { - statement = "{} {} {}"_format(lhs, op, rhs); - } - body_statements.push_back(visitor::create_statement(statement)); + body_statements.push_back(create_atomic_statement(ion_varname, index_varname, op, rhs)); }; /// iterate over all ions and create write ion statements for given block type diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index de6c7ad914..1516f23634 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -512,31 +512,73 @@ void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { ir_builder.create_boolean_constant(node.get_value()); } -/** - * Currently, this functions is very similar to visiting the binary operator. However, the - * difference here is that the writes to the LHS variable must be atomic. These has a particular - * use case in synapse kernels. For simplicity, we choose not to support atomic writes at this - * stage and emit a warning. - * - * \todo support this properly. - */ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) { - if (platform.is_cpu_with_simd()) - logger->warn("Atomic operations are not supported"); + // Get the variable node that need an atomic update. + const auto& var = std::dynamic_pointer_cast(node.get_lhs()); + if (!var) + throw std::runtime_error("Error: only 'VarName' update is supported\n"); - // Support only assignment for now. + // Evaluate RHS of the update. 
llvm::Value* rhs = accept_and_get(node.get_rhs()); - if (node.get_atomic_op().get_value() != ast::BinaryOp::BOP_ASSIGN) - throw std::runtime_error( - "Error: only assignment is supported for CodegenAtomicStatement\n"); - const auto& var = dynamic_cast(node.get_lhs().get()); - if (!var) - throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); - // Process the assignment as if it was non-atomic. - if (platform.is_cpu_with_simd()) - logger->warn("Treating write as non-atomic"); - write_to_variable(*var, rhs); + // First, check if it is an atomic write only and we can return early. + // Otherwise, extract what kind of atomic update we want to make. + ast::BinaryOp atomic_op = node.get_atomic_op().get_value(); + if (atomic_op == ast::BinaryOp::BOP_ASSIGN) { + write_to_variable(*var, rhs); + return; + } + ast::BinaryOp op = ir_builder.extract_atomic_op(atomic_op); + + // For different platforms, we handle atomic updates differently! + if (platform.is_cpu_with_simd()) { + throw std::runtime_error("Error: no atomic update support for SIMD CPUs\n"); + } else if (platform.is_gpu()) { + const auto& identifier = var->get_name(); + + // We only need to support atomic updates to instance struct members. 
+ if (!identifier->is_codegen_instance_var()) + throw std::runtime_error("Error: atomic updates for non-instance variable\n"); + + const auto& node = std::dynamic_pointer_cast(identifier); + const auto& instance_name = node->get_instance_var()->get_node_name(); + const auto& member_node = node->get_member_var(); + const auto& member_name = member_node->get_node_name(); + + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); + + llvm::Value* instance_ptr = ir_builder.create_load(instance_name); + int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); + + // Some sanity checks. + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) + throw std::runtime_error( + "Error: atomic updates are allowed on pointer variables only\n"); + const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + llvm::Value* i64_index = get_index(*member_indexed_name); + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + llvm::Value* ptr = ir_builder.create_inbounds_gep(instance_member, i64_index); + + ir_builder.create_atomic_op(ptr, rhs, op); + } else { + // For non-SIMD CPUs, updates don't have to be atomic at all! 
+ llvm::Value* lhs = accept_and_get(node.get_lhs()); + ir_builder.create_binary_op(lhs, rhs, op); + llvm::Value* result = ir_builder.pop_last_value(); + + write_to_variable(*var, result); + } } // Generating FOR loop in LLVM IR creates the following structure: diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 82cb820049..efbd7aa050 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -293,6 +293,28 @@ void IRBuilder::create_array_alloca(const std::string& name, create_alloca(name, array_type); } +ast::BinaryOp IRBuilder::extract_atomic_op(ast::BinaryOp op) { + switch (op) { + case ast::BinaryOp::BOP_SUB_ASSIGN: + return ast::BinaryOp::BOP_SUBTRACTION; + case ast::BinaryOp::BOP_ADD_ASSIGN: + return ast::BinaryOp::BOP_ADDITION; + default: + throw std::runtime_error("Error: only atomic addition and subtraction is supported\n"); + } +} + +void IRBuilder::create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op) { + if (op == ast::BinaryOp::BOP_SUBTRACTION) { + update = builder.CreateFNeg(update); + } + builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, + ptr, + update, + llvm::MaybeAlign(), + llvm::AtomicOrdering::SequentiallyConsistent); +} + void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) { // Check that both lhs and rhs have the same types. if (lhs->getType() != rhs->getType()) diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index 1b144afcfd..67db6fcded 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -146,6 +146,9 @@ class IRBuilder { return vectorize && mask; } + /// Extracts binary operator (+ or -) from atomic update (+= or =-). + ast::BinaryOp extract_atomic_op(ast::BinaryOp op); + /// Generates LLVM IR to allocate the arguments of the function on the stack. 
void allocate_function_arguments(llvm::Function* function, const ast::CodegenVarWithTypeVector& nmodl_arguments); @@ -158,6 +161,9 @@ class IRBuilder { /// Generates LLVM IR for the given binary operator. void create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op); + /// Generates LLVM IR for the given atomic operator. + void create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op); + /// Generates LLVM IR for the bitcast instruction. llvm::Value* create_bitcast(llvm::Value* value, llvm::Type* dst_type); @@ -304,13 +310,13 @@ class IRBuilder { /// Pops the last visited value from the value stack. llvm::Value* pop_last_value(); + /// Generates an inbounds GEP instruction for the given value and returns calculated address. + llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); + private: /// Generates an inbounds GEP instruction for the given name and returns calculated address. llvm::Value* create_inbounds_gep(const std::string& variable_name, llvm::Value* index); - /// Generates an inbounds GEP instruction for the given value and returns calculated address. - llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); - /// Returns a scalar constant of the provided type. 
template llvm::Value* get_scalar_constant(llvm::Type* type, V value); diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index 5529b505d2..f8f6c762f0 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -44,9 +44,8 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) set_tests_properties( "PyJIT/${modfile_name}" PROPERTIES ENVIRONMENT PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}) - # Disable running the expsyn.mod on GPU because atomic instructions are not supported yet on GPU - # See https://github.com/BlueBrain/nmodl/issues/834 - if(NMODL_ENABLE_LLVM_CUDA AND NOT ${modfile} STREQUAL "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/expsyn.mod") + + if(NMODL_ENABLE_LLVM_CUDA) add_test(NAME "PyJIT/${modfile_name}_gpu" COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py --file ${modfile} --gpu ${extra_args}) diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 26e8763341..7e209e9123 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1360,7 +1360,7 @@ SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { } g = (g-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] + mech->ion_ina[ion_ina_id] += mech->ina[id] mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs mech->vec_d[node_id] = mech->vec_d[node_id]+g })"; @@ -1807,4 +1807,38 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { REQUIRE(!std::regex_search(module_string, m, log_old_call)); } } + + GIVEN("For current update with atomic addition ") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { } + )"; + + 
THEN("corresponding LLVM atomic instruction is generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check for atomic addition. + std::regex add(R"(atomicrmw fadd double\* %.*, double %.* seq_cst)"); + REQUIRE(std::regex_search(module_string, m, add)); + } + } } diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index af9bed5e7c..1e3504ae61 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -462,8 +462,8 @@ SCENARIO("Channel: Derivative and breakpoint block llvm transformations", g = (g-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 mech->ion_dikdv[ion_dikdv_id] = mech->ion_dikdv[ion_dikdv_id]+(dik-mech->ik[id])/0.001 - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id] - mech->ion_ik[ion_ik_id] = mech->ion_ik[ion_ik_id]+mech->ik[id] + mech->ion_ina[ion_ina_id] += mech->ina[id] + mech->ion_ik[ion_ik_id] += mech->ik[id] mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs mech->vec_d[node_id] = mech->vec_d[node_id]+g } @@ -593,7 +593,7 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", } mech->g[id] = (mech->g[id]-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001*1.e2/mech->node_area[node_area_id] - mech->ion_ina[ion_ina_id] = mech->ion_ina[ion_ina_id]+mech->ina[id]*(1.e2/mech->node_area[node_area_id]) + mech->ion_ina[ion_ina_id] += mech->ina[id]*(1.e2/mech->node_area[node_area_id]) mfactor = 1.e2/mech->node_area[node_area_id] mech->g[id] = mech->g[id]*mfactor rhs = rhs*mfactor From ece5c4758e03c24c818af32763a033ef4a998c5e Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 12 May 2022 13:18:11 +0200 Subject: [PATCH 089/105] Replaced fmt literals with fmt::format --- src/codegen/codegen_c_visitor.cpp | 
2 +- src/codegen/codegen_c_visitor.hpp | 1 - src/codegen/codegen_driver.cpp | 5 +- src/codegen/codegen_info.cpp | 3 +- .../llvm/codegen_llvm_helper_visitor.cpp | 57 +++++++++--------- .../llvm/codegen_llvm_helper_visitor.hpp | 6 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 58 ++++++++++--------- .../llvm/replace_with_lib_functions.cpp | 6 +- src/main.cpp | 6 +- test/benchmark/cuda_driver.cpp | 15 +++-- 10 files changed, 82 insertions(+), 77 deletions(-) diff --git a/src/codegen/codegen_c_visitor.cpp b/src/codegen/codegen_c_visitor.cpp index 4a9ff1de97..c4c88327f9 100644 --- a/src/codegen/codegen_c_visitor.cpp +++ b/src/codegen/codegen_c_visitor.cpp @@ -887,7 +887,7 @@ bool CodegenCVisitor::nrn_cur_reduction_loop_required() { void CodegenCVisitor::print_channel_iteration_loop(const std::string& start = "start", const std::string& end = "end") { - printer->start_block("for (int id = {}; id < {}; id++)"_format(start, end)); + printer->start_block(fmt::format("for (int id = {}; id < {}; id++)", start, end)); } diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 36d8287e09..45ab7b7441 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -36,7 +36,6 @@ namespace nmodl { /// encapsulates code generation backend implementations namespace codegen { -using namespace fmt::literals; /** * @defgroup codegen Code Generation Implementation * @brief Implementations of code generation backends diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp index 5d211fa724..19c3e24952 100644 --- a/src/codegen/codegen_driver.cpp +++ b/src/codegen/codegen_driver.cpp @@ -157,8 +157,9 @@ bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::s ast_to_nmodl(*node, filename); if (cfg.nmodl_ast && kineticBlockVisitor.get_conserve_statement_count()) { logger->warn( - fmt::format("{} presents non-standard CONSERVE statements in DERIVATIVE blocks. 
Use it only for debugging/developing", - filename)); + fmt::format("{} presents non-standard CONSERVE statements in DERIVATIVE blocks. " + "Use it only for debugging/developing", + filename)); } } diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index a395b0e6de..e2415fb6cb 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -16,7 +16,6 @@ namespace nmodl { namespace codegen { -using namespace fmt::literals; using symtab::syminfo::NmodlType; using visitor::VarUsageVisitor; @@ -303,7 +302,7 @@ void CodegenInfo::get_int_variables() { */ if (!watch_statements.empty()) { for (int i = 0; i < watch_statements.size() + 1; i++) { - codegen_int_variables.emplace_back(make_symbol(fmt::format("watch{}",i)), + codegen_int_variables.emplace_back(make_symbol(fmt::format("watch{}", i)), false, false, true); diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 616d4ef401..0e05c6d885 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -18,8 +18,6 @@ namespace nmodl { namespace codegen { -using namespace fmt::literals; - using symtab::syminfo::Status; /// initialize static member variables @@ -148,7 +146,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { auto name = new ast::Name(new ast::String(function_name)); /// return variable name has "ret_" prefix - std::string return_var_name = "ret_{}"_format(function_name); + std::string return_var_name = fmt::format("ret_{}", function_name); auto return_var = new ast::Name(new ast::String(return_var_name)); /// return type based on node type @@ -341,7 +339,8 @@ void CodegenLLVMHelperVisitor::ion_read_statements(BlockType type, // first load the index std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); // now assign the value - std::string read_statement = fmt::format("{} = {}[{}]", 
varname, ion_varname, index_varname); + std::string read_statement = + fmt::format("{} = {}[{}]", varname, ion_varname, index_varname); // push index definition, index statement and actual read statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); @@ -401,8 +400,6 @@ void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, std::string index_varname = fmt::format("{}_id", ion_varname); // load index std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); - // ion variable to write (with index) - std::string ion_to_write = fmt::format("{}[{}]", ion_varname, index_varname); // push index definition, index statement and actual write statement int_variables.push_back(index_varname); index_statements.push_back(visitor::create_statement(index_statement)); @@ -559,7 +556,7 @@ void CodegenLLVMHelperVisitor::rename_local_variables(ast::StatementBlock& node) /// rename local variable in entire statement block for (auto& var: local_statement->get_variables()) { std::string old_name = var->get_node_name(); - std::string new_name = "{}_{}"_format(old_name, local_block_counter); + std::string new_name = fmt::format("{}_{}", old_name, local_block_counter); visitor::RenameVisitor(old_name, new_name).visit_statement_block(node); } } @@ -670,9 +667,10 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { // prepare main body of the compute function { /// access node index and corresponding voltage - index_statements.push_back( - visitor::create_statement("node_id = node_index[{}]"_format(naming::INDUCTION_VAR))); - body_statements.push_back(visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); + index_statements.push_back(visitor::create_statement( + fmt::format("node_id = node_index[{}]", naming::INDUCTION_VAR))); + body_statements.push_back( + visitor::create_statement(fmt::format("v = {}[node_id]", VOLTAGE_VAR))); /// read 
ion variables ion_read_statements( @@ -872,11 +870,11 @@ void CodegenLLVMHelperVisitor::print_nrn_current_body(const ast::BreakpointBlock // sum now all currents for (auto& current: info.currents) { statements.emplace_back( - visitor::create_statement("current = current + {}"_format(current))); + visitor::create_statement(fmt::format("current = current + {}", current))); } // assign computed current to the given variable - statements.emplace_back(visitor::create_statement("{} = current"_format(variable))); + statements.emplace_back(visitor::create_statement(fmt::format("{} = current", variable))); // create StatementBlock for better readability of the generated code and add that to the main // body statements @@ -915,10 +913,10 @@ void CodegenLLVMHelperVisitor::print_nrn_cur_non_conductance_kernel( for (const auto& var: ion.writes) { if (ion.is_ionic_current(var)) { // also create local variable - std::string name{"di{}"_format(ion.name)}; + std::string name{fmt::format("di{}", ion.name)}; double_variables.emplace_back(name); body_statements.emplace_back( - visitor::create_statement("{} = {}"_format(name, var))); + visitor::create_statement(fmt::format("{} = {}", name, var))); } } } @@ -933,9 +931,9 @@ void CodegenLLVMHelperVisitor::print_nrn_cur_non_conductance_kernel( // in case of point process we need to load area from another vector. 
if (info.point_process) { // create integer variable for index and then load value from area_index vector - int_variables.emplace_back("{}_id"_format(naming::NODE_AREA_VARIABLE)); + int_variables.emplace_back(fmt::format("{}_id", naming::NODE_AREA_VARIABLE)); index_statements.emplace_back(visitor::create_statement( - " {0}_id = {0}_index[id]"_format(naming::NODE_AREA_VARIABLE))); + fmt::format(" {0}_id = {0}_index[id]", naming::NODE_AREA_VARIABLE))); } // update all ionic currents now @@ -943,22 +941,22 @@ void CodegenLLVMHelperVisitor::print_nrn_cur_non_conductance_kernel( for (const auto& var: ion.writes) { if (ion.is_ionic_current(var)) { // variable on the lhs - std::string lhs{"{}di{}dv"_format(naming::ION_VARNAME_PREFIX, ion.name)}; + std::string lhs{fmt::format("{}di{}dv", naming::ION_VARNAME_PREFIX, ion.name)}; // expression on the rhs - std::string rhs{"(di{}-{})/0.001"_format(ion.name, var)}; + std::string rhs{fmt::format("(di{}-{})/0.001", ion.name, var)}; if (info.point_process) { - rhs += "*1.e2/{0}[{0}_id]"_format(naming::NODE_AREA_VARIABLE); + rhs += fmt::format("*1.e2/{0}[{0}_id]", naming::NODE_AREA_VARIABLE); } // load the index for lhs variable int_variables.emplace_back(lhs + "_id"); - std::string index_statement{"{}_id = {}_index[id]"_format(lhs, lhs)}; + std::string index_statement{fmt::format("{}_id = {}_index[id]", lhs, lhs)}; index_statements.emplace_back(visitor::create_statement(index_statement)); // add statement that actually updates the - body_statements.emplace_back( - visitor::create_statement("{0}[{0}_id] = {0}[{0}_id] + {1}"_format(lhs, rhs))); + body_statements.emplace_back(visitor::create_statement( + fmt::format("{0}[{0}_id] = {0}[{0}_id] + {1}", lhs, rhs))); } } } @@ -991,9 +989,10 @@ void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node /// prepare all function statements { /// access node index and corresponding voltage - index_statements.push_back( - visitor::create_statement("node_id = 
node_index[{}]"_format(naming::INDUCTION_VAR))); - body_statements.push_back(visitor::create_statement("v = {}[node_id]"_format(VOLTAGE_VAR))); + index_statements.push_back(visitor::create_statement( + fmt::format("node_id = node_index[{}]", naming::INDUCTION_VAR))); + body_statements.push_back( + visitor::create_statement(fmt::format("v = {}[node_id]", VOLTAGE_VAR))); /// read ion variables ion_read_statements(BlockType::Equation, @@ -1022,7 +1021,7 @@ void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node if (info.point_process) { double_variables.emplace_back("mfactor"); body_statements.emplace_back(visitor::create_statement( - "mfactor = 1.e2/{0}[{0}_id]"_format(naming::NODE_AREA_VARIABLE))); + fmt::format("mfactor = 1.e2/{0}[{0}_id]", naming::NODE_AREA_VARIABLE))); body_statements.emplace_back(visitor::create_statement("g = g*mfactor")); body_statements.emplace_back(visitor::create_statement("rhs = rhs*mfactor")); } @@ -1048,9 +1047,9 @@ void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node stringutils::remove_character(d_op, '='); body_statements.emplace_back(visitor::create_statement( - "vec_rhs[node_id] = vec_rhs[node_id] {} rhs"_format(rhs_op))); - body_statements.emplace_back( - visitor::create_statement("vec_d[node_id] = vec_d[node_id] {} g"_format(d_op))); + fmt::format("vec_rhs[node_id] = vec_rhs[node_id] {} rhs", rhs_op))); + body_statements.emplace_back(visitor::create_statement( + fmt::format("vec_d[node_id] = vec_d[node_id] {} g", d_op))); } } diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp index ab554521fc..50ef1d6b8d 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -24,7 +24,6 @@ namespace nmodl { namespace codegen { -using namespace fmt::literals; typedef std::vector> CodegenFunctionVector; /** @@ -66,7 +65,7 @@ struct InstanceVarHelper { const auto& vars = 
instance->get_codegen_vars(); auto it = find_variable(vars, name); if (it == vars.end()) { - throw std::runtime_error("Can not find variable with name {}"_format(name)); + throw std::runtime_error(fmt::format("Can not find variable with name {}", name)); } return *it; } @@ -76,7 +75,8 @@ struct InstanceVarHelper { const auto& vars = instance->get_codegen_vars(); auto it = find_variable(vars, name); if (it == vars.end()) { - throw std::runtime_error("Can not find codegen variable with name {}"_format(name)); + throw std::runtime_error( + fmt::format("Can not find codegen variable with name {}", name)); } return (it - vars.begin()); } diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 1516f23634..c11ba6cc3b 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -933,7 +933,7 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { void CodegenLLVMVisitor::print_mechanism_range_var_structure() { printer->add_newline(2); printer->add_line("/** Instance Struct passed as argument to LLVM IR kernels */"); - printer->start_block("struct {} "_format(instance_struct())); + printer->start_block(fmt::format("struct {} ", instance_struct())); for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { auto is_pointer = variable->get_is_pointer(); auto name = to_nmodl(variable->get_name()); @@ -942,10 +942,14 @@ void CodegenLLVMVisitor::print_mechanism_range_var_structure() { auto pointer = is_pointer ? "*" : ""; auto var_name = variable->get_node_name(); switch (nmodl_type) { -#define DISPATCH(type, c_type) \ - case type: \ - printer->add_line("{}{}{} {}{};"_format( \ - qualifier, c_type, pointer, is_pointer ? ptr_type_qualifier() : "", var_name)); \ +#define DISPATCH(type, c_type) \ + case type: \ + printer->add_line(fmt::format("{}{}{} {}{};", \ + qualifier, \ + c_type, \ + pointer, \ + is_pointer ? 
ptr_type_qualifier() : "", \ + var_name)); \ break; DISPATCH(ast::AstNodeType::DOUBLE, "double"); @@ -972,7 +976,8 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { printer->add_newline(2); printer->add_line("/** initialize mechanism instance variables */"); printer->start_block("static inline void setup_instance(NrnThread* nt, Memb_list* ml) "); - printer->add_line("{0}* inst = ({0}*) mem_alloc(1, sizeof({0}));"_format(instance_struct())); + printer->add_line( + fmt::format("{0}* inst = ({0}*) mem_alloc(1, sizeof({0}));", instance_struct())); if (channel_task_dependency_enabled() && !info.codegen_shadow_variables.empty()) { printer->add_line("setup_shadow_vectors(inst, ml);"); } @@ -995,12 +1000,12 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { auto name = var->get_name(); auto range_var_type = get_range_var_float_type(var); if (float_type == range_var_type) { - auto variable = "ml->data+{}{}"_format(id, stride); + auto variable = fmt::format("ml->data+{}{}", id, stride); auto device_variable = get_variable_device_pointer(variable, float_type_pointer); - printer->add_line("inst->{} = {};"_format(name, device_variable)); + printer->add_line(fmt::format("inst->{} = {};", name, device_variable)); } else { - printer->add_line("inst->{} = setup_range_variable(ml->data+{}{}, pnodecount);"_format( - name, id, stride)); + printer->add_line(fmt::format( + "inst->{} = setup_range_variable(ml->data+{}{}, pnodecount);", name, id, stride)); variables_to_free.push_back(name); } id += var->get_length(); @@ -1021,7 +1026,7 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { type = info.artificial_cell ? 
"void*" : float_type_pointer; } auto device_variable = get_variable_device_pointer(variable, type); - printer->add_line("inst->{} = {};"_format(name, device_variable)); + printer->add_line(fmt::format("inst->{} = {};", name, device_variable)); } int index_id = 0; @@ -1030,7 +1035,7 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { std::string var_name = int_var.symbol->get_name() + "_index"; // Create for loop that instantiates the ion__index with // indexes[*pdnodecount] - printer->add_line("inst->{} = indexes+{}*pnodecount;"_format(var_name, index_id)); + printer->add_line(fmt::format("inst->{} = indexes+{}*pnodecount;", var_name, index_id)); index_id++; } @@ -1047,21 +1052,21 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { printer->add_line(fmt::format("inst->{} = nt->_shadow_d;", naming::NTHREAD_D_SHADOW)); // Setup global variables - printer->add_line("inst->{0} = nt->{0};"_format(naming::NTHREAD_T_VARIABLE)); - printer->add_line("inst->{0} = nt->{0};"_format(naming::NTHREAD_DT_VARIABLE)); - printer->add_line("inst->{0} = {0};"_format(naming::CELSIUS_VARIABLE)); - printer->add_line("inst->{0} = {0};"_format(naming::SECOND_ORDER_VARIABLE)); - printer->add_line("inst->{} = ml->nodecount;"_format(naming::MECH_NODECOUNT_VAR)); + printer->add_line(fmt::format("inst->{0} = nt->{0};", naming::NTHREAD_T_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = nt->{0};", naming::NTHREAD_DT_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = {0};", naming::CELSIUS_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = {0};", naming::SECOND_ORDER_VARIABLE)); + printer->add_line(fmt::format("inst->{} = ml->nodecount;", naming::MECH_NODECOUNT_VAR)); printer->add_line("ml->instance = inst;"); printer->end_block(3); printer->add_line("/** cleanup mechanism instance variables */"); printer->start_block("static inline void cleanup_instance(Memb_list* ml) "); - printer->add_line("{0}* inst = ({0}*) ml->instance;"_format(instance_struct())); + 
printer->add_line(fmt::format("{0}* inst = ({0}*) ml->instance;", instance_struct())); if (range_variable_setup_required()) { for (auto& var: variables_to_free) { - printer->add_line("mem_free((void*)inst->{});"_format(var)); + printer->add_line(fmt::format("mem_free((void*)inst->{});", var)); } } printer->add_line("mem_free((void*)inst);"); @@ -1071,7 +1076,7 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { CodegenLLVMVisitor::ParamVector CodegenLLVMVisitor::get_compute_function_parameter() { auto params = ParamVector(); params.emplace_back(param_type_qualifier(), - "{}*"_format(instance_struct()), + fmt::format("{}*", instance_struct()), ptr_type_qualifier(), "inst"); return params; @@ -1082,18 +1087,19 @@ void CodegenLLVMVisitor::print_backend_compute_routine_decl() { auto compute_function = compute_method_name(BlockType::Initial); printer->add_newline(2); - printer->add_line("extern void {}({});"_format(compute_function, get_parameter_str(params))); + printer->add_line( + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); if (info.nrn_cur_required()) { compute_function = compute_method_name(BlockType::Equation); printer->add_line( - "extern void {}({});"_format(compute_function, get_parameter_str(params))); + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); } if (info.nrn_state_required()) { compute_function = compute_method_name(BlockType::State); printer->add_line( - "extern void {}({});"_format(compute_function, get_parameter_str(params))); + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); } } @@ -1105,10 +1111,10 @@ void CodegenLLVMVisitor::print_wrapper_routine(const std::string& wrapper_functi auto compute_function = compute_method_name(type); printer->add_newline(2); - printer->start_block("void {}({})"_format(function_name, args)); + printer->start_block(fmt::format("void {}({})", function_name, args)); printer->add_line("int nodecount = 
ml->nodecount;"); // clang-format off - printer->add_line("{0}* {1}inst = ({0}*) ml->instance;"_format(instance_struct(), ptr_type_qualifier())); + printer->add_line(fmt::format("{0}* {1}inst = ({0}*) ml->instance;", instance_struct(), ptr_type_qualifier())); // clang-format on if (type == BlockType::Initial) { @@ -1121,7 +1127,7 @@ void CodegenLLVMVisitor::print_wrapper_routine(const std::string& wrapper_functi printer->add_newline(); } - printer->add_line("{}(inst);"_format(compute_function)); + printer->add_line(fmt::format("{}(inst);", compute_function)); printer->end_block(); printer->add_newline(); } diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp b/src/codegen/llvm/replace_with_lib_functions.cpp index 750e2c2318..07d6dd8f04 100644 --- a/src/codegen/llvm/replace_with_lib_functions.cpp +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -174,8 +174,10 @@ bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { {"llvm.pow.f64", "__nv_pow"}, {"llvm.log.f32", "__nv_logf"}, {"llvm.log.f64", "__nv_log"}, - {"llvm.fabs.f32", "__nv_fabsf"}, - {"llvm.fabs.f64", "__nv_fabs"}}; + {"llvm.fabs.f32", + "__nv_fabsf"}, + {"llvm.fabs.f64", + "__nv_fabs"}}; // If replacement is not supported, abort. 
std::string old_name = function->getName().str(); diff --git a/src/main.cpp b/src/main.cpp index 6095c89be2..dc42b58043 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -222,11 +222,11 @@ int main(int argc, const char* argv[]) { "Name of CPU platform to use")->ignore_case(); auto simd_math_library_opt = cpu_opt->add_option("--math-library", cfg.llvm_math_library, - "Math library for SIMD code generation ({})"_format(cfg.llvm_math_library)); + fmt::format("Math library for SIMD code generation ({})", cfg.llvm_math_library)); simd_math_library_opt->check(CLI::IsMember({"Accelerate", "libmvec", "libsystem_m", "MASSV", "SLEEF", "SVML", "none"})); cpu_opt->add_option("--vector-width", cfg.llvm_vector_width, - "Explicit vectorization width for IR generation ({})"_format(cfg.llvm_vector_width))->ignore_case(); + fmt::format("Explicit vectorization width for IR generation ({})", cfg.llvm_vector_width))->ignore_case(); auto gpu_opt = app.add_subcommand("gpu", "LLVM GPU option")->ignore_case(); gpu_opt->needs(llvm_opt); @@ -239,7 +239,7 @@ int main(int argc, const char* argv[]) { "Name of target architecture to use")->ignore_case(); auto gpu_math_library_opt = gpu_opt->add_option("--math-library", cfg.llvm_math_library, - "Math library for GPU code generation ({})"_format(cfg.llvm_math_library)); + fmt::format("Math library for GPU code generation ({})", cfg.llvm_math_library)); gpu_math_library_opt->check(CLI::IsMember({"libdevice"})); // Allow only one platform at a time. 
diff --git a/test/benchmark/cuda_driver.cpp b/test/benchmark/cuda_driver.cpp index cecc97b35d..b65caeff0d 100644 --- a/test/benchmark/cuda_driver.cpp +++ b/test/benchmark/cuda_driver.cpp @@ -20,8 +20,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" -using fmt::literals::operator""_format; - namespace nmodl { namespace runner { @@ -47,7 +45,7 @@ void CUDADriver::link_libraries(llvm::Module& module, BenchmarkInfo* benchmark_i parseBitcodeFile(libdevice_file_memory_buffer->get()->getMemBufferRef(), module.getContext()); if (std::error_code error = errorToErrorCode(libdevice_expected_module.takeError())) { - throw std::runtime_error("Error reading bitcode: {}"_format(error.message())); + throw std::runtime_error(fmt::format("Error reading bitcode: {}", error.message())); } linker.linkInModule(std::move(libdevice_expected_module.get()), llvm::Linker::LinkOnlyNeeded); @@ -112,7 +110,7 @@ void CUDADriver::init(const codegen::Platform& platform, BenchmarkInfo* benchmar char name[128]; checkCudaErrors(cuDeviceGetName(name, 128, device)); device_info.name = name; - logger->info("Using CUDA Device [0]: {}"_format(device_info.name)); + logger->info(fmt::format("Using CUDA Device [0]: {}", device_info.name)); // Get the compute capability of the device that is actually going to be used to run the kernel checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_major, @@ -121,8 +119,9 @@ void CUDADriver::init(const codegen::Platform& platform, BenchmarkInfo* benchmar checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)); - logger->info("Device Compute Capability: {}.{}"_format(device_info.compute_version_major, - device_info.compute_version_minor)); + logger->info(fmt::format("Device Compute Capability: {}.{}", + device_info.compute_version_major, + device_info.compute_version_minor)); if (device_info.compute_version_major < 2) { throw std::runtime_error("ERROR: 
Device 0 is not SM 2.0 or greater"); } @@ -185,10 +184,10 @@ void CUDADriver::init(const codegen::Platform& platform, BenchmarkInfo* benchmar auto cuda_jit_ret = cuModuleLoadDataEx( &cudaModule, ptx_compiled_module.c_str(), jitNumOptions, jitOptions, jitOptVals); if (!std::string(jitLogBuffer).empty()) { - logger->info("CUDA JIT INFO LOG: {}"_format(std::string(jitLogBuffer))); + logger->info(fmt::format("CUDA JIT INFO LOG: {}", std::string(jitLogBuffer))); } if (!std::string(jitErrorLogBuffer).empty()) { - logger->info("CUDA JIT ERROR LOG: {}"_format(std::string(jitErrorLogBuffer))); + logger->info(fmt::format("CUDA JIT ERROR LOG: {}", std::string(jitErrorLogBuffer))); } delete[] jitOptions; delete[] jitOptVals; From 0c8f566581fdd2bd6a5bd4e03f64022934f31126 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Thu, 12 May 2022 17:55:26 +0200 Subject: [PATCH 090/105] [LLVM][FIX] Float generation fix in LLVM helper visitor (#865) * Fixed float AST in helper * Fixed InstanceStruct test to generate struct with doubles --- .../llvm/codegen_llvm_helper_visitor.cpp | 29 +++++++++---------- .../llvm/codegen_llvm_helper_visitor.hpp | 12 ++++++-- src/codegen/llvm/codegen_llvm_visitor.cpp | 7 +++++ src/codegen/llvm/codegen_llvm_visitor.hpp | 4 +-- test/unit/codegen/codegen_llvm_execution.cpp | 26 ++++++++--------- .../codegen/codegen_llvm_instance_struct.cpp | 2 +- 6 files changed, 45 insertions(+), 35 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 0e05c6d885..06fde2bcd7 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -22,7 +22,6 @@ using symtab::syminfo::Status; /// initialize static member variables const ast::AstNodeType CodegenLLVMHelperVisitor::INTEGER_TYPE = ast::AstNodeType::INTEGER; -const ast::AstNodeType CodegenLLVMHelperVisitor::FLOAT_TYPE = ast::AstNodeType::DOUBLE; const std::string 
CodegenLLVMHelperVisitor::NODECOUNT_VAR = "node_count"; const std::string CodegenLLVMHelperVisitor::VOLTAGE_VAR = "voltage"; const std::string CodegenLLVMHelperVisitor::NODE_INDEX_VAR = "node_index"; @@ -152,7 +151,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// return type based on node type ast::CodegenVarType* ret_var_type = nullptr; if (node.get_node_type() == ast::AstNodeType::FUNCTION_BLOCK) { - ret_var_type = new ast::CodegenVarType(FLOAT_TYPE); + ret_var_type = new ast::CodegenVarType(fp_type); } else { ret_var_type = new ast::CodegenVarType(INTEGER_TYPE); } @@ -183,7 +182,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { ast::CodegenVarWithTypeVector arguments; for (const auto& param: node.get_parameters()) { /// create new type and name for creating new ast node - auto type = new ast::CodegenVarType(FLOAT_TYPE); + auto type = new ast::CodegenVarType(fp_type); auto var = param->get_name()->clone(); arguments.emplace_back(new ast::CodegenVarWithType(type, /*is_pointer=*/0, var)); } @@ -219,12 +218,12 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s /// float variables are standard pointers to float vectors for (const auto& float_var: info.codegen_float_variables) { - add_var_with_type(float_var->get_name(), FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(float_var->get_name(), fp_type, /*is_pointer=*/1); } /// int variables are pointers to indexes for other vectors for (const auto& int_var: info.codegen_int_variables) { - add_var_with_type(int_var.symbol->get_name(), FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(int_var.symbol->get_name(), fp_type, /*is_pointer=*/1); } // for integer variables, there should be index @@ -234,7 +233,7 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s } // add voltage and node index - add_var_with_type(VOLTAGE_VAR, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(VOLTAGE_VAR, fp_type, /*is_pointer=*/1); 
add_var_with_type(NODE_INDEX_VAR, INTEGER_TYPE, /*is_pointer=*/1); // As we do not have `NrnThread` object as an argument, we store points to rhs @@ -242,19 +241,19 @@ std::shared_ptr CodegenLLVMHelperVisitor::create_instance_s // in case of point process mechanism. // Note: shadow variables are not used at the moment because reduction will be taken care // by LLVM backend (even on CPU via sequential add like ISPC). - add_var_with_type(naming::NTHREAD_RHS, FLOAT_TYPE, /*is_pointer=*/1); - add_var_with_type(naming::NTHREAD_D, FLOAT_TYPE, /*is_pointer=*/1); - add_var_with_type(naming::NTHREAD_RHS_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); - add_var_with_type(naming::NTHREAD_D_SHADOW, FLOAT_TYPE, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_RHS, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_RHS_SHADOW, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D_SHADOW, fp_type, /*is_pointer=*/1); // NOTE: All the pointer variables should be declared before the scalar variables otherwise // the allocation of memory for the variables in the InstanceStruct and their offsets will be // wrong // add dt, t, celsius - add_var_with_type(naming::NTHREAD_T_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::NTHREAD_DT_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); - add_var_with_type(naming::CELSIUS_VARIABLE, FLOAT_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::NTHREAD_T_VARIABLE, fp_type, /*is_pointer=*/0); + add_var_with_type(naming::NTHREAD_DT_VARIABLE, fp_type, /*is_pointer=*/0); + add_var_with_type(naming::CELSIUS_VARIABLE, fp_type, /*is_pointer=*/0); add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); @@ -526,7 +525,7 @@ void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node parent_node->erase_statement(to_delete); /// create 
new codegen variable statement and insert at the beginning of the block - auto type = new ast::CodegenVarType(FLOAT_TYPE); + auto type = new ast::CodegenVarType(fp_type); auto new_statement = std::make_shared(type, variables); const auto& statements = parent_node->get_statements(); parent_node->insert_statement(statements.begin(), new_statement); @@ -811,7 +810,7 @@ void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr create_instance_struct(); + private: + /// floating-point type + ast::AstNodeType fp_type; + public: - /// default integer and float node type + /// default integer type static const ast::AstNodeType INTEGER_TYPE; - static const ast::AstNodeType FLOAT_TYPE; // node count, voltage and node index variables static const std::string NODECOUNT_VAR; @@ -131,7 +134,10 @@ class CodegenLLVMHelperVisitor: public visitor::AstVisitor { static const std::string NODE_INDEX_VAR; CodegenLLVMHelperVisitor(Platform& platform) - : platform(platform) {} + : platform(platform) { + fp_type = platform.is_single_precision() ? ast::AstNodeType::FLOAT + : ast::AstNodeType::DOUBLE; + } const InstanceVarHelper& get_instance_var_helper() { return instance_var_helper; diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index c11ba6cc3b..32bc1b8b9d 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -225,6 +225,7 @@ llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& switch (node.get_type()) { case ast::AstNodeType::BOOLEAN: return ir_builder.get_boolean_type(); + case ast::AstNodeType::FLOAT: case ast::AstNodeType::DOUBLE: return ir_builder.get_fp_type(); case ast::AstNodeType::INSTANCE_STRUCT: @@ -255,6 +256,7 @@ llvm::Type* CodegenLLVMVisitor::get_instance_struct_type() { // Create the corresponding LLVM type. switch (nmodl_type) { + case ast::AstNodeType::FLOAT: case ast::AstNodeType::DOUBLE: member_types.push_back(is_pointer ? 
ir_builder.get_fp_ptr_type() : ir_builder.get_fp_type()); @@ -756,6 +758,10 @@ void CodegenLLVMVisitor::visit_double(const ast::Double& node) { ir_builder.create_fp_constant(node.get_value()); } +void CodegenLLVMVisitor::visit_float(const ast::Float& node) { + ir_builder.create_fp_constant(node.get_value()); +} + void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { // do nothing. \todo: remove old function blocks from ast. } @@ -952,6 +958,7 @@ void CodegenLLVMVisitor::print_mechanism_range_var_structure() { var_name)); \ break; + DISPATCH(ast::AstNodeType::FLOAT, "float"); DISPATCH(ast::AstNodeType::DOUBLE, "double"); DISPATCH(ast::AstNodeType::INTEGER, "int"); diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index a22f698431..a4072737df 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -173,6 +173,7 @@ class CodegenLLVMVisitor: public CodegenCVisitor { void visit_codegen_thread_id(const ast::CodegenThreadId& node) override; void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; void visit_double(const ast::Double& node) override; + void visit_float(const ast::Float& node) override; void visit_function_block(const ast::FunctionBlock& node) override; void visit_function_call(const ast::FunctionCall& node) override; void visit_if_statement(const ast::IfStatement& node) override; @@ -197,9 +198,6 @@ class CodegenLLVMVisitor: public CodegenCVisitor { void visit_else_statement(const ast::ElseStatement& node) override { visitor::ConstAstVisitor::visit_else_statement(node); } - void visit_float(const ast::Float& node) override { - visitor::ConstAstVisitor::visit_float(node); - } void visit_from_statement(const ast::FromStatement& node) override { visitor::ConstAstVisitor::visit_from_statement(node); } diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp 
index b7b8c2268c..67dd556e1a 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -395,7 +395,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { NeuronSolveVisitor().visit_program(*ast); SolveBlockVisitor().visit_program(*ast); - codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + codegen::Platform simd_cpu_platform(/*use_single_precision=*/true, /*instruction_width=*/4); codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", @@ -411,21 +411,21 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); // Fill the instance struct data with some values for unit testing. - std::vector x = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; - std::vector x0 = {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0}; - std::vector x1 = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + std::vector x = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + std::vector x0 = {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0}; + std::vector x1 = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - std::vector voltage = {3.0, 4.0, 7.0, 1.0, 2.0, 5.0, 8.0, 6.0, 10.0, 9.0}; + std::vector voltage = {3.0, 4.0, 7.0, 1.0, 2.0, 5.0, 8.0, 6.0, 10.0, 9.0}; std::vector node_index = {3, 4, 0, 1, 5, 7, 2, 6, 9, 8}; InstanceTestInfo instance_info{&instance_data, llvm_visitor.get_instance_var_helper(), num_elements}; - initialise_instance_variable(instance_info, x, "x"); - initialise_instance_variable(instance_info, x0, "x0"); - initialise_instance_variable(instance_info, x1, "x1"); + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, x0, "x0"); + initialise_instance_variable(instance_info, x1, "x1"); - initialise_instance_variable(instance_info, voltage, "voltage"); + initialise_instance_variable(instance_info, voltage, 
"voltage"); initialise_instance_variable(instance_info, node_index, "node_index"); // Set up the JIT runner. @@ -437,13 +437,13 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { runner.run_with_argument("__nrn_state_test_wrapper", instance_data.base_ptr); // Check that the main and remainder loops correctly change the data stored in x. - std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; - REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); // Check that the gather load produces correct results in y: // y[id] = voltage[node_index[id]] - std::vector y_expected = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; - REQUIRE(check_instance_variable(instance_info, y_expected, "y")); + std::vector y_expected = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + REQUIRE(check_instance_variable(instance_info, y_expected, "y")); } } } diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 9c22fdda78..41cec12194 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -105,7 +105,7 @@ SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { constexpr static double seed = 42; auto instance_data = generate_instance_data(nmodl_text, /*opt_level=*/0, - /*use_single_precision=*/true, + /*use_single_precision=*/false, /*vector_width*/ 1, num_elements, seed); From a784b533badba948092793c72f4ff610cf4d65be Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Tue, 17 May 2022 18:23:17 +0200 Subject: [PATCH 091/105] [CP-859] Code generation changes for "inline" scopmath solvers. (#859) (#868) * Use named structs instead of functions, pass instances to solvers. * Goes with BlueBrain/CoreNeuron#809. 
* Add fmt_line, fmt_start_block and restart_block methods to CodePrinter. * gitlab-ci: support CVF_BRANCH variable. Co-authored-by: Olli Lupton --- src/printer/code_printer.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/printer/code_printer.hpp b/src/printer/code_printer.hpp index 4267f5f757..f6b703d3da 100644 --- a/src/printer/code_printer.hpp +++ b/src/printer/code_printer.hpp @@ -72,8 +72,6 @@ class CodePrinter { /// end a block and immediately start a new one (i.e. "[indent-1]} [expression] {\n") void restart_block(std::string const& expression); - void start_block(const std::string& text); - void add_text(const std::string&); void add_line(const std::string&, int num_new_lines = 1); From 253f639f35e998dd9a337e2fd67c2e0e512b9980 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 19 May 2022 14:24:45 +0200 Subject: [PATCH 092/105] [LLVM] Fixes compilation with LLVM codegen disabled (#867) * Makes sure that llvm branch compiles with LLVM codegen and Python bindings disabled or enabled * Only compile benchmark folder when LLVM backend is enabled * Selectively enables options of Python Codegen and Python JIT Driver based on LLVM backend being enabled or disabled * Fixes the order of linkage of static variables for the generation of nmodl binary --- CMakeLists.txt | 4 +++- src/codegen/codegen_driver.cpp | 4 ++++ src/pybind/pynmodl.cpp | 17 +++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6414a16830..18a78bcc94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -227,7 +227,9 @@ set(MEMORYCHECK_COMMAND_OPTIONS # do not enable tests if nmodl is used as submodule if(NOT NMODL_AS_SUBPROJECT) include(CTest) - add_subdirectory(test/benchmark) + if(NMODL_ENABLE_LLVM) + add_subdirectory(test/benchmark) + endif() add_subdirectory(test/unit) add_subdirectory(test/integration) endif() diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp index 
19c3e24952..be00a66a7b 100644 --- a/src/codegen/codegen_driver.cpp +++ b/src/codegen/codegen_driver.cpp @@ -181,7 +181,11 @@ bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::s /// that old symbols (e.g. prime variables) are not lost update_symtab = true; +#ifdef NMODL_LLVM_BACKEND if (cfg.nmodl_inline || cfg.llvm_ir) { +#else + if (cfg.nmodl_inline) { +#endif logger->info("Running nmodl inline visitor"); InlineVisitor().visit_program(*node); ast_to_nmodl(*node, filepath("inline", "mod")); diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index a3176cc570..fc7fb569ba 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -6,11 +6,9 @@ *************************************************************************/ #include "ast/program.hpp" #include "codegen/codegen_driver.hpp" -#include "codegen/llvm/codegen_llvm_visitor.hpp" #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pybind_utils.hpp" -#include "test/benchmark/llvm_benchmark.hpp" #include "visitors/visitor_utils.hpp" #include @@ -20,6 +18,11 @@ #include #include +#ifdef NMODL_LLVM_BACKEND +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "test/benchmark/llvm_benchmark.hpp" +#endif + /** * \dir * \brief Python Interface Implementation @@ -109,9 +112,11 @@ static const char* const to_json = R"( '{"Program":[{"NeuronBlock":[{"StatementBlock":[]}]}]}' )"; +#ifdef NMODL_LLVM_BACKEND static const char* jit = R"( This is the Jit class documentation )"; +#endif } // namespace docstring @@ -135,6 +140,7 @@ class PyNmodlDriver: public nmodl::parser::NmodlDriver { } }; +#ifdef NMODL_LLVM_BACKEND class JitDriver { private: nmodl::codegen::Platform platform; @@ -208,6 +214,7 @@ class JitDriver { return benchmark.run(); } }; +#endif } // namespace nmodl @@ -244,8 +251,10 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { py::class_ cfg(m_nmodl, "CodeGenConfig"); cfg.def(py::init([]() { auto cfg = std::make_unique(); +#ifdef NMODL_LLVM_BACKEND // set to more 
sensible defaults for python binding cfg->llvm_ir = true; +#endif return cfg; })) .def_readwrite("sympy_analytic", &nmodl::codegen::CodeGenConfig::sympy_analytic) @@ -274,6 +283,7 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { .def_readwrite("nmodl_ast", &nmodl::codegen::CodeGenConfig::nmodl_ast) .def_readwrite("json_ast", &nmodl::codegen::CodeGenConfig::json_ast) .def_readwrite("json_perfstat", &nmodl::codegen::CodeGenConfig::json_perfstat) +#ifdef NMODL_LLVM_BACKEND .def_readwrite("llvm_ir", &nmodl::codegen::CodeGenConfig::llvm_ir) .def_readwrite("llvm_float_type", &nmodl::codegen::CodeGenConfig::llvm_float_type) .def_readwrite("llvm_opt_level_ir", &nmodl::codegen::CodeGenConfig::llvm_opt_level_ir) @@ -300,6 +310,9 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { "instance_size"_a, "cuda_grid_dim_x"_a = 1, "cuda_block_dim_x"_a = 1); +#else + ; +#endif m_nmodl.def("to_nmodl", static_cast Date: Thu, 19 May 2022 17:03:04 +0200 Subject: [PATCH 093/105] [CP-870] Cherry-pick sympy fix from master (#872) * Squash commit when rebasing with master * [CP-870] fixup! CI errors with sympy 1.9 and 1.10 (#870) * fixup! CI errors with sympy 1.9 and 1.10 - bin is a function in python: renamed variable in test to not get this error - sympy now can solve z'=a/z+b/z/z. It is getting hard to find simple equations that it cannot solve. Warning: this is a downgrade of the testing capabilities. I could not find an ode with simple functions (no sin, exp, log etc. Only simple multiplications and additions) that cannot be solved by sympy. Best case the test hangs waiting for sympy to solve a very long equation. For this reason, with this PR, the fact that the code should return the equation untouched if it cannot be processed is untested * fixup! 
clang-format * remove limit for sympy 1.9 * Relax constraints of GPU nodes to allocate in gitlab CI Co-authored-by: Alessandro Cattabiani Co-authored-by: Alessandro Cattabiani --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3d75cd928d..243113876a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -83,7 +83,7 @@ test:intel: bb5_cpus_per_task: 1 bb5_memory: 16G bb5_exclusive: full - bb5_constraint: gpu_32g # CascadeLake CPU & V100 GPU node + bb5_constraint: volta # V100 GPU node .build_allocation: variables: From a5577bb23b246b9eae1ab82623c08258553a8fd3 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 23 May 2022 13:53:55 +0200 Subject: [PATCH 094/105] [LLVM][SIMD] Atomic updates support (#864) * Adds code generation functionality for atomic writes on SIMD platforms * Adds both execution and IR tests * Refactors common GPU/SIMD atomics code Co-authored-by: Ioannis Magkanaris --- src/codegen/llvm/codegen_llvm_visitor.cpp | 150 +++++++--- src/codegen/llvm/llvm_ir_builder.cpp | 79 +++++ src/codegen/llvm/llvm_ir_builder.hpp | 13 + test/benchmark/CMakeLists.txt | 5 - test/unit/CMakeLists.txt | 2 +- test/unit/codegen/codegen_llvm_execution.cpp | 294 +++++++++++++++++++ test/unit/codegen/codegen_llvm_ir.cpp | 106 +++++++ 7 files changed, 602 insertions(+), 47 deletions(-) diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 32bc1b8b9d..491f8ce02d 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -533,53 +533,121 @@ void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomic ast::BinaryOp op = ir_builder.extract_atomic_op(atomic_op); // For different platforms, we handle atomic updates differently! 
- if (platform.is_cpu_with_simd()) { - throw std::runtime_error("Error: no atomic update support for SIMD CPUs\n"); - } else if (platform.is_gpu()) { - const auto& identifier = var->get_name(); - - // We only need to support atomic updates to instance struct members. - if (!identifier->is_codegen_instance_var()) - throw std::runtime_error("Error: atomic updates for non-instance variable\n"); - - const auto& node = std::dynamic_pointer_cast(identifier); - const auto& instance_name = node->get_instance_var()->get_node_name(); - const auto& member_node = node->get_member_var(); - const auto& member_name = member_node->get_node_name(); - - if (!instance_var_helper.is_an_instance_variable(member_name)) - throw std::runtime_error("Error: " + member_name + - " is not a member of the instance variable\n"); - - llvm::Value* instance_ptr = ir_builder.create_load(instance_name); - int member_index = instance_var_helper.get_variable_index(member_name); - llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); - - // Some sanity checks. 
- auto codegen_var_with_type = instance_var_helper.get_variable(member_name); - if (!codegen_var_with_type->get_is_pointer()) - throw std::runtime_error( - "Error: atomic updates are allowed on pointer variables only\n"); - const auto& member_var_name = std::dynamic_pointer_cast(member_node); - if (!member_var_name->get_name()->is_indexed_name()) - throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); - const auto& member_indexed_name = std::dynamic_pointer_cast( - member_var_name->get_name()); - if (!member_indexed_name->get_length()->is_name()) - throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); - - llvm::Value* i64_index = get_index(*member_indexed_name); - llvm::Value* instance_member = ir_builder.create_load(member_ptr); - llvm::Value* ptr = ir_builder.create_inbounds_gep(instance_member, i64_index); - ir_builder.create_atomic_op(ptr, rhs, op); - } else { - // For non-SIMD CPUs, updates don't have to be atomic at all! + // For non-SIMD CPUs (or any scalar code on SIMD CPUs), updates don't have to be atomic at all! + const bool non_SIMD_cpu = platform.is_cpu() && !platform.is_cpu_with_simd(); + if (non_SIMD_cpu || (platform.is_cpu_with_simd() && !ir_builder.vectorizing())) { llvm::Value* lhs = accept_and_get(node.get_lhs()); ir_builder.create_binary_op(lhs, rhs, op); llvm::Value* result = ir_builder.pop_last_value(); write_to_variable(*var, result); + return; + } + + // Otherwise, we either have a GPU or a SIMD CPU. Double-check to be sure. 
+ if (!platform.is_gpu() && !platform.is_cpu_with_simd()) + throw std::runtime_error("Error: unknown platform - " + platform.get_name() + "\n"); + + const auto& identifier = var->get_name(); + if (!identifier->is_codegen_instance_var()) + throw std::runtime_error("Error: atomic updates for non-instance variable\n"); + + const auto& codegen_intance_node = std::dynamic_pointer_cast( + identifier); + const auto& instance_name = codegen_intance_node->get_instance_var()->get_node_name(); + const auto& member_node = codegen_intance_node->get_member_var(); + const auto& member_name = member_node->get_node_name(); + + // Sanity checks. Not that there is a bit of duplication with `read_from_or_write_to_instance` + // but this is not crucial for now. + // TODO: remove this duplication! + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) + throw std::runtime_error("Error: atomic updates are allowed on pointer variables only\n"); + const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + // First, load the pointer variable from instance struct and process its index. 
+ llvm::Value* instance_ptr = ir_builder.create_load(instance_name); + const int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + llvm::Value* i64_index = get_index(*member_indexed_name); + + // For GPUs, we just need to create atomic add/subtract. + if (platform.is_gpu()) { + llvm::Value* ptr = ir_builder.create_inbounds_gep(instance_member, i64_index); + ir_builder.create_atomic_op(ptr, rhs, op); + } else { + // SIMD case is more elaborate. We will create a scalar block that will perform necessary + // update. The overall structure will be + // +---------------------------+ + // | | + // | | + // | br %atomic | + // +---------------------------+ + // | + // V + // +-----------------------------+ + // | | + // | %cmp = ... |<------+ + // | cond_br %cmp, %atomic, %rem | | + // +-----------------------------+ | + // | | | + // | +---------------+ + // V + // +---------------------------+ + // | | + // | | + // +---------------------------+ + + // Step 1: Create a vector of (replicated) starting addresses of the given member. + llvm::Value* start = ir_builder.create_member_addresses(instance_member); + + // Step 2: Create a vector alloca that will store addresses of member values. Then also + // create an array of these addresses (as pointers). While this can be moved to `IRBuilder`, + // the amount of code is rather negligible and thus can be left here. 
+ const int vector_width = platform.get_instruction_width(); + llvm::Type* vi64_type = llvm::FixedVectorType::get(ir_builder.get_i64_type(), vector_width); + llvm::Type* array_type = llvm::ArrayType::get(ir_builder.get_fp_ptr_type(), vector_width); + + llvm::Value* ptrs_vec = ir_builder.create_alloca(/*name=*/"ptrs", vi64_type); + llvm::Value* ptrs_arr = + ir_builder.create_bitcast(ptrs_vec, + llvm::PointerType::get(array_type, /*AddressSpace=*/0)); + + // Step 3: Calculate offsets of the values in the member by: + // offset = start + (index * sizeof(fp_type)) + // Store this vector to a temporary for later reuse. + llvm::Value* offsets = ir_builder.create_member_offsets(start, i64_index); + ir_builder.create_store(ptrs_vec, offsets); + + // Step 4: Create a new block that will be used for atomic code generation. + llvm::BasicBlock* body_bb = ir_builder.get_current_block(); + llvm::BasicBlock* cond_bb = body_bb->getNextNode(); + llvm::Function* func = body_bb->getParent(); + llvm::BasicBlock* atomic_bb = + llvm::BasicBlock::Create(*context, /*Name=*/"atomic.update", func, cond_bb); + llvm::BasicBlock* remaining_body_bb = + llvm::BasicBlock::Create(*context, /*Name=*/"for.body.remaining", func, cond_bb); + ir_builder.create_br_and_set_insertion_point(atomic_bb); + + // Step 5: Generate code for the atomic update: go through each element in the vector + // performing the computation. + llvm::Value* cmp = ir_builder.create_atomic_loop(ptrs_arr, rhs, op); + + // Create branch to close the loop and restore the insertion point. 
+ ir_builder.create_cond_br(cmp, remaining_body_bb, atomic_bb); + ir_builder.set_insertion_point(remaining_body_bb); } } diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index efbd7aa050..f0682fff91 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -315,6 +315,85 @@ void IRBuilder::create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::Bin llvm::AtomicOrdering::SequentiallyConsistent); } +llvm::Value* IRBuilder::create_member_addresses(llvm::Value* member_ptr) { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + + // Treat this member address as integer value. + llvm::Type* int_ptr_type = m->getDataLayout().getIntPtrType(builder.getContext()); + llvm::Value* ptr_to_int = builder.CreatePtrToInt(member_ptr, int_ptr_type); + + // Create a vector that has address at 0. + llvm::Type* vector_type = llvm::FixedVectorType::get(int_ptr_type, + platform.get_instruction_width()); + llvm::Value* zero = get_scalar_constant(get_i32_type(), 0); + llvm::Value* tmp = + builder.CreateInsertElement(llvm::UndefValue::get(vector_type), ptr_to_int, zero); + + // Finally, use `shufflevector` with zeroinitializer to replicate the 0th element. 
+ llvm::Value* select = llvm::Constant::getNullValue(vector_type); + return builder.CreateShuffleVector(tmp, llvm::UndefValue::get(vector_type), select); +} + +llvm::Value* IRBuilder::create_member_offsets(llvm::Value* start, llvm::Value* indices) { + llvm::Value* factor = get_vector_constant(get_i64_type(), + platform.get_precision() / 8); + llvm::Value* offset = builder.CreateMul(indices, factor); + return builder.CreateAdd(start, offset); +} + +llvm::Value* IRBuilder::create_atomic_loop(llvm::Value* ptrs_arr, + llvm::Value* rhs, + ast::BinaryOp op) { + const int vector_width = platform.get_instruction_width(); + llvm::BasicBlock* curr = get_current_block(); + llvm::BasicBlock* prev = curr->getPrevNode(); + llvm::BasicBlock* next = curr->getNextNode(); + + // Some constant values. + llvm::Value* false_value = get_scalar_constant(get_boolean_type(), 0); + llvm::Value* zero = get_scalar_constant(get_i64_type(), 0); + llvm::Value* one = get_scalar_constant(get_i64_type(), 1); + llvm::Value* minus_one = get_scalar_constant(get_i64_type(), -1); + + // First, we create a PHI node that holds the mask of active vector elements. + llvm::PHINode* mask = builder.CreatePHI(get_i64_type(), /*NumReservedValues=*/2); + + // Intially, all elements are active. + llvm::Value* init_value = get_scalar_constant(get_i64_type(), + ~((~0) << vector_width)); + + // Find the index of the next active element and update the mask. This can be easily computed + // with: + // index = cttz(mask) + // new_mask = mask & ((1 << index) ^ -1) + llvm::Value* index = + builder.CreateIntrinsic(llvm::Intrinsic::cttz, {get_i64_type()}, {mask, false_value}); + llvm::Value* new_mask = builder.CreateShl(one, index); + new_mask = builder.CreateXor(new_mask, minus_one); + new_mask = builder.CreateAnd(mask, new_mask); + + // Update PHI with appropriate values. 
+ mask->addIncoming(init_value, prev); + mask->addIncoming(new_mask, curr); + + // Get the pointer to the current value, the value itself and the update.b + llvm::Value* gep = + builder.CreateGEP(ptrs_arr->getType()->getPointerElementType(), ptrs_arr, {zero, index}); + llvm::Value* ptr = create_load(gep); + llvm::Value* source = create_load(ptr); + llvm::Value* update = builder.CreateExtractElement(rhs, index); + + // Perform the update and store the result back. + // source = *ptr + // *ptr = source + update + create_binary_op(source, update, op); + llvm::Value* result = pop_last_value(); + create_store(ptr, result); + + // Return condition to break out of atomic update loop. + return builder.CreateICmpEQ(new_mask, zero); +} + void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) { // Check that both lhs and rhs have the same types. if (lhs->getType() != rhs->getType()) diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index 67db6fcded..537682b930 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -313,6 +313,19 @@ class IRBuilder { /// Generates an inbounds GEP instruction for the given value and returns calculated address. llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index); + /// Creates a vector splat of starting addresses of the given member. + llvm::Value* create_member_addresses(llvm::Value* member_ptr); + + /// Creates IR for calculating offest to member values. For more context, see + /// `visit_codegen_atomic_statement` in LLVM visitor. + llvm::Value* create_member_offsets(llvm::Value* start, llvm::Value* indices); + + /// Creates IR to perform scalar updates to instance member based on `ptrs_arr` for every + /// element in a vector by + /// member[*ptrs_arr[i]] = member[*ptrs_arr[i]] op rhs. + /// Returns condition (i1 value) to break out of atomic update loop. 
+ llvm::Value* create_atomic_loop(llvm::Value* ptrs_arr, llvm::Value* rhs, ast::BinaryOp op); + private: /// Generates an inbounds GEP instruction for the given name and returns calculated address. llvm::Value* create_inbounds_gep(const std::string& variable_name, llvm::Value* index); diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index f8f6c762f0..b0f36e381c 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -32,11 +32,6 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) file(GLOB modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/*.mod") list(APPEND modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/integration/mod/test_math.mod") foreach(modfile ${modfiles}) - # For expsyn.mod set the vector width to 1 since atomic operations are not supported for vector - # widths > 1. See https://github.com/BlueBrain/nmodl/issues/857 - if(${modfile} STREQUAL "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/expsyn.mod") - set(extra_args "--vec 1") - endif() get_filename_component(modfile_name "${modfile}" NAME) add_test(NAME "PyJIT/${modfile_name}" COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index f293680d73..0dbc1df6a3 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -150,7 +150,7 @@ if(NMODL_ENABLE_LLVM) printer ${NMODL_WRAPPER_LIBS} ${LLVM_LIBS_TO_LINK}) - set(CODEGEN_TEST testllvm) + set(CODEGEN_TEST "testllvm;test_llvm_runner") endif() # ============================================================================= diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 67dd556e1a..641bf93aa1 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -628,3 +628,297 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { } } } + 
+//============================================================================= +// Kernel with atomic updates. +//============================================================================= + +SCENARIO("Kernel with atomic updates", "[llvm][runner]") { + GIVEN("An atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is: + : + : ion_ina_id = mech->ion_ina_index[id] + : ion_ika_id = mech->ion_ika_index[id] + : mech->ion_ina[ion_ina_id] += mech->ina[id] + : mech->ion_ika[ion_ika_id] += mech->ika[id] + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/4); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/3); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_functions(); + + // Create the instance struct data. + int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices ion_ina[1] = 1 + 2 + 3 + 4 + 5 = 15. + std::vector ion_ina_index = {1, 1, 1, 1, 1}; + std::vector ion_ina = {0.0, 0.0, 0.0, 0.0, 0.0}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0}; + + // With these indices: + // ion_ika[1] = 3 + 4 = 7. + // ion_ika[2] = 1 + 20 = 21. + // ion_ika[3] = -5 + 5 = 0. 
+ std::vector ion_ika_index = {2, 2, 1, 1, 3}; + std::vector ion_ika = {0.0, 0.0, 0.0, -5.0, 0.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, ion_ina_index, "ion_ina_index"); + initialise_instance_variable(instance_info, ion_ina, "ion_ina"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ion_ika_index, "ion_ika_index"); + initialise_instance_variable(instance_info, ion_ika, "ion_ika"); + initialise_instance_variable(instance_info, ika, "ika"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("updates are commputed correctly with vector instructions and optimizations on") { + runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + // Recall: + // ion_ina_id = mech->ion_ina_index[id] + // ion_ika_id = mech->ion_ika_index[id] + // mech->ion_ina[ion_ina_id] += mech->ina[id] + // mech->ion_ika[ion_ika_id] += mech->ika[id] + std::vector ion_ina_expected = {0.0, 15.0, 0.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ina_expected, "ion_ina")); + + std::vector ion_ika_expected = {0.0, 7.0, 21.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ika_expected, "ion_ika")); + } + } + + GIVEN("Another atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is again: + : + : ion_ina_id = mech->ion_ina_index[id] + : ion_ika_id = mech->ion_ika_index[id] + : mech->ion_ina[ion_ina_id] += mech->ina[id] + : mech->ion_ika[ion_ika_id] += mech->ika[id] + )"; + + + 
NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_functions(); + + // Create the instance struct data. + int num_elements = 6; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices ion_ina[1] = 1 + 3 + 5 = 9. + // With these indices ion_ina[4] = 2 + 4 + 6 = 12. + std::vector ion_ina_index = {1, 4, 1, 4, 1, 4}; + std::vector ion_ina = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + // With these indices: + // ion_ika[1] = 3 + 4 + 5 = 12. + // ion_ika[2] = 1 + 20 + 6 = 27. + std::vector ion_ika_index = {2, 2, 1, 1, 1, 2}; + std::vector ion_ika = {0.0, 0.0, 0.0, -5.0, 0.0, 0.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0, 6.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, ion_ina_index, "ion_ina_index"); + initialise_instance_variable(instance_info, ion_ina, "ion_ina"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ion_ika_index, "ion_ika_index"); + initialise_instance_variable(instance_info, ion_ika, "ion_ika"); + initialise_instance_variable(instance_info, ika, "ika"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Atomic updates are correct without optimizations") { + runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + // Recall: + // ion_ina_id = mech->ion_ina_index[id] + // ion_ika_id = mech->ion_ika_index[id] + // mech->ion_ina[ion_ina_id] += mech->ina[id] + // mech->ion_ika[ion_ika_id] += mech->ika[id] + std::vector ion_ina_expected = {0.0, 9.0, 0.0, 0.0, 12.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ina_expected, "ion_ina")); + + std::vector ion_ika_expected = {0.0, 12.0, 27.0, -5.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ika_expected, "ion_ika")); + } + } + + GIVEN("Atomic updates of rhs and d") { + std::string nmodl_text = R"( + NEURON { + POINT_PROCESS test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is again: + : + : node_id = mech->node_index[id] + : mech->vec_rhs[node_id] -= rhs + : mech->vec_d[node_id] -= g + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0); + llvm_visitor.visit_program(*ast); + llvm_visitor.wrap_kernel_functions(); + + // Create the instance struct data. 
+ int num_elements = 6; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices vec_rhs[1] = -0.2-1.e2/1.5*2-1.e2/3.4*6-1.e2/5.2*10 = + // -502.3116138763197. + // With these indices vec_rhs[4] = + // -0.54-1.e2/2.3*22.0-1.e2/4.1*8.0-1.e2/6.0*12.0 = -1351.103690349947. + // vec_d remains the same because the contribution of g each time is 0.0. + std::vector node_index = {1, 4, 1, 4, 1, 4}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0, 6.0}; + std::vector vec_rhs = {0.64, -0.2, 1.1, 0.42, 0.54, -0.36}; + std::vector vec_d = {1.6, 2.5, 3.4, 4.3, 5.2, 6.1}; + std::vector node_area_index = {0, 1, 2, 3, 4, 5}; + std::vector node_area = {1.5, 2.3, 3.4, 4.1, 5.2, 6.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, node_index, "node_index"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ika, "ika"); + initialise_instance_variable(instance_info, vec_rhs, "vec_rhs"); + initialise_instance_variable(instance_info, vec_d, "vec_d"); + initialise_instance_variable(instance_info, node_area_index, "node_area_index"); + initialise_instance_variable(instance_info, node_area, "node_area"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Atomic updates are correct") { + runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + // Recall: + // node_id = mech->node_index[id] + // mech->vec_rhs[node_id] -= rhs + // mech->vec_d[node_id] -= g + std::vector vec_rhs_expected = { + 0.64, -502.3116138763197, 1.1, 0.42, -1351.103690349947, -0.36}; + REQUIRE(check_instance_variable(instance_info, vec_rhs_expected, "vec_rhs")); + + std::vector vec_d_expected = {1.6, 2.5, 3.4, 4.3, 5.2, 6.1}; + REQUIRE(check_instance_variable(instance_info, vec_d_expected, "vec_d")); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 7e209e9123..77fb3577e0 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1841,4 +1841,110 @@ SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { REQUIRE(std::regex_search(module_string, m, add)); } } + + GIVEN("For current update with atomic addition ") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { } + )"; + + THEN("corresponding LLVM atomic instruction is generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check for atomic addition. 
+ std::regex add(R"(atomicrmw fadd double\* %.*, double %.* seq_cst)"); + REQUIRE(std::regex_search(module_string, m, add)); + } + } +} + +//============================================================================= +// Atomics for vectorised kernel +//============================================================================= + +SCENARIO("A simple kernel with atomic current updates", "[visitor][llvm]") { + GIVEN("A simple atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + )"; + + THEN("an atomic loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/true, + /*vector_width=*/4); + std::smatch m; + + // Check for correct %ptrs calculation and bitcast to an array. + std::regex ptrtoint(R"(ptrtoint float\* %.* to i64)"); + std::regex insertelement(R"(insertelement <4 x i64> undef, i64 %.*, i32 0)"); + std::regex shufflevector( + R"(shufflevector <4 x i64> %.*, <4 x i64> undef, <4 x i32> zeroinitializer)"); + std::regex bitcast(R"(bitcast <4 x i64>\* %ptrs to \[4 x float\*\]\*)"); + REQUIRE(std::regex_search(module_string, m, ptrtoint)); + REQUIRE(std::regex_search(module_string, m, insertelement)); + REQUIRE(std::regex_search(module_string, m, shufflevector)); + REQUIRE(std::regex_search(module_string, m, bitcast)); + + // Check for %ptrs store and branch to atomic update block. + std::regex ptrs_store(R"(store <4 x i64> %.*, <4 x i64>\* %ptrs)"); + std::regex atomic_branch(R"(br label %atomic\.update)"); + REQUIRE(std::regex_search(module_string, m, ptrs_store)); + REQUIRE(std::regex_search(module_string, m, atomic_branch)); + + // Check the scalar loop for atomic update mis implemented correctly. 
+ std::regex atomic_update( + " %.* = phi i64 \\[ 15, %for\\.body \\], \\[ %.*, %atomic\\.update \\]\n" + " %.* = call i64 @llvm\\.cttz\\.i64\\(i64 %.*, i1 false\\)\n" + " %.* = shl i64 1, %.*\n" + " %.* = xor i64 %.*, -1\n" + " %.* = and i64 %.*, %.*\n" + " %.* = getelementptr \\[4 x float\\*\\], \\[4 x float\\*\\]\\* %.*, i64 0, i64 " + "%.*\n" + " %.* = load float\\*, float\\*\\* %.*, align 8\n" + " %.* = load float, float\\* %.*, align 4\n" + " %.* = extractelement <4 x float> %.*, i64 %.*\n" + " %.* = fadd float %.*, %.*\n" + " store float %.*, float\\* %.*, align 4\n" + " %.* = icmp eq i64 %.*, 0\n"); + std::regex remaining( + R"(br i1 %.*, label %for\.body\.remaining, label %atomic\.update)"); + REQUIRE(std::regex_search(module_string, m, atomic_update)); + REQUIRE(std::regex_search(module_string, m, remaining)); + } + } } From bf3c125e1305df60c2877b567210a756f22ca799 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 18 Jul 2022 17:55:44 +0300 Subject: [PATCH 095/105] Install explicitly LLVM 13.0.1 in MacOS builds in Azure (#898) --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5fa8ce7acc..1954b2d42d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -188,7 +188,7 @@ stages: - checkout: self submodules: True - script: | - brew install flex bison cmake python@3 gcc@8 llvm + brew install flex bison cmake python@3 gcc@8 llvm@13 python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' @@ -196,7 +196,7 @@ stages: export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. 
-DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$(brew --prefix llvm)/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON + cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$(brew --prefix llvm@13)/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? -ne 0 ] then From 18856f436f0908ec9baa596a35d609ea387b7ac7 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 18 Jul 2022 20:01:39 +0300 Subject: [PATCH 096/105] Fix nrn_cur kernel code generation unit test (#892) - Changed g RANGE variable name to g_var to avoid issue with confusing g local variable for current with the instance variable g - Corrected expected code --- test/unit/codegen/codegen_llvm_visitor.cpp | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index 1e3504ae61..c055c0747a 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -496,7 +496,7 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", USEION na READ ena WRITE ina RANGE tau1, tau2, e, i NONSPECIFIC_CURRENT i - RANGE g, gna + RANGE g_var, gna } UNITS { @@ -514,7 +514,7 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", ASSIGNED { v (mV) i (nA) - g (uS) + g_var (uS) gna (S/cm2) factor } @@ -542,8 +542,8 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", BREAKPOINT { SOLVE state METHOD cnexp ina = gna*(v-ena) - g = B-A - i = g*(v-e) + g_var = B-A + i = g_var*(v-e) } DERIVATIVE state { @@ -574,31 +574,31 @@ SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", { current = 0 mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) - mech->g[id] = mech->B[id]-mech->A[id] - 
mech->i[id] = mech->g[id]*(v-mech->e[id]) + mech->g_var[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g_var[id]*(v-mech->e[id]) current = current+mech->i[id] current = current+mech->ina[id] - mech->g[id] = current + g = current } dina = mech->ina[id] v = v_org { current = 0 mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) - mech->g[id] = mech->B[id]-mech->A[id] - mech->i[id] = mech->g[id]*(v-mech->e[id]) + mech->g_var[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g_var[id]*(v-mech->e[id]) current = current+mech->i[id] current = current+mech->ina[id] rhs = current } - mech->g[id] = (mech->g[id]-rhs)/0.001 + g = (g-rhs)/0.001 mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001*1.e2/mech->node_area[node_area_id] mech->ion_ina[ion_ina_id] += mech->ina[id]*(1.e2/mech->node_area[node_area_id]) mfactor = 1.e2/mech->node_area[node_area_id] - mech->g[id] = mech->g[id]*mfactor + g = g*mfactor rhs = rhs*mfactor mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs - mech->vec_d[node_id] = mech->vec_d[node_id]+mech->g[id] + mech->vec_d[node_id] = mech->vec_d[node_id]+g } })"; From 44688e3bb9b1b746e7d0a1204424e25ca35d83a6 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 22 Aug 2022 15:44:42 +0200 Subject: [PATCH 097/105] [LLVM][refactoring] Annotations pass and no more wrappers (#893) * New `Annotator` class to specialise how NMODL compute kernels (llvm::Functions) are annotated * New `ReplacePass` class that replaces the standard math function calls of the kernels with the optimised math libraries passed to NMODL * Replace kernel wrappers with generation of the compute functions with a `void*` parameter which is then casted to the proper struct type internally by the compute function * Identify compute kernels `CodegenFunction` with a new `is_kernel` flag - `is_kernel` is mapped to LLVM metadata node `nmodl.compute-kernel` that allows any to easily identify compute kernels in LLVM IR * Removes restriction for 
auto-vectorising loops by external compiler using certain LLVM IR loop metadata when the vector length is set to 1 in NMODL --- src/codegen/llvm/CMakeLists.txt | 2 + src/codegen/llvm/annotation.cpp | 105 ++++++++++ src/codegen/llvm/annotation.hpp | 77 +++++++ .../llvm/codegen_llvm_helper_visitor.cpp | 7 +- src/codegen/llvm/codegen_llvm_visitor.cpp | 139 +++---------- src/codegen/llvm/codegen_llvm_visitor.hpp | 19 +- src/codegen/llvm/llvm_ir_builder.cpp | 63 ++---- src/codegen/llvm/llvm_ir_builder.hpp | 13 +- src/codegen/llvm/llvm_utils.cpp | 27 ++- src/codegen/llvm/llvm_utils.hpp | 6 + .../llvm/replace_with_lib_functions.cpp | 188 +++++++++--------- .../llvm/replace_with_lib_functions.hpp | 82 +++++--- src/language/codegen.yaml | 3 + src/main.cpp | 6 +- src/pybind/pynmodl.cpp | 2 +- test/benchmark/llvm_benchmark.cpp | 23 +-- test/benchmark/llvm_benchmark.hpp | 7 +- test/unit/codegen/codegen_llvm_execution.cpp | 56 ++++-- test/unit/codegen/codegen_llvm_ir.cpp | 9 - 19 files changed, 482 insertions(+), 352 deletions(-) create mode 100644 src/codegen/llvm/annotation.cpp create mode 100644 src/codegen/llvm/annotation.hpp diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt index 44f42a5313..828e48fb80 100644 --- a/src/codegen/llvm/CMakeLists.txt +++ b/src/codegen/llvm/CMakeLists.txt @@ -2,6 +2,8 @@ # Codegen sources # ============================================================================= set(LLVM_CODEGEN_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/annotation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/annotation.hpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp diff --git a/src/codegen/llvm/annotation.cpp b/src/codegen/llvm/annotation.cpp new file mode 100644 index 0000000000..6bf437e309 --- /dev/null +++ b/src/codegen/llvm/annotation.cpp @@ -0,0 +1,105 @@ 
+/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/target_platform.hpp" + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +static constexpr const char nmodl_annotations[] = "nmodl.annotations"; +static constexpr const char nmodl_compute_kernel[] = "nmodl.compute-kernel"; + +namespace nmodl { +namespace custom { + +void Annotator::add_nmodl_compute_kernel_annotation(llvm::Function& function) { + llvm::LLVMContext& context = function.getContext(); + llvm::MDNode* node = llvm::MDNode::get(context, llvm::MDString::get(context, nmodl_compute_kernel)); + function.setMetadata(nmodl_annotations, node); +} + +bool Annotator::has_nmodl_compute_kernel_annotation(llvm::Function& function) { + if (!function.hasMetadata(nmodl_annotations)) + return false; + + llvm::MDNode* node = function.getMetadata(nmodl_annotations); + std::string type = llvm::cast(node->getOperand(0))->getString().str(); + return type == nmodl_compute_kernel; +} + +void DefaultCPUAnnotator::annotate(llvm::Function& function) const { + // By convention, the compute kernel does not free memory and does not + // throw exceptions. + function.setDoesNotFreeMemory(); + function.setDoesNotThrow(); + + // We also want to specify that the pointers that instance struct holds + // do not alias, unless specified otherwise. In order to do that, we + // add a `noalias` attribute to the argument. 
As per Clang's + // specification: + // > The `noalias` attribute indicates that the only memory accesses + // > inside function are loads and stores from objects pointed to by + // > its pointer-typed arguments, with arbitrary offsets. + function.addParamAttr(0, llvm::Attribute::NoAlias); + + // Finally, specify that the mechanism data struct pointer does not + // capture and is read-only. + function.addParamAttr(0, llvm::Attribute::NoCapture); + function.addParamAttr(0, llvm::Attribute::ReadOnly); +} + +void CUDAAnnotator::annotate(llvm::Function& function) const { + llvm::LLVMContext& context = function.getContext(); + llvm::Module* m = function.getParent(); + + auto one = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 1); + llvm::Metadata* metadata[] = {llvm::ValueAsMetadata::get(&function), + llvm::MDString::get(context, "kernel"), + llvm::ValueAsMetadata::get(one)}; + llvm::MDNode* node = llvm::MDNode::get(context, metadata); + + m->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); +} +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Annotator; +namespace llvm { + +char AnnotationPass::ID = 0; + +bool AnnotationPass::runOnModule(Module& module) { + bool modified = false; + + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration() && + Annotator::has_nmodl_compute_kernel_annotation(function)) { + annotator->annotate(function); + modified = true; + } + } + + return modified; +} + +void AnnotationPass::getAnalysisUsage(AnalysisUsage& au) const { + au.setPreservesCFG(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); +} +} // namespace llvm diff --git a/src/codegen/llvm/annotation.hpp b/src/codegen/llvm/annotation.hpp new file mode 100644 index 0000000000..c2a8367713 --- /dev/null +++ b/src/codegen/llvm/annotation.hpp @@ -0,0 +1,77 @@ +/************************************************************************* + * 
Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" + +namespace nmodl { +namespace custom { + +/** + * \class Annotator + * \brief Base class that can be overridden to specify function annotations. + */ +class Annotator { + public: + virtual void annotate(llvm::Function& function) const = 0; + virtual ~Annotator() = default; + + /// Marks LLVM function as NMODL compute kernel. + static void add_nmodl_compute_kernel_annotation(llvm::Function& function); + + /// Returns true if LLVM function is marked as NMODL compute kernel. + static bool has_nmodl_compute_kernel_annotation(llvm::Function& function); +}; + +/** + * \class DefaultCPUAnnotator + * \brief Specifies how LLVM IR functions for CPU platforms are annotated. Used + * by default. + */ +class DefaultCPUAnnotator: public Annotator { + public: + void annotate(llvm::Function& function) const override; +}; + +/** + * \class CUDAAnnotator + * \brief Specifies how LLVM IR functions for CUDA platforms are annotated. This + * includes marking functions with "kernel" or "device" attributes. + */ +class CUDAAnnotator: public Annotator { + public: + void annotate(llvm::Function& function) const override; +}; +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Annotator; namespace llvm { + +/** + * \class AnnotationPass + * \brief LLVM module pass that annotates NMODL compute kernels. + */ +class AnnotationPass: public ModulePass { + private: + // Underlying annotator that is applied to each LLVM function. 
+ const Annotator* annotator; + + public: + static char ID; + + AnnotationPass(Annotator* annotator) + : ModulePass(ID) + , annotator(annotator) {} + + bool runOnModule(Module& module) override; + + void getAnalysisUsage(AnalysisUsage& au) const override; +}; +} // namespace llvm diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 06fde2bcd7..6e70a9aa63 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -192,7 +192,7 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// we have all information for code generation function, create a new node /// which will be inserted later into AST - auto function = std::make_shared(fun_ret_type, name, arguments, block); + auto function = std::make_shared(fun_ret_type, name, arguments, block, /*is_kernel=*/0); if (node.get_token()) { function->set_token(*node.get_token()->clone()); } @@ -732,7 +732,7 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { /// finally, create new function auto function = - std::make_shared(return_type, name, code_arguments, function_block); + std::make_shared(return_type, name, code_arguments, function_block, /*is_kernel=*/1); codegen_functions.push_back(function); // todo: remove this, temporary @@ -1097,7 +1097,8 @@ void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node auto function = std::make_shared(return_type, name, code_arguments, - function_block); + function_block, + /*is_kernel=*/1); codegen_functions.push_back(function); // todo: remove this, temporary diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 491f8ce02d..958e292ce8 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -7,6 +7,7 @@ #include "codegen/llvm/codegen_llvm_visitor.hpp" #include 
"codegen/llvm/llvm_utils.hpp" +#include "codegen/llvm/annotation.hpp" #include "ast/all.hpp" #include "utils/logger.hpp" @@ -25,10 +26,6 @@ namespace codegen { /* Helper routines */ /****************************************************************************************/ -static std::string get_wrapper_name(const std::string& kernel_name) { - return "__" + kernel_name + "_wrapper"; -} - /// A utility to check for supported Statement AST nodes. static bool is_supported_statement(const ast::Statement& statement) { return statement.is_codegen_atomic_statement() || statement.is_codegen_for_statement() || @@ -59,36 +56,6 @@ static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::Sym return unsupported.empty() && supported.size() <= 1; } -void CodegenLLVMVisitor::annotate_kernel_with_nvvm(llvm::Function* kernel, - const std::string& annotation = "kernel") { - llvm::Metadata* metadata[] = {llvm::ValueAsMetadata::get(kernel), - llvm::MDString::get(*context, annotation), - llvm::ValueAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(*context), 1))}; - llvm::MDNode* node = llvm::MDNode::get(*context, metadata); - module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); -} - -void CodegenLLVMVisitor::annotate_wrapper_kernels_with_nvvm() { - // First clear all the nvvm annotations from the module - auto module_named_metadata = module->getNamedMetadata("nvvm.annotations"); - module->eraseNamedMetadata(module_named_metadata); - - // Then each kernel should be annotated as "device" function and wrappers should be annotated as - // "kernel" functions - std::vector kernel_names; - find_kernel_names(kernel_names); - - for (const auto& kernel_name: kernel_names) { - // Get the kernel function. - auto kernel = module->getFunction(kernel_name); - // Get the kernel wrapper function. 
- auto kernel_wrapper = module->getFunction(get_wrapper_name(kernel_name)); - annotate_kernel_with_nvvm(kernel, "device"); - annotate_kernel_with_nvvm(kernel_wrapper, "kernel"); - } -} - llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { node->accept(*this); return ir_builder.pop_last_value(); @@ -145,11 +112,18 @@ void CodegenLLVMVisitor::create_function_declaration(const ast::CodegenFunction& const auto& name = node.get_node_name(); const auto& arguments = node.get_arguments(); - // Procedure or function parameters are doubles by default. TypeVector arg_types; - for (size_t i = 0; i < arguments.size(); ++i) - arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); - + if (wrap_kernel_functions && node.get_is_kernel()) { + // We are wrapping NMODL compute kernels as a function taking void*. Thus, + // ignore struct pointer argument type and create a function signature with + // void* - actual conversion to struct pointer is done when generating + // the function body! + arg_types.push_back(ir_builder.get_i8_ptr_type()); + } else { + // Otherwise, process argument types as usual. + for (size_t i = 0; i < arguments.size(); ++i) + arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); + } llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); // Create a function that is automatically inserted into module's symbol table. @@ -215,7 +189,7 @@ void CodegenLLVMVisitor::find_kernel_names(std::vector& container) auto& functions = module->getFunctionList(); for (auto& func: functions) { const std::string name = func.getName().str(); - if (is_kernel_function(name)) { + if (Annotator::has_nmodl_compute_kernel_annotation(func)) { container.push_back(name); } } @@ -419,65 +393,6 @@ void CodegenLLVMVisitor::write_to_variable(const ast::VarName& node, llvm::Value } } -void CodegenLLVMVisitor::wrap_kernel_functions() { - // First, identify all kernels. 
- std::vector kernel_names; - find_kernel_names(kernel_names); - - for (const auto& kernel_name: kernel_names) { - // Get the kernel function. - auto kernel = module->getFunction(kernel_name); - - // Create a wrapper void function that takes a void pointer as a single argument. - llvm::Type* return_type; - if (platform.is_gpu()) { - return_type = ir_builder.get_void_type(); - } else { - return_type = ir_builder.get_i32_type(); - } - llvm::Type* void_ptr_type = ir_builder.get_i8_ptr_type(); - llvm::Function* wrapper_func = llvm::Function::Create( - llvm::FunctionType::get(return_type, {void_ptr_type}, /*isVarArg=*/false), - llvm::Function::ExternalLinkage, - get_wrapper_name(kernel_name), - *module); - - // Optionally, add debug information for the wrapper function. - if (add_debug_information) { - debug_builder.add_function_debug_info(wrapper_func); - } - - ir_builder.create_block_and_set_insertion_point(wrapper_func); - - // Proceed with bitcasting the void pointer to the struct pointer type, calling the kernel - // and adding a terminator. - llvm::Value* bitcasted = ir_builder.create_bitcast(wrapper_func->getArg(0), - kernel->getArg(0)->getType()); - ValueVector args; - args.push_back(bitcasted); - ir_builder.create_function_call(kernel, args, /*use_result=*/false); - - // create return instructions and annotate wrapper with certain attributes depending on - // the backend type - if (platform.is_gpu()) { - // return void - ir_builder.create_return(); - } else { - // Create a 0 return value and a return instruction. 
- ir_builder.create_i32_constant(0); - ir_builder.create_return(ir_builder.pop_last_value()); - ir_builder.set_function(wrapper_func); - ir_builder.set_kernel_attributes(); - } - ir_builder.clear_function(); - } - // for GPU we need to first clear all the annotations and then reapply them - if (platform.is_gpu()) { - annotate_wrapper_kernels_with_nvvm(); - } -} - - /****************************************************************************************/ /* Overloaded visitor routines */ /****************************************************************************************/ @@ -712,7 +627,6 @@ void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatem // Extract the condition to decide whether to branch to the loop body or loop exit. llvm::Value* cond = accept_and_get(node.get_condition()); llvm::BranchInst* loop_br = ir_builder.create_cond_br(cond, for_body, exit); - ir_builder.set_loop_metadata(loop_br); ir_builder.set_insertion_point(for_body); // If not processing remainder of the loop, start vectorization. @@ -753,19 +667,27 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node block->accept(v); // Allocate parameters on the stack and add them to the symbol table. - ir_builder.allocate_function_arguments(func, arguments); + if (wrap_kernel_functions && node.get_is_kernel()) { + // If we wrap NMODL compute kernel, the parameter will be void*! Hence, + // we get the actual struct pointer type and allocate parameters on the + // stack with additional bitcast. + llvm::Type* struct_ty = get_codegen_var_type(*arguments[0]->get_type()); + ir_builder.allocate_and_wrap_kernel_arguments(func, arguments, struct_ty); + } else { + // Otherwise, nothing specific needed. + ir_builder.allocate_function_arguments(func, arguments); + } // Process function or procedure body. If the function is a compute kernel, enable - // vectorization or add NVVM annotations. 
If this is the case, the return statement is - handled in a separate visitor. - if (is_kernel_function(name)) { + vectorization. If this is the case, the return statement is handled in a + separate visitor. + if (node.get_is_kernel()) { if (platform.is_cpu_with_simd()) { ir_builder.generate_vector_ir(); block->accept(*this); ir_builder.generate_scalar_ir(); } else if (platform.is_gpu()) { block->accept(*this); - annotate_kernel_with_nvvm(func); } else { // scalar block->accept(*this); } @@ -775,8 +697,8 @@ void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node // If function is a compute kernel, add a void terminator explicitly, since there is no // `CodegenReturnVar` node. Also, set the necessary attributes. - if (is_kernel_function(name)) { - ir_builder.set_kernel_attributes(); + if (node.get_is_kernel()) { + custom::Annotator::add_nmodl_compute_kernel_annotation(*func); ir_builder.create_return(); } @@ -978,9 +900,12 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { utils::optimise_module(*module, opt_level_ir); } - // Optionally, replace LLVM math intrinsics with library calls. + // Pass 1: replace LLVM math intrinsics with library calls. utils::replace_with_lib_functions(platform, *module); + // Pass 2: annotate NMODL compute kernels. + utils::annotate(platform, *module); + // Handle GPU optimizations (CUDA platforms only for now). if (platform.is_gpu()) { // We only support CUDA backends anyway, so this works for now. diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index a4072737df..c4333e75ee 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -83,13 +83,18 @@ class CodegenLLVMVisitor: public CodegenCVisitor { /// Target platform for the code generation. Platform platform; + /// Wrap calls to NMODL compute kernels by taking void* and casting to + /// appropriate struct type pointer. 
Used when executing kernels with JIT. + bool wrap_kernel_functions; + public: CodegenLLVMVisitor(const std::string& mod_filename, const std::string& output_dir, Platform& platform, int opt_level_ir, bool add_debug_information = false, - std::vector fast_math_flags = {}) + std::vector fast_math_flags = {}, + bool wrap_kernel_functions = false) : CodegenCVisitor(mod_filename, output_dir, platform.is_single_precision() ? "float" : "double", @@ -102,7 +107,8 @@ class CodegenLLVMVisitor: public CodegenCVisitor { , opt_level_ir(opt_level_ir) , add_debug_information(add_debug_information) , ir_builder(*context, platform, fast_math_flags) - , debug_builder(*module) { + , debug_builder(*module) + , wrap_kernel_functions(wrap_kernel_functions) { instance_struct_type_suffix = "_instance_var__type"; print_procedures_and_functions = false; } @@ -112,7 +118,8 @@ class CodegenLLVMVisitor: public CodegenCVisitor { Platform& platform, int opt_level_ir, bool add_debug_information = false, - std::vector fast_math_flags = {}) + std::vector fast_math_flags = {}, + bool wrap_kernel_functions = false) : CodegenCVisitor(mod_filename, stream, platform.is_single_precision() ? "float" : "double", @@ -125,7 +132,8 @@ class CodegenLLVMVisitor: public CodegenCVisitor { , opt_level_ir(opt_level_ir) , add_debug_information(add_debug_information) , ir_builder(*context, platform, fast_math_flags) - , debug_builder(*module) { + , debug_builder(*module) + , wrap_kernel_functions(wrap_kernel_functions) { instance_struct_type_suffix = "_instance_var__type"; print_procedures_and_functions = false; } @@ -289,9 +297,6 @@ class CodegenLLVMVisitor: public CodegenCVisitor { * The first argument should be an object of \c mechanism_instance_struct_type_name */ CodegenLLVMVisitor::ParamVector get_compute_function_parameter(); - /// Wraps all kernel function calls into wrapper functions that use `void*` to pass the data to - /// the kernel. 
- void wrap_kernel_functions(); /// print compute functions relevant for this backend void print_compute_functions() override; diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index f0682fff91..490e20d13d 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -158,6 +158,22 @@ void IRBuilder::allocate_function_arguments(llvm::Function* function, } } +void IRBuilder::allocate_and_wrap_kernel_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments, + llvm::Type* struct_type) { + // In theory, this should never happen but let's guard anyway. + if (nmodl_arguments.size() != 1) { + throw std::runtime_error("Error: NMODL computer kernel must have a single argument\n"); + } + + // Bitcast void* pointer provided as compute kernel argument to mechanism data type. + llvm::Value* data_ptr = create_bitcast(function->getArg(0), struct_type); + + std::string arg_name = nmodl_arguments[0].get()->get_node_name(); + llvm::Value* alloca = create_alloca(arg_name, struct_type); + builder.CreateStore(data_ptr, alloca); +} + std::string IRBuilder::get_current_function_name() { return current_function->getName().str(); } @@ -203,53 +219,6 @@ void IRBuilder::create_intrinsic(const std::string& name, } } -void IRBuilder::set_kernel_attributes() { - // By convention, the compute kernel does not free memory and does not throw exceptions. - current_function->setDoesNotFreeMemory(); - current_function->setDoesNotThrow(); - - // We also want to specify that the pointers that instance struct holds do not alias, unless - // specified otherwise. In order to do that, we add a `noalias` attribute to the argument. As - // per Clang's specification: - // > The `noalias` attribute indicates that the only memory accesses inside function are loads - // > and stores from objects pointed to by its pointer-typed arguments, with arbitrary - // > offsets. 
- current_function->addParamAttr(0, llvm::Attribute::NoAlias); - - // Finally, specify that the struct pointer does not capture and is read-only. - current_function->addParamAttr(0, llvm::Attribute::NoCapture); - current_function->addParamAttr(0, llvm::Attribute::ReadOnly); -} - -/****************************************************************************************/ -/* LLVM metadata utilities */ -/****************************************************************************************/ - -void IRBuilder::set_loop_metadata(llvm::BranchInst* branch) { - llvm::LLVMContext& context = builder.getContext(); - MetadataVector loop_metadata; - - // Add nullptr to reserve the first place for loop's metadata self-reference. - loop_metadata.push_back(nullptr); - - // If `vector_width` is 1, explicitly disable vectorization for benchmarking purposes. - if (platform.is_cpu() && platform.get_instruction_width() == 1) { - llvm::MDString* name = llvm::MDString::get(context, "llvm.loop.vectorize.enable"); - llvm::Value* false_value = llvm::ConstantInt::get(get_boolean_type(), 0); - llvm::ValueAsMetadata* value = llvm::ValueAsMetadata::get(false_value); - loop_metadata.push_back(llvm::MDNode::get(context, {name, value})); - } - - // No metadata to add. - if (loop_metadata.size() <= 1) - return; - - // Add loop's metadata self-reference and attach it to the branch. 
- llvm::MDNode* metadata = llvm::MDNode::get(context, loop_metadata); - metadata->replaceOperandWith(0, metadata); - branch->setMetadata(llvm::LLVMContext::MD_loop, metadata); -} - /****************************************************************************************/ /* LLVM instruction utilities */ /****************************************************************************************/ diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp index 537682b930..62cfea5145 100644 --- a/src/codegen/llvm/llvm_ir_builder.hpp +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -25,7 +25,6 @@ static constexpr const unsigned double_precision = 64; /// Some typedefs. using ConstantVector = std::vector; -using MetadataVector = std::vector; using TypeVector = std::vector; using ValueVector = std::vector; @@ -153,6 +152,12 @@ class IRBuilder { void allocate_function_arguments(llvm::Function* function, const ast::CodegenVarWithTypeVector& nmodl_arguments); + /// Generates LLVM IR to allocate the arguments of the NMODL compute kernel + /// on the stack, bitcasting void* pointer to mechanism struct pointers. + void allocate_and_wrap_kernel_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments, + llvm::Type* struct_type); + llvm::Value* create_alloca(const std::string& name, llvm::Type* type); /// Generates IR for allocating an array. @@ -301,12 +306,6 @@ class IRBuilder { /// Sets builder's insertion point to the given block. void set_insertion_point(llvm::BasicBlock* block); - /// Sets the necessary attributes for the kernel and its arguments. - void set_kernel_attributes(); - - /// Sets the loop metadata for the given branch from the loop. - void set_loop_metadata(llvm::BranchInst* branch); - /// Pops the last visited value from the value stack. 
llvm::Value* pop_last_value(); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index f6590fec5b..e74d878989 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -7,6 +7,7 @@ #include "codegen/llvm/llvm_utils.hpp" #include "codegen/llvm/replace_with_lib_functions.hpp" +#include "codegen/llvm/annotation.hpp" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -178,8 +179,32 @@ void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* t void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module) { llvm::legacy::PassManager pm; - pm.add(new llvm::ReplaceMathFunctions(platform)); + + Replacer *replacer = nullptr; + if (platform.is_CUDA_gpu()) { + replacer = new custom::CUDAReplacer(); + } else { + replacer = new custom::DefaultCPUReplacer(platform.get_math_library()); + } + pm.add(new llvm::ReplacePass(replacer)); + pm.run(module); + + delete replacer; +} + +void annotate(codegen::Platform& platform, llvm::Module& module) { + llvm::legacy::PassManager pm; + + Annotator *annotator = nullptr; + if (platform.is_CUDA_gpu()) { + annotator = new custom::CUDAAnnotator(); + } else { + annotator = new custom::DefaultCPUAnnotator(); + } + pm.add(new llvm::AnnotationPass(annotator)); pm.run(module); + + delete annotator; } /****************************************************************************************/ diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp index 9763718ab0..f4bce67fb4 100644 --- a/src/codegen/llvm/llvm_utils.hpp +++ b/src/codegen/llvm/llvm_utils.hpp @@ -31,6 +31,12 @@ std::string get_module_ptx(llvm::TargetMachine& tm, llvm::Module& module); /// Replaces calls to LLVM intrinsics with appropriate library calls. void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module); +/// Annotates LLVM module with appropriate metadata. 
+/// TODO: this function and replace_with_lib_functions will be chnaged +/// oncePlatform evolves into PlatformConfig which would be responsible +/// for platform-dependent pass initialisation. +void annotate(codegen::Platform& platform, llvm::Module& module); + /// Optimises the given LLVM IR module for NVPTX targets. void optimise_module_for_nvptx(const codegen::Platform& platform, llvm::Module& module, diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp b/src/codegen/llvm/replace_with_lib_functions.cpp index 07d6dd8f04..140142fdc0 100644 --- a/src/codegen/llvm/replace_with_lib_functions.cpp +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -18,17 +18,45 @@ #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/LegacyPassManager.h" +namespace nmodl { +namespace custom { + +Patterns DefaultCPUReplacer::patterns() const { + throw std::runtime_error("Error: DefaultCPUReplacer has no patterns and uses built-in LLVM passes instead.\n"); +} + +std::string DefaultCPUReplacer::get_library_name() { + return this->library_name; +} + +Patterns CUDAReplacer::patterns() const { + return { + {"llvm.exp.f32", "__nv_expf"}, + {"llvm.exp.f64", "__nv_exp"}, + {"llvm.pow.f32", "__nv_powf"}, + {"llvm.pow.f64", "__nv_pow"}, + {"llvm.log.f32", "__nv_logf"}, + {"llvm.log.f64", "__nv_log"}, + {"llvm.fabs.f32", "__nv_fabsf"}, + {"llvm.fabs.f64", "__nv_fabs"} + }; +} +} // namespace custom +} // namespace nmodl + +using nmodl::custom::DefaultCPUReplacer; namespace llvm { -char ReplaceMathFunctions::ID = 0; +char ReplacePass::ID = 0; -bool ReplaceMathFunctions::runOnModule(Module& module) { - legacy::FunctionPassManager fpm(&module); +bool ReplacePass::runOnModule(Module& module) { bool modified = false; // If the platform supports SIMD, replace math intrinsics with library // functions. 
- if (platform->is_cpu_with_simd()) { + if (dynamic_cast(replacer)) { + legacy::FunctionPassManager fpm(&module); + // First, get the target library information and add vectorizable functions for the // specified vector library. Triple triple(sys::getDefaultTargetTriple()); @@ -38,28 +66,55 @@ bool ReplaceMathFunctions::runOnModule(Module& module) { // Add passes that replace math intrinsics with calls. fpm.add(new TargetLibraryInfoWrapperPass(tli)); fpm.add(new ReplaceWithVeclibLegacy); - } - - // For CUDA GPUs, replace with calls to libdevice. - if (platform->is_CUDA_gpu()) { - fpm.add(new ReplaceWithLibdevice); - } - // Run passes. - fpm.doInitialization(); - for (auto& function: module.getFunctionList()) { - if (!function.isDeclaration()) - modified |= fpm.run(function); + // Run passes. + fpm.doInitialization(); + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration()) + modified |= fpm.run(function); + } + fpm.doFinalization(); + } else { + // Otherwise, the replacer is not default and we need to apply patterns + // from it to each function! + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration()) { + // Try to replace a call instruction. + std::vector replaced_calls; + for (auto& instruction: instructions(function)) { + if (auto* call_inst = dyn_cast(&instruction)) { + if (replace_call(*call_inst)) { + replaced_calls.push_back(call_inst); + modified = true; + } + } + } + + // Remove calls to replaced functions. 
+ for (auto* call_inst: replaced_calls) { + call_inst->eraseFromParent(); + } + } + } } - fpm.doFinalization(); return modified; } -void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, - Triple& triple) { +void ReplacePass::getAnalysisUsage(AnalysisUsage& au) const { + au.setPreservesCFG(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); +} + +void ReplacePass::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, + Triple& triple) { // Since LLVM does not support SLEEF as a vector library yet, process it separately. - if (platform->get_math_library() == "SLEEF") { + if (((DefaultCPUReplacer*)replacer)->get_library_name() == "SLEEF") { // clang-format off #define FIXED(w) ElementCount::getFixed(w) // clang-format on @@ -110,10 +165,10 @@ void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibrary {"none", VecLib::NoLibrary}, {"SVML", VecLib::SVML}}; - const auto& library = llvm_supported_vector_libraries.find(platform->get_math_library()); + const auto& library = llvm_supported_vector_libraries.find(((DefaultCPUReplacer*)replacer)->get_library_name()); if (library == llvm_supported_vector_libraries.end()) throw std::runtime_error("Error: unknown vector library - " + - platform->get_math_library() + "\n"); + ((DefaultCPUReplacer*)replacer)->get_library_name() + "\n"); // Add vectorizable functions to the target library info. 
if (library->second != VecLib::LIBMVEC_X86 || (triple.isX86() && triple.isArch64Bit())) { @@ -122,77 +177,27 @@ void ReplaceMathFunctions::add_vectorizable_functions_from_vec_lib(TargetLibrary } } -void ReplaceWithLibdevice::getAnalysisUsage(AnalysisUsage& au) const { - au.setPreservesCFG(); - au.addPreserved(); - au.addPreserved(); - au.addPreserved(); - au.addPreserved(); - au.addPreserved(); - au.addPreserved(); -} - -bool ReplaceWithLibdevice::runOnFunction(Function& function) { - bool modified = false; - - // Try to replace math intrinsics. - std::vector replaced_calls; - for (auto& instruction: instructions(function)) { - if (auto* call_inst = dyn_cast(&instruction)) { - if (replace_call(*call_inst)) { - replaced_calls.push_back(call_inst); - modified = true; - } - } - } - - // Remove calls to replaced intrinsics. - for (auto* call_inst: replaced_calls) { - call_inst->eraseFromParent(); - } - - return modified; -} - -bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { +bool ReplacePass::replace_call(CallInst& call_inst) { Module* m = call_inst.getModule(); Function* function = call_inst.getCalledFunction(); - // Replace math intrinsics only! - auto id = function->getIntrinsicID(); - bool is_nvvm_intrinsic = id == Intrinsic::nvvm_read_ptx_sreg_ntid_x || - id == Intrinsic::nvvm_read_ptx_sreg_nctaid_x || - id == Intrinsic::nvvm_read_ptx_sreg_ctaid_x || - id == Intrinsic::nvvm_read_ptx_sreg_tid_x; - if (id == Intrinsic::not_intrinsic || is_nvvm_intrinsic) - return false; + // Get supported replacement patterns. + Patterns patterns = replacer->patterns(); - // Map of supported replacements. For now it is only exp and pow. 
- static const std::map libdevice_name = {{"llvm.exp.f32", "__nv_expf"}, - {"llvm.exp.f64", "__nv_exp"}, - {"llvm.pow.f32", "__nv_powf"}, - {"llvm.pow.f64", "__nv_pow"}, - {"llvm.log.f32", "__nv_logf"}, - {"llvm.log.f64", "__nv_log"}, - {"llvm.fabs.f32", - "__nv_fabsf"}, - {"llvm.fabs.f64", - "__nv_fabs"}}; - - // If replacement is not supported, abort. + // Check if replacement is not supported. std::string old_name = function->getName().str(); - auto it = libdevice_name.find(old_name); - if (it == libdevice_name.end()) - throw std::runtime_error("Error: replacements for " + old_name + " are not supported!\n"); - - // Get (or create) libdevice function. - Function* libdevice_func = m->getFunction(it->second); - if (!libdevice_func) { - libdevice_func = Function::Create(function->getFunctionType(), - Function::ExternalLinkage, - it->second, - *m); - libdevice_func->copyAttributesFrom(function); + auto it = patterns.find(old_name); + if (it == patterns.end()) + return false; + + // Get (or create) new function. + Function* new_func = m->getFunction(it->second); + if (!new_func) { + new_func = Function::Create(function->getFunctionType(), + Function::ExternalLinkage, + it->second, + *m); + new_func->copyAttributesFrom(function); } // Create a call to libdevice function with the same operands. @@ -200,7 +205,7 @@ bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { std::vector args(call_inst.arg_operands().begin(), call_inst.arg_operands().end()); SmallVector op_bundles; call_inst.getOperandBundlesAsDefs(op_bundles); - CallInst* new_call = builder.CreateCall(libdevice_func, args, op_bundles); + CallInst* new_call = builder.CreateCall(new_func, args, op_bundles); // Replace all uses of old instruction with the new one. Also, copy // fast math flags if necessary. 
@@ -211,11 +216,4 @@ bool ReplaceWithLibdevice::replace_call(CallInst& call_inst) { return true; } - -char ReplaceWithLibdevice::ID = 0; -static RegisterPass X("libdevice-replacement", - "Pass replacing math functions with calls to libdevice", - false, - false); - } // namespace llvm diff --git a/src/codegen/llvm/replace_with_lib_functions.hpp b/src/codegen/llvm/replace_with_lib_functions.hpp index 5bf38ba85f..dfd97d3bbd 100644 --- a/src/codegen/llvm/replace_with_lib_functions.hpp +++ b/src/codegen/llvm/replace_with_lib_functions.hpp @@ -14,52 +14,82 @@ #include "llvm/Pass.h" #include "llvm/Support/Host.h" -using nmodl::codegen::Platform; +using Patterns = std::map; -namespace llvm { +namespace nmodl { +namespace custom { /** - * \class ReplaceMathFunctions - * \brief A module LLVM pass that replaces math intrinsics with - * SIMD or libdevice library calls. + * \class Replacer + * \brief Base class that can be overriden to specify how LLVM math intrinsics + * are replaced. */ -class ReplaceMathFunctions: public ModulePass { - private: - const Platform* platform; +class Replacer { + public: + virtual Patterns patterns() const = 0; + virtual ~Replacer() = default; +}; +/** + * \class DefaultCPUReplacer + * \brief Specifies how LLVM IR math functions are replaced on CPUs by default. + * Here we reuse LLVM's API so patterns() has no meaning and throws an error + * instead! `DefaultCPUReplacer` threfore cannot be overriden. + */ +class DefaultCPUReplacer: public Replacer { + private: + std::string library_name; public: - static char ID; + DefaultCPUReplacer(std::string library_name) + : Replacer(), library_name(library_name) {} - ReplaceMathFunctions(const Platform& platform) - : ModulePass(ID) - , platform(&platform) {} + Patterns patterns() const final override; - bool runOnModule(Module& module) override; + /// Returns the name of underlying library for which this default + /// replacer is used. 
+ std::string get_library_name(); +}; - private: - /// Populates `tli` with vectorizable function definitions. - void add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, Triple& triple); +/** + * \class CUDAReplacer + * \brief Specifies replacement patterns for CUDA platforms. + */ +class CUDAReplacer: public Replacer { + public: + Patterns patterns() const override; }; +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Replacer; +namespace llvm { /** - * \class ReplaceWithLibdevice - * \brief A function LLVM pass that replaces math intrinsics with - * libdevice library calls. + * \class ReplacePass + * \brief A module LLVM pass that replaces math intrinsics with + * library calls. */ -class ReplaceWithLibdevice: public FunctionPass { +class ReplacePass: public ModulePass { + private: + // Underlying replacer that provides replacement patterns. + const Replacer* replacer; + public: static char ID; - ReplaceWithLibdevice() - : llvm::FunctionPass(ID) {} + ReplacePass(Replacer* replacer) + : ModulePass(ID) + , replacer(replacer) {} - void getAnalysisUsage(AnalysisUsage& au) const override; + bool runOnModule(Module& module) override; - bool runOnFunction(Function& function) override; + void getAnalysisUsage(AnalysisUsage& au) const override; private: - /// Replaces call instruction to intrinsic with libdevice call. + /// Populates `tli` with vectorizable function definitions (hook for default replacements). + void add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, Triple& triple); + + /// Replaces call instruction with a new call from Replacer's patterns. 
bool replace_call(CallInst& call_inst); }; - } // namespace llvm diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index 02adca49f4..d6f18e96d2 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -156,6 +156,9 @@ brief: "Body of the function" type: StatementBlock getter: {override: true} + - is_kernel: + brief: "If function is compute kernel" + type: int - InstanceStruct: nmodl: "INSTANCE_STRUCT " members: diff --git a/src/main.cpp b/src/main.cpp index dc42b58043..f05a8a8cc2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -369,6 +369,9 @@ int main(int argc, const char* argv[]) { // information and not in LLVM visitor. int llvm_opt_level = llvm_benchmark ? 0 : cfg.llvm_opt_level_ir; + // If benchmarking, kernel functions should be wrapped taking void*. + bool wrap_kernel_functions = llvm_benchmark; + // Create platform abstraction. PlatformID pid = cfg.llvm_gpu_name == "default" ? PlatformID::CPU : PlatformID::GPU; const std::string name = cfg.llvm_gpu_name == "default" ? 
cfg.llvm_cpu_name @@ -393,7 +396,8 @@ int main(int argc, const char* argv[]) { platform, llvm_opt_level, !cfg.llvm_no_debug, - cfg.llvm_fast_math_flags); + cfg.llvm_fast_math_flags, + wrap_kernel_functions); visitor.visit_program(*ast); if (cfg.nmodl_ast) { NmodlPrintVisitor(filepath("llvm", "mod")).visit_program(*ast); diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index fc7fb569ba..3ecdc1eaaa 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -198,7 +198,7 @@ class JitDriver { utils::make_path(cfg.scratch_dir); } cg_driver.prepare_mod(node, modname); - nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0); + nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0, false, {}, true); visitor.visit_program(*node); const GPUExecutionParameters gpu_execution_parameters{cuda_grid_dim_x, cuda_block_dim_x}; nmodl::benchmark::LLVMBenchmark benchmark(visitor, diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp index 010bc2edf3..3ccf84e5c0 100644 --- a/test/benchmark/llvm_benchmark.cpp +++ b/test/benchmark/llvm_benchmark.cpp @@ -24,24 +24,6 @@ namespace nmodl { namespace benchmark { BenchmarkResults LLVMBenchmark::run() { - // create functions - generate_llvm(); - // Finally, run the benchmark and log the measurements. - return run_benchmark(); -} - -void LLVMBenchmark::generate_llvm() { - // First, visit the AST to build the LLVM IR module and wrap the kernel function calls. - auto start = std::chrono::steady_clock::now(); - llvm_visitor.wrap_kernel_functions(); - auto end = std::chrono::steady_clock::now(); - - // Log the time taken to visit the AST and build LLVM IR. - std::chrono::duration diff = end - start; - logger->info("Created LLVM IR module from NMODL AST in {} sec", diff.count()); -} - -BenchmarkResults LLVMBenchmark::run_benchmark() { // Set the codegen data helper and find the kernels. 
auto codegen_data = codegen::CodegenDataHelper(llvm_visitor.get_instance_struct_ptr()); std::vector kernel_names; @@ -102,16 +84,15 @@ BenchmarkResults LLVMBenchmark::run_benchmark() { } // Record the execution time of the kernel. - std::string wrapper_name = "__" + kernel_name + "_wrapper"; auto start = std::chrono::steady_clock::now(); #ifdef NMODL_LLVM_CUDA_BACKEND if (platform.is_CUDA_gpu()) { - cuda_runner->run_with_argument(wrapper_name, + cuda_runner->run_with_argument(kernel_name, instance_data.base_ptr, gpu_execution_parameters); } else { #endif - cpu_runner->run_with_argument(wrapper_name, instance_data.base_ptr); + cpu_runner->run_with_argument(kernel_name, instance_data.base_ptr); #ifdef NMODL_LLVM_CUDA_BACKEND } #endif diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index f03e9ea52d..38c32784de 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -120,15 +120,10 @@ class LLVMBenchmark { , opt_level_codegen(opt_level_codegen) , gpu_execution_parameters(gpu_exec_params) {} - /// Runs the benchmark. + /// Runs the main body of the benchmark, executing the compute kernels. BenchmarkResults run(); private: - /// Visits the AST to construct the LLVM IR module. - void generate_llvm(); - - /// Runs the main body of the benchmark, executing the compute kernels. - BenchmarkResults run_benchmark(); /// Sets the log output stream (file or console). 
void set_log_output(); diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 641bf93aa1..73c91b6e4d 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -315,9 +315,11 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", cpu_platform, - /*opt_level_ir=*/0); + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 4; @@ -343,7 +345,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { - runner.run_with_argument("__nrn_state_test_wrapper", + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); std::vector x_expected = {4.0, 3.0, 2.0, 1.0}; REQUIRE(check_instance_variable(instance_info, x_expected, "x")); @@ -400,9 +402,11 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/3); + /*opt_level_ir=*/3, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 10; @@ -434,7 +438,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { - runner.run_with_argument("__nrn_state_test_wrapper", + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); // Check that the main and remainder loops correctly change the data stored in x. 
std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; @@ -484,9 +488,11 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/0); + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 5; @@ -512,7 +518,7 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { runner.initialize_driver(); THEN("Ion values in struct have been updated correctly") { - runner.run_with_argument("__nrn_state_test_wrapper", + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); // cai[id] = ion_cai[ion_cai_index[id]] // cai[id] += 1 @@ -577,9 +583,11 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/0); + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. 
int num_elements = 5; @@ -613,7 +621,7 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { runner.initialize_driver(); THEN("Masked instructions are generated") { - runner.run_with_argument("__nrn_state_test_wrapper", + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); std::vector w_expected = {20.0, 20.0, 60.0, 40.0, 50.0}; REQUIRE(check_instance_variable(instance_info, w_expected, "w")); @@ -676,9 +684,11 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/3); + /*opt_level_ir=*/3, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 5; @@ -716,7 +726,7 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { runner.initialize_driver(); THEN("updates are commputed correctly with vector instructions and optimizations on") { - runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); // Recall: // ion_ina_id = mech->ion_ina_index[id] // ion_ika_id = mech->ion_ika_index[id] @@ -772,9 +782,11 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/0); + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. 
int num_elements = 6; @@ -812,7 +824,7 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { runner.initialize_driver(); THEN("Atomic updates are correct without optimizations") { - runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); // Recall: // ion_ina_id = mech->ion_ina_index[id] // ion_ika_id = mech->ion_ika_index[id] @@ -867,9 +879,11 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", /*output_dir=*/".", simd_cpu_platform, - /*opt_level_ir=*/0); + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); llvm_visitor.visit_program(*ast); - llvm_visitor.wrap_kernel_functions(); // Create the instance struct data. int num_elements = 6; @@ -908,7 +922,7 @@ SCENARIO("Kernel with atomic updates", "[llvm][runner]") { runner.initialize_driver(); THEN("Atomic updates are correct") { - runner.run_with_argument("__nrn_cur_test_wrapper", instance_data.base_ptr); + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); // Recall: // node_id = mech->node_index[id] // mech->vec_rhs[node_id] -= rhs diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp index 77fb3577e0..ff7601d05f 100644 --- a/test/unit/codegen/codegen_llvm_ir.cpp +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -1015,15 +1015,6 @@ SCENARIO("Scalar state kernel", "[visitor][llvm]") { REQUIRE(std::regex_search(module_string, m, condition)); REQUIRE(std::regex_search(module_string, m, cond_br)); - // Check that loop metadata is attached to the scalar kernel. 
- std::regex loop_metadata(R"(!llvm\.loop !0)"); - std::regex loop_metadata_self_reference(R"(!0 = distinct !\{!0, !1\})"); - std::regex loop_metadata_disable_vectorization( - R"(!1 = !\{!\"llvm\.loop\.vectorize\.enable\", i1 false\})"); - REQUIRE(std::regex_search(module_string, m, loop_metadata)); - REQUIRE(std::regex_search(module_string, m, loop_metadata_self_reference)); - REQUIRE(std::regex_search(module_string, m, loop_metadata_disable_vectorization)); - // Check for correct loads from the struct with GEPs. std::regex load_from_struct( " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*\n" From d2211e06477ed5b94d834a6fbc634b00e7139976 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Wed, 24 Aug 2022 10:18:01 +0300 Subject: [PATCH 098/105] [LLVM] Instantiate CodegenLLVMVisitor of PyJIT with fast math and debug symbols options (#912) * Added options for fast math and debug symbols on CodegenLLVMVisitor instantiated by NMODL PyJIT * Disable debug symbols in GPU PyJIT test Co-authored-by: Ioannis Magkanaris --- src/pybind/pynmodl.cpp | 8 +++++++- test/benchmark/benchmark.py | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index 3ecdc1eaaa..38e9cb4b53 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -198,7 +198,13 @@ class JitDriver { utils::make_path(cfg.scratch_dir); } cg_driver.prepare_mod(node, modname); - nmodl::codegen::CodegenLLVMVisitor visitor(modname, cfg.output_dir, platform, 0, false, {}, true); + nmodl::codegen::CodegenLLVMVisitor visitor(modname, + cfg.output_dir, + platform, + 0, + !cfg.llvm_no_debug, + cfg.llvm_fast_math_flags, + true); visitor.visit_program(*node); const GPUExecutionParameters gpu_execution_parameters{cuda_grid_dim_x, cuda_block_dim_x}; nmodl::benchmark::LLVMBenchmark benchmark(visitor, diff --git a/test/benchmark/benchmark.py b/test/benchmark/benchmark.py index 9144fa549d..55c86bba46 100644 --- 
a/test/benchmark/benchmark.py +++ b/test/benchmark/benchmark.py @@ -25,12 +25,16 @@ def main(): cfg = nmodl.CodeGenConfig() cfg.llvm_vector_width = args.vec cfg.llvm_opt_level_ir = 2 + cfg.llvm_fast_math_flags = ["nnan", "contract", "afn"] + cfg.llvm_no_debug = False cfg.nmodl_ast = True fname = args.file if args.gpu: # GPU enabled cfg.llvm_math_library = "libdevice" cfg.llvm_gpu_name = "nvptx64" cfg.llvm_gpu_target_architecture = "sm_70" + # Disable debug symbols generation for GPU code since the PTX generated is not valid + cfg.llvm_no_debug = True if not os.environ.get("CUDA_HOME"): raise RuntimeError("CUDA_HOME environment variable not set") cfg.shared_lib_paths = [os.getenv("CUDA_HOME") + "/nvvm/libdevice/libdevice.10.bc"] From 08555eea7fa7357f43cbacca7d6c9aff76758540 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 15 Sep 2022 20:17:50 +0300 Subject: [PATCH 099/105] Fix issues after rebase --- src/codegen/CMakeLists.txt | 10 ++++++++++ src/codegen/codegen_c_visitor.hpp | 19 +------------------ src/codegen/codegen_driver.cpp | 16 +++++++++------- src/codegen/llvm/codegen_llvm_visitor.cpp | 16 ++++------------ src/codegen/llvm/codegen_llvm_visitor.hpp | 2 +- src/main.cpp | 3 +-- test/benchmark/CMakeLists.txt | 1 + test/unit/CMakeLists.txt | 1 + .../codegen/codegen_llvm_instance_struct.cpp | 2 +- test/unit/codegen/codegen_llvm_visitor.cpp | 4 ++-- 10 files changed, 31 insertions(+), 43 deletions(-) diff --git a/src/codegen/CMakeLists.txt b/src/codegen/CMakeLists.txt index 8ae5e8b140..aeb8beddf2 100644 --- a/src/codegen/CMakeLists.txt +++ b/src/codegen/CMakeLists.txt @@ -16,6 +16,16 @@ add_library( add_dependencies(codegen lexer util visitor) target_link_libraries(codegen PRIVATE util) +# ~~~ +# pybind11::embed adds PYTHON_LIBRARIES to target_link_libraries. To avoid link to +# libpython, we can use `pybind11::module` interface library from pybind11. 
+# ~~~ +if(NOT LINK_AGAINST_PYTHON) + target_link_libraries(codegen PRIVATE pybind11::module) +else() + target_link_libraries(codegen PRIVATE pybind11::embed) +endif() + # copy to build directory to make usable from build directory configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fast_math.ispc ${CMAKE_BINARY_DIR}/include/nmodl/fast_math.ispc COPYONLY) diff --git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 45ab7b7441..e419951a65 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -1087,23 +1087,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { virtual void print_backend_compute_routine_decl(); - /** - * Print channel iterations from which tasks are created - * - * \note This is not used for the C backend - * \param type - */ - virtual void print_channel_iteration_task_begin(BlockType type); - - - /** - * Print end of channel iteration for task - * - * \note This is not used for the C backend - */ - virtual void print_channel_iteration_task_end(); - - /** * Print block start for tiling on channel iteration */ @@ -1737,7 +1720,7 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * @param print_initialisers Whether or not default values for variables * be included in the struct declaration. 
*/ - virtual void print_mechanism_range_var_structure(); + virtual void print_mechanism_range_var_structure(bool print_initialisers); /** * Print the function that initialize instance structure diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp index be00a66a7b..abec225214 100644 --- a/src/codegen/codegen_driver.cpp +++ b/src/codegen/codegen_driver.cpp @@ -8,12 +8,14 @@ #include #include "codegen/codegen_driver.hpp" +#include "codegen/codegen_transform_visitor.hpp" #include "codegen_compatibility_visitor.hpp" #include "utils/logger.hpp" #include "visitors/after_cvode_to_cnexp_visitor.hpp" #include "visitors/ast_visitor.hpp" #include "visitors/constant_folder_visitor.hpp" #include "visitors/global_var_visitor.hpp" +#include "visitors/implicit_argument_visitor.hpp" #include "visitors/inline_visitor.hpp" #include "visitors/ispc_rename_visitor.hpp" #include "visitors/kinetic_block_visitor.hpp" @@ -237,20 +239,20 @@ bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::s ast_to_nmodl(*node, filepath("solveblock", "mod")); } - if (json_perfstat) { + if (cfg.json_perfstat) { std::string file{scratch_dir}; file.append("/"); file.append(modfile); file.append(".perf.json"); logger->info("Writing performance statistics to {}", file); - PerfVisitor(file).visit_program(*ast); + PerfVisitor(file).visit_program(*node); } { // Add implicit arguments (like celsius, nt) to NEURON functions (like // nrn_ghk, at_time) whose signatures we have to massage. 
- ImplicitArgumentVisitor{}.visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); + ImplicitArgumentVisitor{}.visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); } { @@ -260,9 +262,9 @@ bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::s } { - CodegenTransformVisitor{}.visit_program(*ast); - ast_to_nmodl(*ast, filepath("TransformVisitor")); - SymtabVisitor(update_symtab).visit_program(*ast); + CodegenTransformVisitor{}.visit_program(*node); + ast_to_nmodl(*node, filepath("TransformVisitor", "mod")); + SymtabVisitor(update_symtab).visit_program(*node); } return true; } diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index 958e292ce8..e66b024672 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -929,14 +929,14 @@ void CodegenLLVMVisitor::visit_program(const ast::Program& node) { utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); } -void CodegenLLVMVisitor::print_mechanism_range_var_structure() { +void CodegenLLVMVisitor::print_mechanism_range_var_structure(bool) { printer->add_newline(2); printer->add_line("/** Instance Struct passed as argument to LLVM IR kernels */"); printer->start_block(fmt::format("struct {} ", instance_struct())); for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { auto is_pointer = variable->get_is_pointer(); auto name = to_nmodl(variable->get_name()); - auto qualifier = is_constant_variable(name) ? k_const() : ""; + auto qualifier = is_constant_variable(name) ? "const " : ""; auto nmodl_type = variable->get_type()->get_type(); auto pointer = is_pointer ? 
"*" : ""; auto var_name = variable->get_node_name(); @@ -970,17 +970,11 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { print_setup_range_variable(); } - if (shadow_vector_setup_required()) { - print_shadow_vector_setup(); - } printer->add_newline(2); printer->add_line("/** initialize mechanism instance variables */"); printer->start_block("static inline void setup_instance(NrnThread* nt, Memb_list* ml) "); printer->add_line( fmt::format("{0}* inst = ({0}*) mem_alloc(1, sizeof({0}));", instance_struct())); - if (channel_task_dependency_enabled() && !info.codegen_shadow_variables.empty()) { - printer->add_line("setup_shadow_vectors(inst, ml);"); - } std::string stride; printer->add_line("int pnodecount = ml->_nodecount_padded;"); @@ -1001,8 +995,7 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { auto range_var_type = get_range_var_float_type(var); if (float_type == range_var_type) { auto variable = fmt::format("ml->data+{}{}", id, stride); - auto device_variable = get_variable_device_pointer(variable, float_type_pointer); - printer->add_line(fmt::format("inst->{} = {};", name, device_variable)); + printer->add_line(fmt::format("inst->{} = {};", name, variable)); } else { printer->add_line(fmt::format( "inst->{} = setup_range_variable(ml->data+{}{}, pnodecount);", name, id, stride)); @@ -1025,8 +1018,7 @@ void CodegenLLVMVisitor::print_instance_variable_setup() { variable = "nt->_data"; type = info.artificial_cell ? 
"void*" : float_type_pointer; } - auto device_variable = get_variable_device_pointer(variable, type); - printer->add_line(fmt::format("inst->{} = {};", name, device_variable)); + printer->add_line(fmt::format("inst->{} = {};", name, variable)); } int index_id = 0; diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp index c4333e75ee..43b0cbd3c7 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.hpp +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -261,7 +261,7 @@ class CodegenLLVMVisitor: public CodegenCVisitor { void print_wrapper_routines() override; void print_wrapper_headers_include(); void print_data_structures(); - void print_mechanism_range_var_structure() override; + void print_mechanism_range_var_structure(bool) override; void print_instance_variable_setup() override; /** diff --git a/src/main.cpp b/src/main.cpp index f05a8a8cc2..80e3fcb5fb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,7 +14,6 @@ #include "codegen/codegen_c_visitor.hpp" #include "codegen/codegen_cuda_visitor.hpp" #include "codegen/codegen_ispc_visitor.hpp" -#include "codegen/codegen_transform_visitor.hpp" #ifdef NMODL_LLVM_BACKEND #include "codegen/llvm/codegen_llvm_visitor.hpp" #include "test/benchmark/llvm_benchmark.hpp" @@ -277,7 +276,7 @@ int main(int argc, const char* argv[]) { // if any of the other backends is used we force the C backend to be off. 
if (cfg.ispc_backend) { - c_backend = false; + cfg.c_backend = false; } utils::make_path(cfg.output_dir); diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index b0f36e381c..aec532d712 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -16,6 +16,7 @@ endif() include_directories(${LLVM_INCLUDE_DIRS}) add_library(llvm_benchmark STATIC ${LLVM_BENCHMARK_SOURCE_FILES}) add_dependencies(llvm_benchmark lexer util visitor) +target_link_libraries(llvm_benchmark PRIVATE util) if(NMODL_ENABLE_LLVM_CUDA) target_link_libraries(llvm_benchmark PRIVATE CUDA::cudart CUDA::nvrtc) endif() diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 0dbc1df6a3..6242da5f14 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -112,6 +112,7 @@ if(NMODL_ENABLE_LLVM) add_library(benchmark_data STATIC codegen/codegen_data_helper.cpp) add_dependencies(benchmark_data lexer) + target_link_libraries(benchmark_data PRIVATE util) add_executable( testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp index 41cec12194..32fb9a1b6d 100644 --- a/test/unit/codegen/codegen_llvm_instance_struct.cpp +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -5,7 +5,7 @@ * Lesser General Public License. See top-level LICENSE file for details. *************************************************************************/ -#include +#include #include "ast/all.hpp" #include "ast/program.hpp" diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp index c055c0747a..f86552459b 100644 --- a/test/unit/codegen/codegen_llvm_visitor.cpp +++ b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -5,7 +5,7 @@ * Lesser General Public License. See top-level LICENSE file for details. 
*************************************************************************/ -#include +#include #include "ast/program.hpp" #include "codegen/codegen_helper_visitor.hpp" @@ -45,7 +45,7 @@ std::string get_wrapper_instance_struct(const std::string& nmodl_text) { codegen::CodegenLLVMVisitor llvm_visitor("hh.mod", oss, cpu_platform, 0); llvm_visitor.visit_program(*ast); strbuf.str(""); - llvm_visitor.print_mechanism_range_var_structure(); + llvm_visitor.print_mechanism_range_var_structure(false); llvm_visitor.print_instance_variable_setup(); return strbuf.str(); } From 2188bb2eaa1c3be754abf74cf9d79dc768dd3a50 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 15 Sep 2022 20:25:05 +0300 Subject: [PATCH 100/105] Update desired cmake version in github actions --- .github/workflows/coverage.yml | 2 +- .github/workflows/nmodl-ci.yml | 2 +- .github/workflows/nmodl-doc.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 4f9e17eac7..350ef1da65 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -18,7 +18,7 @@ on: env: CMAKE_BUILD_PARALLEL_LEVEL: 3 CTEST_PARALLEL_LEVEL: 1 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 PYTHON_VERSION: 3.8 jobs: diff --git a/.github/workflows/nmodl-ci.yml b/.github/workflows/nmodl-ci.yml index 7223bfeeba..b08e344881 100644 --- a/.github/workflows/nmodl-ci.yml +++ b/.github/workflows/nmodl-ci.yml @@ -17,7 +17,7 @@ on: env: CTEST_PARALLEL_LEVEL: 1 PYTHON_VERSION: 3.8 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 jobs: ci: diff --git a/.github/workflows/nmodl-doc.yml b/.github/workflows/nmodl-doc.yml index f1d9e75fdf..167ae91911 100644 --- a/.github/workflows/nmodl-doc.yml +++ b/.github/workflows/nmodl-doc.yml @@ -17,7 +17,7 @@ on: env: BUILD_TYPE: Release PYTHON_VERSION: 3.8 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 jobs: ci: From 
958e9cd0b119ffbba114b5d38023c6a709cb3675 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 15 Sep 2022 20:30:37 +0300 Subject: [PATCH 101/105] Make Clang and CMake format happy --- src/codegen/codegen_info.cpp | 2 +- src/codegen/llvm/annotation.cpp | 12 ++++---- src/codegen/llvm/annotation.hpp | 6 ++-- .../llvm/codegen_llvm_helper_visitor.cpp | 7 +++-- src/codegen/llvm/codegen_llvm_visitor.cpp | 2 +- src/codegen/llvm/llvm_ir_builder.cpp | 7 +++-- src/codegen/llvm/llvm_utils.cpp | 6 ++-- .../llvm/replace_with_lib_functions.cpp | 28 +++++++++---------- .../llvm/replace_with_lib_functions.hpp | 4 ++- test/benchmark/CMakeLists.txt | 2 +- test/benchmark/jit_driver.hpp | 4 +-- test/benchmark/llvm_benchmark.hpp | 1 - test/unit/codegen/codegen_llvm_execution.cpp | 12 +++----- 13 files changed, 46 insertions(+), 47 deletions(-) diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index e2415fb6cb..dc461d6399 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -371,7 +371,7 @@ void CodegenInfo::get_float_variables() { if (breakpoint_exist()) { std::string name = vectorize ? 
naming::CONDUCTANCE_UNUSED_VARIABLE - : naming::CONDUCTANCE_VARIABLE; + : naming::CONDUCTANCE_VARIABLE; // make sure conductance variable like `g` is not already defined if (auto r = std::find_if(codegen_float_variables.cbegin(), diff --git a/src/codegen/llvm/annotation.cpp b/src/codegen/llvm/annotation.cpp index 6bf437e309..1427b8cd1a 100644 --- a/src/codegen/llvm/annotation.cpp +++ b/src/codegen/llvm/annotation.cpp @@ -25,14 +25,15 @@ namespace custom { void Annotator::add_nmodl_compute_kernel_annotation(llvm::Function& function) { llvm::LLVMContext& context = function.getContext(); - llvm::MDNode* node = llvm::MDNode::get(context, llvm::MDString::get(context, nmodl_compute_kernel)); + llvm::MDNode* node = llvm::MDNode::get(context, + llvm::MDString::get(context, nmodl_compute_kernel)); function.setMetadata(nmodl_annotations, node); } bool Annotator::has_nmodl_compute_kernel_annotation(llvm::Function& function) { if (!function.hasMetadata(nmodl_annotations)) return false; - + llvm::MDNode* node = function.getMetadata(nmodl_annotations); std::string type = llvm::cast(node->getOperand(0))->getString().str(); return type == nmodl_compute_kernel; @@ -54,12 +55,12 @@ void DefaultCPUAnnotator::annotate(llvm::Function& function) const { function.addParamAttr(0, llvm::Attribute::NoAlias); // Finally, specify that the mechanism data struct pointer does not - // capture and is read-only. + // capture and is read-only. 
function.addParamAttr(0, llvm::Attribute::NoCapture); function.addParamAttr(0, llvm::Attribute::ReadOnly); } -void CUDAAnnotator::annotate(llvm::Function& function) const { +void CUDAAnnotator::annotate(llvm::Function& function) const { llvm::LLVMContext& context = function.getContext(); llvm::Module* m = function.getParent(); @@ -83,8 +84,7 @@ bool AnnotationPass::runOnModule(Module& module) { bool modified = false; for (auto& function: module.getFunctionList()) { - if (!function.isDeclaration() && - Annotator::has_nmodl_compute_kernel_annotation(function)) { + if (!function.isDeclaration() && Annotator::has_nmodl_compute_kernel_annotation(function)) { annotator->annotate(function); modified = true; } diff --git a/src/codegen/llvm/annotation.hpp b/src/codegen/llvm/annotation.hpp index c2a8367713..b7349a147f 100644 --- a/src/codegen/llvm/annotation.hpp +++ b/src/codegen/llvm/annotation.hpp @@ -15,17 +15,17 @@ namespace custom { /** * \class Annotator - * \brief Base class that can be overriden to specify function annotations. + * \brief Base class that can be overriden to specify function annotations. */ class Annotator { public: virtual void annotate(llvm::Function& function) const = 0; virtual ~Annotator() = default; - /// Marks LLVM function as NMODL compute kernel. + /// Marks LLVM function as NMODL compute kernel. static void add_nmodl_compute_kernel_annotation(llvm::Function& function); - /// Returns true if LLVM function is marked as NMODL compute kernel. + /// Returns true if LLVM function is marked as NMODL compute kernel. 
static bool has_nmodl_compute_kernel_annotation(llvm::Function& function); }; diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp index 6e70a9aa63..af28ee3e2e 100644 --- a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -192,7 +192,8 @@ void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { /// we have all information for code generation function, create a new node /// which will be inserted later into AST - auto function = std::make_shared(fun_ret_type, name, arguments, block, /*is_kernel=*/0); + auto function = std::make_shared( + fun_ret_type, name, arguments, block, /*is_kernel=*/0); if (node.get_token()) { function->set_token(*node.get_token()->clone()); } @@ -731,8 +732,8 @@ void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { code_arguments.emplace_back(instance_var); /// finally, create new function - auto function = - std::make_shared(return_type, name, code_arguments, function_block, /*is_kernel=*/1); + auto function = std::make_shared( + return_type, name, code_arguments, function_block, /*is_kernel=*/1); codegen_functions.push_back(function); // todo: remove this, temporary diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp index e66b024672..a114150491 100644 --- a/src/codegen/llvm/codegen_llvm_visitor.cpp +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -6,8 +6,8 @@ *************************************************************************/ #include "codegen/llvm/codegen_llvm_visitor.hpp" -#include "codegen/llvm/llvm_utils.hpp" #include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/llvm_utils.hpp" #include "ast/all.hpp" #include "utils/logger.hpp" diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp index 490e20d13d..c8ad11c205 100644 --- a/src/codegen/llvm/llvm_ir_builder.cpp +++ 
b/src/codegen/llvm/llvm_ir_builder.cpp @@ -158,9 +158,10 @@ void IRBuilder::allocate_function_arguments(llvm::Function* function, } } -void IRBuilder::allocate_and_wrap_kernel_arguments(llvm::Function* function, - const ast::CodegenVarWithTypeVector& nmodl_arguments, - llvm::Type* struct_type) { +void IRBuilder::allocate_and_wrap_kernel_arguments( + llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments, + llvm::Type* struct_type) { // In theory, this should never happen but let's guard anyway. if (nmodl_arguments.size() != 1) { throw std::runtime_error("Error: NMODL computer kernel must have a single argument\n"); diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp index e74d878989..075af794b0 100644 --- a/src/codegen/llvm/llvm_utils.cpp +++ b/src/codegen/llvm/llvm_utils.cpp @@ -6,8 +6,8 @@ *************************************************************************/ #include "codegen/llvm/llvm_utils.hpp" -#include "codegen/llvm/replace_with_lib_functions.hpp" #include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/replace_with_lib_functions.hpp" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -180,7 +180,7 @@ void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* t void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module) { llvm::legacy::PassManager pm; - Replacer *replacer = nullptr; + Replacer* replacer = nullptr; if (platform.is_CUDA_gpu()) { replacer = new custom::CUDAReplacer(); } else { @@ -195,7 +195,7 @@ void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& modul void annotate(codegen::Platform& platform, llvm::Module& module) { llvm::legacy::PassManager pm; - Annotator *annotator = nullptr; + Annotator* annotator = nullptr; if (platform.is_CUDA_gpu()) { annotator = new custom::CUDAAnnotator(); } else { diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp 
b/src/codegen/llvm/replace_with_lib_functions.cpp index 140142fdc0..984e83d912 100644 --- a/src/codegen/llvm/replace_with_lib_functions.cpp +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -22,7 +22,8 @@ namespace nmodl { namespace custom { Patterns DefaultCPUReplacer::patterns() const { - throw std::runtime_error("Error: DefaultCPUReplacer has no patterns and uses built-in LLVM passes instead.\n"); + throw std::runtime_error( + "Error: DefaultCPUReplacer has no patterns and uses built-in LLVM passes instead.\n"); } std::string DefaultCPUReplacer::get_library_name() { @@ -30,16 +31,14 @@ std::string DefaultCPUReplacer::get_library_name() { } Patterns CUDAReplacer::patterns() const { - return { - {"llvm.exp.f32", "__nv_expf"}, - {"llvm.exp.f64", "__nv_exp"}, - {"llvm.pow.f32", "__nv_powf"}, - {"llvm.pow.f64", "__nv_pow"}, - {"llvm.log.f32", "__nv_logf"}, - {"llvm.log.f64", "__nv_log"}, - {"llvm.fabs.f32", "__nv_fabsf"}, - {"llvm.fabs.f64", "__nv_fabs"} - }; + return {{"llvm.exp.f32", "__nv_expf"}, + {"llvm.exp.f64", "__nv_exp"}, + {"llvm.pow.f32", "__nv_powf"}, + {"llvm.pow.f64", "__nv_pow"}, + {"llvm.log.f32", "__nv_logf"}, + {"llvm.log.f64", "__nv_log"}, + {"llvm.fabs.f32", "__nv_fabsf"}, + {"llvm.fabs.f64", "__nv_fabs"}}; } } // namespace custom } // namespace nmodl @@ -114,7 +113,7 @@ void ReplacePass::getAnalysisUsage(AnalysisUsage& au) const { void ReplacePass::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, Triple& triple) { // Since LLVM does not support SLEEF as a vector library yet, process it separately. 
- if (((DefaultCPUReplacer*)replacer)->get_library_name() == "SLEEF") { + if (((DefaultCPUReplacer*) replacer)->get_library_name() == "SLEEF") { // clang-format off #define FIXED(w) ElementCount::getFixed(w) // clang-format on @@ -165,10 +164,11 @@ void ReplacePass::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& {"none", VecLib::NoLibrary}, {"SVML", VecLib::SVML}}; - const auto& library = llvm_supported_vector_libraries.find(((DefaultCPUReplacer*)replacer)->get_library_name()); + const auto& library = llvm_supported_vector_libraries.find( + ((DefaultCPUReplacer*) replacer)->get_library_name()); if (library == llvm_supported_vector_libraries.end()) throw std::runtime_error("Error: unknown vector library - " + - ((DefaultCPUReplacer*)replacer)->get_library_name() + "\n"); + ((DefaultCPUReplacer*) replacer)->get_library_name() + "\n"); // Add vectorizable functions to the target library info. if (library->second != VecLib::LIBMVEC_X86 || (triple.isX86() && triple.isArch64Bit())) { diff --git a/src/codegen/llvm/replace_with_lib_functions.hpp b/src/codegen/llvm/replace_with_lib_functions.hpp index dfd97d3bbd..5385505686 100644 --- a/src/codegen/llvm/replace_with_lib_functions.hpp +++ b/src/codegen/llvm/replace_with_lib_functions.hpp @@ -39,9 +39,11 @@ class Replacer { class DefaultCPUReplacer: public Replacer { private: std::string library_name; + public: DefaultCPUReplacer(std::string library_name) - : Replacer(), library_name(library_name) {} + : Replacer() + , library_name(library_name) {} Patterns patterns() const final override; diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index aec532d712..f2f45e1f41 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -44,7 +44,7 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) if(NMODL_ENABLE_LLVM_CUDA) add_test(NAME "PyJIT/${modfile_name}_gpu" COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py - --file ${modfile} --gpu ${extra_args}) 
+ --file ${modfile} --gpu ${extra_args}) message(STATUS "CUDA_HOME is ${CUDAToolkit_TARGET_DIR}") set_tests_properties( "PyJIT/${modfile_name}_gpu" diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp index 3569c4bd4f..96b46a447c 100644 --- a/test/benchmark/jit_driver.hpp +++ b/test/benchmark/jit_driver.hpp @@ -60,7 +60,7 @@ class JITDriver { if (!expected_symbol) throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); - auto(*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); + auto (*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); ReturnType result = res(); return result; } @@ -72,7 +72,7 @@ class JITDriver { if (!expected_symbol) throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); - auto(*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); + auto (*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); ReturnType result = res(arg); return result; } diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp index 38c32784de..0d63bf78ba 100644 --- a/test/benchmark/llvm_benchmark.hpp +++ b/test/benchmark/llvm_benchmark.hpp @@ -124,7 +124,6 @@ class LLVMBenchmark { BenchmarkResults run(); private: - /// Sets the log output stream (file or console). 
void set_log_output(); }; diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp index 73c91b6e4d..2c9262902b 100644 --- a/test/unit/codegen/codegen_llvm_execution.cpp +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -345,8 +345,7 @@ SCENARIO("Simple scalar kernel", "[llvm][runner]") { runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { - runner.run_with_argument("nrn_state_test", - instance_data.base_ptr); + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); std::vector x_expected = {4.0, 3.0, 2.0, 1.0}; REQUIRE(check_instance_variable(instance_info, x_expected, "x")); } @@ -438,8 +437,7 @@ SCENARIO("Simple vectorised kernel", "[llvm][runner]") { runner.initialize_driver(); THEN("Values in struct have changed according to the formula") { - runner.run_with_argument("nrn_state_test", - instance_data.base_ptr); + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); // Check that the main and remainder loops correctly change the data stored in x. 
std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; REQUIRE(check_instance_variable(instance_info, x_expected, "x")); @@ -518,8 +516,7 @@ SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { runner.initialize_driver(); THEN("Ion values in struct have been updated correctly") { - runner.run_with_argument("nrn_state_test", - instance_data.base_ptr); + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); // cai[id] = ion_cai[ion_cai_index[id]] // cai[id] += 1 std::vector cai_expected = {6.0, 4.0, 5.0, 2.0, 3.0}; @@ -621,8 +618,7 @@ SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { runner.initialize_driver(); THEN("Masked instructions are generated") { - runner.run_with_argument("nrn_state_test", - instance_data.base_ptr); + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); std::vector w_expected = {20.0, 20.0, 60.0, 40.0, 50.0}; REQUIRE(check_instance_variable(instance_info, w_expected, "w")); From c5678e55e8241685e72ef799acd061888a23b65a Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Thu, 15 Sep 2022 20:35:28 +0300 Subject: [PATCH 102/105] Disable llvm backend by default --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a78bcc94..5b8a2c83f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,8 +22,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) # ============================================================================= option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" ON) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. 
instead of 2019 nist constants" OFF) -option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" ON) -option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" ON) +option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" OFF) +option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" OFF) option(NMODL_ENABLE_LLVM_CUDA "Enable LLVM CUDA backend to run GPU benchmark" OFF) option(NMODL_ENABLE_JIT_EVENT_LISTENERS "Enable JITEventListener for Perf and Vtune" OFF) From fee9a0baf895b21c8a72ea02c4dcf69b4f21c8c5 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 16 Sep 2022 13:07:35 +0200 Subject: [PATCH 103/105] Fix benchmark linking with visitors --- test/benchmark/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt index f2f45e1f41..da72c43514 100644 --- a/test/benchmark/CMakeLists.txt +++ b/test/benchmark/CMakeLists.txt @@ -16,7 +16,7 @@ endif() include_directories(${LLVM_INCLUDE_DIRS}) add_library(llvm_benchmark STATIC ${LLVM_BENCHMARK_SOURCE_FILES}) add_dependencies(llvm_benchmark lexer util visitor) -target_link_libraries(llvm_benchmark PRIVATE util) +target_link_libraries(llvm_benchmark PRIVATE util visitor) if(NMODL_ENABLE_LLVM_CUDA) target_link_libraries(llvm_benchmark PRIVATE CUDA::cudart CUDA::nvrtc) endif() From a967a3f60f096efee9737c3029b35bd956544d29 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 16 Sep 2022 16:08:52 +0200 Subject: [PATCH 104/105] Setup sanitizer options for new test --- test/integration/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/integration/CMakeLists.txt b/test/integration/CMakeLists.txt index e0bb995c30..a760ff96f8 100644 --- a/test/integration/CMakeLists.txt +++ b/test/integration/CMakeLists.txt @@ -11,8 +11,10 @@ file(GLOB modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/integration/mod/*.mod") foreach(modfile ${modfiles}) 
get_filename_component(modfile_name "${modfile}" NAME) add_test(NAME ${modfile_name} COMMAND ${CMAKE_BINARY_DIR}/bin/nmodl ${modfile}) - cpp_cc_configure_sanitizers(TEST ${modfile_name}) add_test(NAME ${modfile_name}_oacc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --c acc --oacc) add_test(NAME ${modfile_name}_ispc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --ispc) + cpp_cc_configure_sanitizers(TEST ${modfile_name}) + cpp_cc_configure_sanitizers(TEST ${modfile_name}_oacc) + cpp_cc_configure_sanitizers(TEST ${modfile_name}_ispc) endforeach() From de5b1447c554c7443e8ca6748becde9a657ab89e Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 16 Sep 2022 17:16:02 +0200 Subject: [PATCH 105/105] Added supression for undefined behavior in std random number generator --- .sanitizers/undefined.supp | 1 + 1 file changed, 1 insertion(+) diff --git a/.sanitizers/undefined.supp b/.sanitizers/undefined.supp index 6eb545faad..eb93e8c175 100644 --- a/.sanitizers/undefined.supp +++ b/.sanitizers/undefined.supp @@ -1,3 +1,4 @@ implicit-integer-sign-change:double vector[2] Eigen::internal::pabs(double vector[2] const&) unsigned-integer-overflow:nmodl::fast_math::vexp(double) unsigned-integer-overflow:nmodl::fast_math::vexpm1(double) +unsigned-integer-overflow:std::mersenne_twister_engine::_M_gen_rand() \ No newline at end of file