diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 4f9e17eac7..350ef1da65 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -18,7 +18,7 @@ on: env: CMAKE_BUILD_PARALLEL_LEVEL: 3 CTEST_PARALLEL_LEVEL: 1 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 PYTHON_VERSION: 3.8 jobs: diff --git a/.github/workflows/nmodl-ci.yml b/.github/workflows/nmodl-ci.yml index 7223bfeeba..b08e344881 100644 --- a/.github/workflows/nmodl-ci.yml +++ b/.github/workflows/nmodl-ci.yml @@ -17,7 +17,7 @@ on: env: CTEST_PARALLEL_LEVEL: 1 PYTHON_VERSION: 3.8 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 jobs: ci: diff --git a/.github/workflows/nmodl-doc.yml b/.github/workflows/nmodl-doc.yml index f1d9e75fdf..167ae91911 100644 --- a/.github/workflows/nmodl-doc.yml +++ b/.github/workflows/nmodl-doc.yml @@ -17,7 +17,7 @@ on: env: BUILD_TYPE: Release PYTHON_VERSION: 3.8 - DESIRED_CMAKE_VERSION: 3.15.0 + DESIRED_CMAKE_VERSION: 3.17.0 jobs: ci: diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ee3c248543..243113876a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,7 +41,8 @@ trigger cvf: .spack_nmodl: variables: SPACK_PACKAGE: nmodl - SPACK_PACKAGE_SPEC: ~legacy-unit+python + SPACK_PACKAGE_SPEC: ~legacy-unit+python+llvm + SPACK_INSTALL_EXTRA_FLAGS: -v spack_setup: extends: .spack_setup_ccache @@ -65,14 +66,6 @@ build:intel: variables: SPACK_PACKAGE_COMPILER: intel -build:nvhpc: - extends: - - .spack_build - - .spack_nmodl - variables: - SPACK_PACKAGE_COMPILER: nvhpc - SPACK_PACKAGE_DEPENDENCIES: ^bison%gcc^flex%gcc^py-jinja2%gcc^py-sympy%gcc^py-pyyaml%gcc - .nmodl_tests: variables: # https://github.com/BlueBrain/nmodl/issues/737 @@ -84,8 +77,30 @@ test:intel: - .nmodl_tests needs: ["build:intel"] -test:nvhpc: +.benchmark_config: + variables: + bb5_ntasks: 1 + bb5_cpus_per_task: 1 + bb5_memory: 16G + bb5_exclusive: full + bb5_constraint: volta # V100 GPU node + +.build_allocation: + variables: + 
bb5_ntasks: 2 # so we block 16 cores + bb5_cpus_per_task: 8 # ninja -j {this} + bb5_memory: 76G # ~16*384/80 + +build_cuda:gcc: + extends: [.spack_build, .build_allocation] + variables: + SPACK_PACKAGE: nmodl + SPACK_PACKAGE_SPEC: ~legacy-unit+python+llvm+llvm_cuda + SPACK_INSTALL_EXTRA_FLAGS: -v + SPACK_PACKAGE_COMPILER: gcc + +test_benchmark:gcc: extends: + - .benchmark_config - .ctest - - .nmodl_tests - needs: ["build:nvhpc"] + needs: ["build_cuda:gcc"] diff --git a/.sanitizers/undefined.supp b/.sanitizers/undefined.supp index 6eb545faad..eb93e8c175 100644 --- a/.sanitizers/undefined.supp +++ b/.sanitizers/undefined.supp @@ -1,3 +1,4 @@ implicit-integer-sign-change:double vector[2] Eigen::internal::pabs(double vector[2] const&) unsigned-integer-overflow:nmodl::fast_math::vexp(double) unsigned-integer-overflow:nmodl::fast_math::vexpm1(double) +unsigned-integer-overflow:std::mersenne_twister_engine::_M_gen_rand() \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e9c5942c33..5b8a2c83f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ # See top-level LICENSE file for details. # ============================================================================= -cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) project(NMODL LANGUAGES CXX) @@ -22,6 +22,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) # ============================================================================= option(NMODL_ENABLE_PYTHON_BINDINGS "Enable pybind11 based python bindings" ON) option(NMODL_ENABLE_LEGACY_UNITS "Use original faraday, R, etc. 
instead of 2019 nist constants" OFF) +option(NMODL_ENABLE_LLVM "Enable LLVM based code generation" OFF) +option(NMODL_ENABLE_LLVM_GPU "Enable LLVM based GPU code generation" OFF) +option(NMODL_ENABLE_LLVM_CUDA "Enable LLVM CUDA backend to run GPU benchmark" OFF) +option(NMODL_ENABLE_JIT_EVENT_LISTENERS "Enable JITEventListener for Perf and Vtune" OFF) + if(NMODL_ENABLE_LEGACY_UNITS) add_definitions(-DUSE_LEGACY_UNITS) endif() @@ -174,6 +179,21 @@ cpp_cc_find_python_module(sympy 1.3 REQUIRED) cpp_cc_find_python_module(textwrap 0.9 REQUIRED) cpp_cc_find_python_module(yaml 3.12 REQUIRED) +# ============================================================================= +# Find LLVM dependencies +# ============================================================================= +if(NMODL_ENABLE_LLVM) + include(cmake/LLVMHelper.cmake) + include_directories(${LLVM_INCLUDE_DIRS}) + add_definitions(-DNMODL_LLVM_BACKEND) + if(NMODL_ENABLE_LLVM_CUDA) + enable_language(CUDA) + find_package(CUDAToolkit) + include_directories(${CUDAToolkit_INCLUDE_DIRS}) + add_definitions(-DNMODL_LLVM_CUDA_BACKEND) + endif() +endif() + # ============================================================================= # Compiler specific flags for external submodules # ============================================================================= @@ -207,6 +227,9 @@ set(MEMORYCHECK_COMMAND_OPTIONS # do not enable tests if nmodl is used as submodule if(NOT NMODL_AS_SUBPROJECT) include(CTest) + if(NMODL_ENABLE_LLVM) + add_subdirectory(test/benchmark) + endif() add_subdirectory(test/unit) add_subdirectory(test/integration) endif() @@ -271,6 +294,19 @@ message(STATUS "Python Bindings | ${NMODL_ENABLE_PYTHON_BINDINGS}") message(STATUS "Flex | ${FLEX_EXECUTABLE}") message(STATUS "Bison | ${BISON_EXECUTABLE}") message(STATUS "Python | ${PYTHON_EXECUTABLE}") +message(STATUS "LLVM Codegen | ${NMODL_ENABLE_LLVM}") +if(NMODL_ENABLE_LLVM) + message(STATUS " VERSION | ${LLVM_PACKAGE_VERSION}") + message(STATUS " 
INCLUDE | ${LLVM_INCLUDE_DIRS}") + message(STATUS " CMAKE | ${LLVM_CMAKE_DIR}") + message(STATUS " JIT LISTENERS | ${NMODL_ENABLE_JIT_EVENT_LISTENERS}") +endif() +message(STATUS "LLVM CUDA Codegen | ${NMODL_ENABLE_LLVM_CUDA}") +if(NMODL_ENABLE_LLVM_CUDA) + message(STATUS " CUDA VERSION | ${CUDAToolkit_VERSION}") + message(STATUS " INCLUDE | ${CUDAToolkit_INCLUDE_DIRS}") + message(STATUS " LIBRARY | ${CUDAToolkit_LIBRARY_DIR}") +endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://github.com/BlueBrain/nmodl/") message(STATUS "--------------+--------------------------------------------------------------") diff --git a/INSTALL.md b/INSTALL.md index cf42c44ac9..36c4e047af 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -21,7 +21,7 @@ To build the project from source, a modern C++ compiler with C++14 support is ne - flex (>=2.6) - bison (>=3.0) -- CMake (>=3.15) +- CMake (>=3.17) - Python (>=3.7) - Python packages : jinja2 (>=2.10), pyyaml (>=3.13), pytest (>=4.0.0), sympy (>=1.3), textwrap @@ -31,7 +31,7 @@ Typically the versions of bison and flex provided by the system are outdated and To get recent version of all dependencies we recommend using [homebrew](https://brew.sh/): ```sh -brew install flex bison cmake python3 +brew install flex bison cmake python3 llvm ``` The necessary Python packages can then easily be added using the pip3 command. @@ -57,7 +57,7 @@ export PATH=/opt/homebrew/opt/flex/bin:/opt/homebrew/opt/bison/bin:$PATH On Ubuntu (>=18.04) flex/bison versions are recent enough and are installed along with the system toolchain: ```sh -apt-get install flex bison gcc python3 python3-pip +apt-get install flex bison gcc python3 python3-pip llvm-dev llvm-runtime llvm clang-format clang ``` The Python dependencies are installed using: @@ -79,6 +79,15 @@ cmake .. 
-DCMAKE_INSTALL_PREFIX=$HOME/nmodl
make -j && make install
```

+If `llvm-config` is not in PATH then set LLVM_DIR as:
+
+```sh
+cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DLLVM_DIR=/path/to/llvm/install/lib/cmake/llvm
+
+# on OSX
+cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DLLVM_DIR=`brew --prefix llvm`/lib/cmake/llvm
+```
+
 And set PYTHONPATH as:
 
 ```sh
@@ -132,6 +141,29 @@ export NMODL_WRAPLIB=/opt/nmodl/lib/libpywrapper.so
 
 **Note**: In order for all unit tests to function correctly when building without linking against libpython we must set `NMODL_PYLIB` before running cmake!
 
+### Using CUDA backend to run benchmarks
+
+`NMODL` supports generating code and compiling it for execution on an `NVIDIA` GPU via its benchmark infrastructure using the `LLVM` backend. To enable the `CUDA` backend to compile and execute the GPU code we need to set the following `CMake` flag during compilation of `NMODL`:
+```
+-DNMODL_ENABLE_LLVM_CUDA=ON
+```
+
+To find the needed `CUDA` libraries (`cudart` and `nvrtc`) you need to have the CUDA Toolkit installed on your system.
+This can be done by installing the CUDA Toolkit from the [CUDA Toolkit website](https://developer.nvidia.com/cuda-downloads) or by installing the `CUDA` spack package and loading the corresponding module.
+
+Then given a supported MOD file you can execute the benchmark on your supported NVIDIA GPU by running the following command:
+```
+./bin/nmodl .mod llvm --no-debug --ir --opt-level-ir 3 gpu --target-arch "sm_80" --name "nvptx64" --math-library libdevice benchmark --run --libs "${CUDA_ROOT}/nvvm/libdevice/libdevice.10.bc" --opt-level-codegen 3 --instance-size 10000000 --repeat 2 --grid-dim-x 4096 --block-dim-x 256
+```
+The above command executes the benchmark on a GPU with `Compute Architecture` `sm_80` and links the generated code to the `libdevice` optimized math library provided by `NVIDIA`. 
+Using the above command you can also select the optimization level of the generated code, the instance size of the generated data, the number of repetitions and the grid and block dimensions for the GPU execution. + +**Note**: In order for the CUDA backend to be able to compile and execute the generated code on GPU the CUDA Toolkit version installed needs to have the same version as the `CUDA` installed by the NVIDIA driver in the system that will be used to run the benchmark. +You can find the CUDA Toolkit version by running the following command: +``` +nvidia-smi +``` +and noting the `CUDA Version` stated there. For example if `CUDA Version` reported by `nvidia-smi` is CUDA 11.4 you need to install the `CUDA Toolkit 11.4.*` to be able to compile and execute the GPU code. ## Testing the Installed Module diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d430513328..1954b2d42d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -99,9 +99,13 @@ stages: url="https://github.com/ispc/ispc/releases/download/${ispc_version}/ispc-${ispc_version}${ispc_version_suffix}-${url_os}.tar.gz"; mkdir $(pwd)/$CMAKE_PKG/ispc wget --quiet --output-document=- $url | tar -xvzf - -C $(pwd)/$CMAKE_PKG/ispc --strip 1; + # install llvm nightly (future v13) TODO: this will fail now, FIX this! + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 13 env: - CMAKE_VER: 'v3.15.0' - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_VER: 'v3.17.0' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Install Dependencies' - script: | export PATH=$(pwd)/$CMAKE_PKG/bin:/home/vsts/.local/bin:$PATH @@ -109,7 +113,7 @@ stages: mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build cmake --version - cmake .. -DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release + cmake .. 
-DPYTHON_EXECUTABLE=$(which python3.7) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=Release -DNMODL_ENABLE_LLVM=ON -DLLVM_DIR=/usr/lib/llvm-13/share/llvm/cmake/ make -j 2 if [ $? -ne 0 ] then @@ -119,7 +123,7 @@ stages: make install #this is needed for the integration tests env CTEST_OUTPUT_ON_FAILURE=1 make test env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Build and Run Unit Tests' - script: | export PATH=$(pwd)/$CMAKE_PKG/bin:/home/vsts/.local/bin:$PATH @@ -150,7 +154,7 @@ stages: fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' SHELL: 'bash' displayName: 'Build Neuron and Run Integration Tests' - script: | @@ -174,17 +178,17 @@ stages: fi ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod env: - CMAKE_PKG: 'cmake-3.15.0-Linux-x86_64' + CMAKE_PKG: 'cmake-3.17.0-Linux-x86_64' displayName: 'Build CoreNEURON and Run Integration Tests with ISPC compiler' - job: 'osx11' pool: - vmImage: 'macOS-11' - displayName: 'MacOS (11), AppleClang 12.0' + vmImage: 'macOS-10.15' + displayName: 'MacOS (10.15), AppleClang 13.0 (trunk, May 2021)' steps: - checkout: self submodules: True - script: | - brew install flex bison cmake python@3 gcc@8 + brew install flex bison cmake python@3 gcc@8 llvm@13 python3 -m pip install --upgrade pip setuptools python3 -m pip install --user 'Jinja2>=2.9.3' 'PyYAML>=3.13' pytest pytest-cov numpy 'sympy>=1.3' displayName: 'Install Dependencies' @@ -192,7 +196,7 @@ stages: export PATH=/usr/local/opt/flex/bin:/usr/local/opt/bison/bin:$PATH; mkdir -p $(Build.Repository.LocalPath)/build cd $(Build.Repository.LocalPath)/build - cmake .. -DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF + cmake .. 
-DPYTHON_EXECUTABLE=$(which python3) -DCMAKE_INSTALL_PREFIX=$HOME/nmodl -DCMAKE_BUILD_TYPE=RelWithDebInfo -DNMODL_ENABLE_PYTHON_BINDINGS=OFF -DLLVM_DIR=$(brew --prefix llvm@13)/lib/cmake/llvm -DNMODL_ENABLE_LLVM=ON make -j 2 if [ $? -ne 0 ] then @@ -237,9 +241,11 @@ stages: ./bin/nrnivmodl-core $(Build.Repository.LocalPath)/test/integration/mod env: SHELL: 'bash' + condition: false displayName: 'Build Neuron and Run Integration Tests' - job: 'manylinux_wheels' timeoutInMinutes: 45 + condition: eq(1,2) pool: vmImage: 'ubuntu-20.04' strategy: @@ -289,6 +295,7 @@ stages: - template: ci/upload-wheels.yml - job: 'macos_wheels' timeoutInMinutes: 45 + condition: eq(1,2) pool: vmImage: 'macOS-11' strategy: diff --git a/cmake/LLVMHelper.cmake b/cmake/LLVMHelper.cmake new file mode 100644 index 0000000000..717a597f95 --- /dev/null +++ b/cmake/LLVMHelper.cmake @@ -0,0 +1,73 @@ +# ============================================================================= +# LLVM/Clang needs to be linked with either libc++ or libstdc++ +# ============================================================================= + +find_package(LLVM REQUIRED CONFIG) + +# include LLVM libraries +set(NMODL_LLVM_COMPONENTS + aggressiveinstcombine + analysis + codegen + core + executionengine + instcombine + ipo + mc + native + nvptxcodegen + nvptxdesc + nvptxinfo + orcjit + target + transformutils + scalaropts + support) + +if(NMODL_ENABLE_JIT_EVENT_LISTENERS) + list(APPEND NMODL_LLVM_COMPONENTS inteljitevents perfjitevents) +endif() + +llvm_map_components_to_libnames(LLVM_LIBS_TO_LINK ${NMODL_LLVM_COMPONENTS}) + +set(CMAKE_REQUIRED_INCLUDES ${LLVM_INCLUDE_DIRS}) +set(CMAKE_REQUIRED_LIBRARIES ${LLVM_LIBS_TO_LINK}) + +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NMODL_ENABLE_LLVM) + include(CheckCXXSourceCompiles) + + # simple code to test LLVM library linking + set(CODE_TO_TEST + " + #include + using namespace llvm; + int main(int argc, char* argv[]) { + std::unique_ptr> Builder; + }") + + # first compile 
without any flags + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIB_LINK_TEST) + + # if standard compilation fails + if(NOT LLVM_LIB_LINK_TEST) + # try libstdc++ first + set(CMAKE_REQUIRED_FLAGS "-stdlib=libstdc++") + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIBSTDCPP_TEST) + # on failure, try libc++ + if(NOT LLVM_LIBSTDCPP_TEST) + set(CMAKE_REQUIRED_FLAGS "-stdlib=libc++") + check_cxx_source_compiles("${CODE_TO_TEST}" LLVM_LIBCPP_TEST) + endif() + # if either library works then add it to CXX flags + if(LLVM_LIBSTDCPP_TEST OR LLVM_LIBCPP_TEST) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_REQUIRED_FLAGS}") + message( + STATUS + "Adding ${CMAKE_REQUIRED_FLAGS} to CMAKE_CXX_FLAGS, required to link with LLVM libraries") + else() + message( + STATUS + "WARNING : -stdlib=libstdcx++ or -stdlib=libc++ didn't work to link with LLVM library") + endif() + endif() +endif() diff --git a/setup.py b/setup.py index 5b853ee569..2631f447f3 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ def _config_exe(exe_name): ] -cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable] +cmake_args = ["-DPYTHON_EXECUTABLE=" + sys.executable, "-DNMODL_ENABLE_LLVM=OFF", "-DNMODL_ENABLE_PYTHON_BINDINGS=ON"] if "bdist_wheel" in sys.argv: cmake_args.append("-DLINK_AGAINST_PYTHON=FALSE") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a81895ff3..697f92ce51 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,11 @@ target_link_libraries( util lexer ${NMODL_WRAPPER_LIBS}) + +if(NMODL_ENABLE_LLVM) + target_link_libraries(nmodl llvm_codegen llvm_benchmark benchmark_data ${LLVM_LIBS_TO_LINK}) +endif() + cpp_cc_configure_sanitizers(TARGET nmodl) # ============================================================================= diff --git a/src/ast/ast_common.hpp b/src/ast/ast_common.hpp index c2d3ffc7fa..1766dea62b 100644 --- a/src/ast/ast_common.hpp +++ b/src/ast/ast_common.hpp @@ -43,9 +43,12 @@ namespace ast { * * NMODL support different binary operators 
and this * type is used to store their value in the AST. + * + * \note `+=` and `-=` are not supported by NMODL but they + * are added for code generation nodes. */ typedef enum { - BOP_ADDITION, ///< \+ + BOP_ADDITION = 0, ///< \+ BOP_SUBTRACTION, ///< -- BOP_MULTIPLICATION, ///< \c * BOP_DIVISION, ///< \/ @@ -58,7 +61,9 @@ typedef enum { BOP_LESS_EQUAL, ///< <= BOP_ASSIGN, ///< = BOP_NOT_EQUAL, ///< != - BOP_EXACT_EQUAL ///< == + BOP_EXACT_EQUAL, ///< == + BOP_ADD_ASSIGN, ///< \+= + BOP_SUB_ASSIGN ///< \-= } BinaryOp; /** @@ -68,7 +73,7 @@ typedef enum { * is used to lookup the corresponding symbol for the operator. */ static const std::string BinaryOpNames[] = - {"+", "-", "*", "/", "^", "&&", "||", ">", "<", ">=", "<=", "=", "!=", "=="}; + {"+", "-", "*", "/", "^", "&&", "||", ">", "<", ">=", "<=", "=", "!=", "==", "+=", "-="}; /// enum type for unary operators typedef enum { UOP_NOT, UOP_NEGATION } UnaryOp; @@ -100,6 +105,20 @@ typedef enum { LTMINUSGT, LTLT, MINUSGT } ReactionOp; /// string representation of ast::ReactionOp static const std::string ReactionOpNames[] = {"<->", "<<", "->"}; +/** + * Get corresponding ast::BinaryOp for given string + * @param op Binary operator in string format + * @return ast::BinaryOp for given string + */ +static inline BinaryOp string_to_binaryop(const std::string& op) { + /// check if binary operator supported otherwise error + auto it = std::find(std::begin(BinaryOpNames), std::end(BinaryOpNames), op); + if (it == std::end(BinaryOpNames)) { + throw std::runtime_error("Error in string_to_binaryop, can't find " + op); + } + int pos = std::distance(std::begin(BinaryOpNames), it); + return static_cast(pos); +} /** @} */ // end of ast_prop } // namespace ast diff --git a/src/codegen/CMakeLists.txt b/src/codegen/CMakeLists.txt index d261fa7acf..aeb8beddf2 100644 --- a/src/codegen/CMakeLists.txt +++ b/src/codegen/CMakeLists.txt @@ -11,16 +11,32 @@ add_library( codegen_helper_visitor.cpp codegen_info.cpp codegen_ispc_visitor.cpp - 
codegen_utils.cpp) + codegen_utils.cpp + codegen_driver.cpp) add_dependencies(codegen lexer util visitor) target_link_libraries(codegen PRIVATE util) +# ~~~ +# pybind11::embed adds PYTHON_LIBRARIES to target_link_libraries. To avoid link to +# libpython, we can use `pybind11::module` interface library from pybind11. +# ~~~ +if(NOT LINK_AGAINST_PYTHON) + target_link_libraries(codegen PRIVATE pybind11::module) +else() + target_link_libraries(codegen PRIVATE pybind11::embed) +endif() + # copy to build directory to make usable from build directory configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fast_math.ispc ${CMAKE_BINARY_DIR}/include/nmodl/fast_math.ispc COPYONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/fast_math.hpp ${CMAKE_BINARY_DIR}/include/nmodl/fast_math.hpp COPYONLY) +# build llvm visitor if enabled +if(NMODL_ENABLE_LLVM) + add_subdirectory(llvm) +endif() + # ============================================================================= # Install include files # ============================================================================= diff --git a/src/codegen/codegen_acc_visitor.cpp b/src/codegen/codegen_acc_visitor.cpp index 0c8838475c..c9cbfe028d 100644 --- a/src/codegen/codegen_acc_visitor.cpp +++ b/src/codegen/codegen_acc_visitor.cpp @@ -185,8 +185,8 @@ void CodegenAccVisitor::print_net_init_acc_serial_annotation_block_end() { } void CodegenAccVisitor::print_nrn_cur_matrix_shadow_update() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { print_atomic_reduction_pragma(); } @@ -202,8 +202,8 @@ void CodegenAccVisitor::print_fast_imem_calculation() { return; } - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); printer->start_block("if (nt->nrn_fast_imem)"); if (info.point_process) { print_atomic_reduction_pragma(); 
diff --git a/src/codegen/codegen_c_visitor.cpp b/src/codegen/codegen_c_visitor.cpp index 4e800013a4..c4c88327f9 100644 --- a/src/codegen/codegen_c_visitor.cpp +++ b/src/codegen/codegen_c_visitor.cpp @@ -322,80 +322,6 @@ void CodegenCVisitor::visit_update_dt(const ast::UpdateDt& node) { /* Common helper routines */ /****************************************************************************************/ - -/** - * \details Certain statements like unit, comment, solve can/need to be skipped - * during code generation. Note that solve block is wrapped in expression - * statement and hence we have to check inner expression. It's also true - * for the initial block defined inside net receive block. - */ -bool CodegenCVisitor::statement_to_skip(const Statement& node) { - // clang-format off - if (node.is_unit_state() - || node.is_line_comment() - || node.is_block_comment() - || node.is_solve_block() - || node.is_conductance_hint() - || node.is_table_statement()) { - return true; - } - // clang-format on - if (node.is_expression_statement()) { - auto expression = dynamic_cast(&node)->get_expression(); - if (expression->is_solve_block()) { - return true; - } - if (expression->is_initial_block()) { - return true; - } - } - return false; -} - - -bool CodegenCVisitor::net_send_buffer_required() const noexcept { - if (net_receive_required() && !info.artificial_cell) { - if (info.net_event_used || info.net_send_used || info.is_watch_used()) { - return true; - } - } - return false; -} - - -bool CodegenCVisitor::net_receive_buffering_required() const noexcept { - return info.point_process && !info.artificial_cell && info.net_receive_node != nullptr; -} - - -bool CodegenCVisitor::nrn_state_required() const noexcept { - if (info.artificial_cell) { - return false; - } - return info.nrn_state_block != nullptr || breakpoint_exist(); -} - - -bool CodegenCVisitor::nrn_cur_required() const noexcept { - return info.breakpoint_node != nullptr && !info.currents.empty(); -} - - -bool 
CodegenCVisitor::net_receive_exist() const noexcept { - return info.net_receive_node != nullptr; -} - - -bool CodegenCVisitor::breakpoint_exist() const noexcept { - return info.breakpoint_node != nullptr; -} - - -bool CodegenCVisitor::net_receive_required() const noexcept { - return net_receive_exist(); -} - - /** * \details When floating point data type is not default (i.e. double) then we * have to copy old array to new type (for range variables). @@ -420,7 +346,7 @@ bool CodegenCVisitor::state_variable(const std::string& name) const { int CodegenCVisitor::position_of_float_var(const std::string& name) const { int index = 0; - for (const auto& var: codegen_float_variables) { + for (const auto& var: info.codegen_float_variables) { if (var->get_name() == name) { return index; } @@ -432,7 +358,7 @@ int CodegenCVisitor::position_of_float_var(const std::string& name) const { int CodegenCVisitor::position_of_int_var(const std::string& name) const { int index = 0; - for (const auto& var: codegen_int_variables) { + for (const auto& var: info.codegen_int_variables) { if (var.symbol->get_name() == name) { return index; } @@ -557,11 +483,11 @@ int CodegenCVisitor::float_variables_size() const { float_size++; } /// for g_unused variable - if (breakpoint_exist()) { + if (info.breakpoint_exist()) { float_size++; } /// for tsave variable - if (net_receive_exist()) { + if (info.net_receive_exist()) { float_size++; } return float_size; @@ -752,6 +678,22 @@ bool CodegenCVisitor::is_constant_variable(const std::string& name) const { is_constant = true; } } + // Check whether the variable exists in the codegen_int_variables of the CodegenInfo struct + // which hold information whether the variables are const or not + const auto& int_variable_it = std::find_if(info.codegen_int_variables.begin(), + info.codegen_int_variables.end(), + [&name](const IndexVariableInfo& var) { + return var.symbol->get_name() == name; + }); + const auto& const_variable_it = 
std::find_if(info.constant_variables.begin(), + info.constant_variables.end(), + [&name](const IndexVariableInfo& var) { + return var.symbol->get_name() == name; + }); + is_constant = is_constant || + (int_variable_it != info.codegen_int_variables.end() && + int_variable_it->is_constant) || + const_variable_it != info.constant_variables.end(); return is_constant; } @@ -826,198 +768,6 @@ void CodegenCVisitor::update_index_semantics() { } -std::vector CodegenCVisitor::get_float_variables() { - // sort with definition order - auto comparator = [](const SymbolType& first, const SymbolType& second) -> bool { - return first->get_definition_order() < second->get_definition_order(); - }; - - auto assigned = info.assigned_vars; - auto states = info.state_vars; - - // each state variable has corresponding Dstate variable - for (auto& state: states) { - auto name = "D" + state->get_name(); - auto symbol = make_symbol(name); - if (state->is_array()) { - symbol->set_as_array(state->get_length()); - } - symbol->set_definition_order(state->get_definition_order()); - assigned.push_back(symbol); - } - std::sort(assigned.begin(), assigned.end(), comparator); - - auto variables = info.range_parameter_vars; - variables.insert(variables.end(), - info.range_assigned_vars.begin(), - info.range_assigned_vars.end()); - variables.insert(variables.end(), info.range_state_vars.begin(), info.range_state_vars.end()); - variables.insert(variables.end(), assigned.begin(), assigned.end()); - - if (info.vectorize) { - variables.push_back(make_symbol(naming::VOLTAGE_UNUSED_VARIABLE)); - } - - if (breakpoint_exist()) { - std::string name = info.vectorize ? 
naming::CONDUCTANCE_UNUSED_VARIABLE - : naming::CONDUCTANCE_VARIABLE; - - // make sure conductance variable like `g` is not already defined - if (auto r = std::find_if(variables.cbegin(), - variables.cend(), - [&](const auto& s) { return name == s->get_name(); }); - r == variables.cend()) { - variables.push_back(make_symbol(name)); - } - } - - if (net_receive_exist()) { - variables.push_back(make_symbol(naming::T_SAVE_VARIABLE)); - } - return variables; -} - - -/** - * IndexVariableInfo has following constructor arguments: - * - symbol - * - is_vdata (false) - * - is_index (false - * - is_integer (false) - * - * Which variables are constant qualified? - * - * - node area is read only - * - read ion variables are read only - * - style_ionname is index / offset - */ -// NOLINTNEXTLINE(readability-function-cognitive-complexity) -std::vector CodegenCVisitor::get_int_variables() { - std::vector variables; - if (info.point_process) { - variables.emplace_back(make_symbol(naming::NODE_AREA_VARIABLE)); - variables.back().is_constant = true; - /// note that this variable is not printed in neuron implementation - if (info.artificial_cell) { - variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), true); - } else { - variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), false, false, true); - variables.back().is_constant = true; - } - } - - for (const auto& ion: info.ions) { - bool need_style = false; - std::unordered_map ion_vars; // used to keep track of the variables to - // not have doubles between read/write. 
Same - // name variables are allowed - for (const auto& var: ion.reads) { - const std::string name = naming::ION_VARNAME_PREFIX + var; - variables.emplace_back(make_symbol(name)); - variables.back().is_constant = true; - ion_vars[name] = static_cast(variables.size() - 1); - } - - /// symbol for di_ion_dv var - std::shared_ptr ion_di_dv_var = nullptr; - - for (const auto& var: ion.writes) { - const std::string name = naming::ION_VARNAME_PREFIX + var; - - const auto ion_vars_it = ion_vars.find(name); - if (ion_vars_it != ion_vars.end()) { - variables[ion_vars_it->second].is_constant = false; - } else { - variables.emplace_back(make_symbol(naming::ION_VARNAME_PREFIX + var)); - } - if (ion.is_ionic_current(var)) { - ion_di_dv_var = make_symbol(std::string(naming::ION_VARNAME_PREFIX) + "di" + - ion.name + "dv"); - } - if (ion.is_intra_cell_conc(var) || ion.is_extra_cell_conc(var)) { - need_style = true; - } - } - - /// insert after read/write variables but before style ion variable - if (ion_di_dv_var != nullptr) { - variables.emplace_back(ion_di_dv_var); - } - - if (need_style) { - variables.emplace_back(make_symbol("style_" + ion.name), false, true); - variables.back().is_constant = true; - } - } - - for (const auto& var: info.pointer_variables) { - auto name = var->get_name(); - if (var->has_any_property(NmodlType::pointer_var)) { - variables.emplace_back(make_symbol(name)); - } else { - variables.emplace_back(make_symbol(name), true); - } - } - - if (info.diam_used) { - variables.emplace_back(make_symbol(naming::DIAM_VARIABLE)); - } - - if (info.area_used) { - variables.emplace_back(make_symbol(naming::AREA_VARIABLE)); - } - - // for non-artificial cell, when net_receive buffering is enabled - // then tqitem is an offset - if (info.net_send_used) { - if (info.artificial_cell) { - variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), true); - } else { - variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), false, false, true); - 
variables.back().is_constant = true; - } - info.tqitem_index = static_cast(variables.size() - 1); - } - - /** - * \note Variables for watch statements : there is one extra variable - * used in coreneuron compared to actual watch statements for compatibility - * with neuron (which uses one extra Datum variable) - */ - if (!info.watch_statements.empty()) { - for (int i = 0; i < info.watch_statements.size() + 1; i++) { - variables.emplace_back(make_symbol(fmt::format("watch{}", i)), false, false, true); - } - } - return variables; -} - - -/** - * \details When we enable fine level parallelism at channel level, we have do updates - * to ion variables in atomic way. As cpus don't have atomic instructions in - * simd loop, we have to use shadow vectors for every ion variables. Here - * we return list of all such variables. - * - * \todo If conductances are specified, we don't need all below variables - */ -std::vector CodegenCVisitor::get_shadow_variables() { - std::vector variables; - for (const auto& ion: info.ions) { - for (const auto& var: ion.writes) { - variables.push_back({make_symbol(shadow_varname(naming::ION_VARNAME_PREFIX + var))}); - if (ion.is_ionic_current(var)) { - variables.push_back({make_symbol(shadow_varname( - std::string(naming::ION_VARNAME_PREFIX) + "di" + ion.name + "dv"))}); - } - } - } - variables.push_back({make_symbol("ml_rhs")}); - variables.push_back({make_symbol("ml_d")}); - return variables; -} - - /****************************************************************************************/ /* Routines must be overloaded in backend */ /****************************************************************************************/ @@ -1037,6 +787,9 @@ std::string CodegenCVisitor::get_parameter_str(const ParamVector& params) { return param; } +void CodegenCVisitor::print_backend_compute_routine_decl() { + // backend specific, do nothing +} void CodegenCVisitor::print_channel_iteration_tiling_block_begin(BlockType /* type */) { // no tiling for cpu 
backend, just get loop bounds @@ -1132,13 +885,19 @@ bool CodegenCVisitor::nrn_cur_reduction_loop_required() { } +void CodegenCVisitor::print_channel_iteration_loop(const std::string& start = "start", + const std::string& end = "end") { + printer->start_block(fmt::format("for (int id = {}; id < {}; id++)", start, end)); +} + + /** * \details For CPU backend we iterate over all node counts. For cuda we use thread * index to check if block needs to be executed or not. */ void CodegenCVisitor::print_channel_iteration_block_begin(BlockType type) { print_channel_iteration_block_parallel_hint(type); - printer->start_block("for (int id = start; id < end; id++)"); + print_channel_iteration_loop(); } @@ -1160,8 +919,8 @@ void CodegenCVisitor::print_nrn_cur_matrix_shadow_update() { printer->add_line("shadow_rhs[id] = rhs;"); printer->add_line("shadow_d[id] = g;"); } else { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); print_atomic_reduction_pragma(); printer->fmt_line("vec_rhs[node_id] {} rhs;", rhs_op); print_atomic_reduction_pragma(); @@ -1171,8 +930,8 @@ void CodegenCVisitor::print_nrn_cur_matrix_shadow_update() { void CodegenCVisitor::print_nrn_cur_matrix_shadow_reduction() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { printer->add_line("int node_id = node_index[id];"); print_atomic_reduction_pragma(); @@ -1189,7 +948,7 @@ void CodegenCVisitor::print_atomic_reduction_pragma() { void CodegenCVisitor::print_shadow_reduction_block_begin() { - printer->start_block("for (int id = start; id < end; id++)"); + print_channel_iteration_loop(); } @@ -1326,7 +1085,7 @@ void CodegenCVisitor::print_statement_block(const ast::StatementBlock& node, auto statements = node.get_statements(); for (const auto& statement: statements) { - if 
(statement_to_skip(*statement)) { + if (info.statement_to_skip(*statement)) { continue; } /// not necessary to add indent for verbatim block (pretty-printing) @@ -2080,8 +1839,8 @@ std::string CodegenCVisitor::process_verbatim_text(std::string const& text) { std::string CodegenCVisitor::register_mechanism_arguments() const { - auto nrn_cur = nrn_cur_required() ? method_name(naming::NRN_CUR_METHOD) : "nullptr"; - auto nrn_state = nrn_state_required() ? method_name(naming::NRN_STATE_METHOD) : "nullptr"; + auto nrn_cur = info.nrn_cur_required() ? method_name(naming::NRN_CUR_METHOD) : "nullptr"; + auto nrn_state = info.nrn_state_required() ? method_name(naming::NRN_STATE_METHOD) : "nullptr"; auto nrn_alloc = method_name(naming::NRN_ALLOC_METHOD); auto nrn_init = method_name(naming::NRN_INIT_METHOD); auto const nrn_private_constructor = method_name(naming::NRN_PRIVATE_CONSTRUCTOR_METHOD); @@ -2203,7 +1962,7 @@ void CodegenCVisitor::print_num_variable_getter() { void CodegenCVisitor::print_net_receive_arg_size_getter() { - if (!net_receive_exist()) { + if (!info.net_receive_exist()) { return; } printer->add_newline(2); @@ -2398,17 +2157,18 @@ std::string CodegenCVisitor::get_variable_name(const std::string& name, bool use // clang-format on // float variable - auto f = std::find_if(codegen_float_variables.begin(), - codegen_float_variables.end(), + auto f = std::find_if(info.codegen_float_variables.begin(), + info.codegen_float_variables.end(), symbol_comparator); - if (f != codegen_float_variables.end()) { + if (f != info.codegen_float_variables.end()) { return float_variable_name(*f, use_instance); } // integer variable - auto i = - std::find_if(codegen_int_variables.begin(), codegen_int_variables.end(), index_comparator); - if (i != codegen_int_variables.end()) { + auto i = std::find_if(info.codegen_int_variables.begin(), + info.codegen_int_variables.end(), + index_comparator); + if (i != info.codegen_int_variables.end()) { return int_variable_name(*i, varname, 
use_instance); } @@ -2421,10 +2181,10 @@ std::string CodegenCVisitor::get_variable_name(const std::string& name, bool use } // shadow variable - auto s = std::find_if(codegen_shadow_variables.begin(), - codegen_shadow_variables.end(), + auto s = std::find_if(info.codegen_shadow_variables.begin(), + info.codegen_shadow_variables.end(), symbol_comparator); - if (s != codegen_shadow_variables.end()) { + if (s != info.codegen_shadow_variables.end()) { return ion_shadow_variable_name(*s); } @@ -2989,7 +2749,7 @@ void CodegenCVisitor::print_mechanism_register() { if (info.artificial_cell) { printer->fmt_line("add_nrn_artcell(mech_type, {});", info.tqitem_index); } - if (net_receive_buffering_required()) { + if (info.net_receive_buffering_required()) { printer->fmt_line("hoc_register_net_receive_buffering({}, mech_type);", method_name("net_buf_receive")); } @@ -3109,14 +2869,14 @@ void CodegenCVisitor::print_mechanism_range_var_structure(bool print_initialiser print_initialisers ? fmt::format("{{&coreneuron::{}}}", name) : std::string{}); } - for (auto& var: codegen_float_variables) { + for (auto& var: info.codegen_float_variables) { auto name = var->get_name(); auto type = get_range_var_float_type(var); auto qualifier = is_constant_variable(name) ? "const " : ""; printer->fmt_line( "{}{}* {}{}{};", qualifier, type, ptr_type_qualifier(), name, value_initialise); } - for (auto& var: codegen_int_variables) { + for (auto& var: info.codegen_int_variables) { auto name = var.symbol->get_name(); if (var.is_index || var.is_integer) { auto qualifier = var.is_constant ? 
"const " : ""; @@ -3296,9 +3056,9 @@ void CodegenCVisitor::print_instance_variable_setup() { for (auto const& [var, type]: info.neuron_global_variables) { ptr_members.push_back(var->get_name()); } - ptr_members.reserve(ptr_members.size() + codegen_float_variables.size() + - codegen_int_variables.size()); - for (auto& var: codegen_float_variables) { + ptr_members.reserve(ptr_members.size() + info.codegen_float_variables.size() + + info.codegen_int_variables.size()); + for (auto& var: info.codegen_float_variables) { auto name = var->get_name(); auto range_var_type = get_range_var_float_type(var); if (float_type == range_var_type) { @@ -3315,7 +3075,7 @@ void CodegenCVisitor::print_instance_variable_setup() { id += var->get_length(); } - for (auto& var: codegen_int_variables) { + for (auto& var: info.codegen_int_variables) { auto name = var.symbol->get_name(); auto const variable = [&var]() { if (var.is_index || var.is_integer) { @@ -3963,7 +3723,7 @@ void CodegenCVisitor::print_net_receive_loop_end() { void CodegenCVisitor::print_net_receive_buffering(bool need_mech_inst) { - if (!net_receive_required() || info.artificial_cell) { + if (!info.net_receive_required() || info.artificial_cell) { return; } printer->add_newline(2); @@ -4013,7 +3773,7 @@ void CodegenCVisitor::print_net_send_buffering_grow() { } void CodegenCVisitor::print_net_send_buffering() { - if (!net_send_buffer_required()) { + if (!info.net_send_buffer_required()) { return; } @@ -4077,7 +3837,7 @@ void CodegenCVisitor::visit_for_netcon(const ast::ForNetcon& node) { } void CodegenCVisitor::print_net_receive_kernel() { - if (!net_receive_required()) { + if (!info.net_receive_required()) { return; } codegen = true; @@ -4140,7 +3900,7 @@ void CodegenCVisitor::print_net_receive_kernel() { void CodegenCVisitor::print_net_receive() { - if (!net_receive_required()) { + if (!info.net_receive_required()) { return; } codegen = true; @@ -4298,7 +4058,7 @@ void CodegenCVisitor::visit_solution_expression(const 
SolutionExpression& node) void CodegenCVisitor::print_nrn_state() { - if (!nrn_state_required()) { + if (!info.nrn_state_required()) { return; } codegen = true; @@ -4489,8 +4249,8 @@ void CodegenCVisitor::print_fast_imem_calculation() { return; } std::string rhs, d; - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { rhs = "shadow_rhs[id]"; d = "shadow_d[id]"; @@ -4515,7 +4275,7 @@ void CodegenCVisitor::print_fast_imem_calculation() { } void CodegenCVisitor::print_nrn_cur() { - if (!nrn_cur_required()) { + if (!info.nrn_cur_required()) { return; } @@ -4608,11 +4368,13 @@ void CodegenCVisitor::print_g_unused() const { void CodegenCVisitor::print_compute_functions() { print_top_verbatim_blocks(); print_function_prototypes(); - for (const auto& procedure: info.procedures) { - print_procedure(*procedure); - } - for (const auto& function: info.functions) { - print_function(*function); + if (print_procedures_and_functions) { + for (const auto& procedure: info.procedures) { + print_procedure(*procedure); + } + for (const auto& function: info.functions) { + print_function(*function); + } } for (size_t i = 0; i < info.before_after_blocks.size(); i++) { print_before_after_block(info.before_after_blocks[i], i); @@ -4621,6 +4383,7 @@ void CodegenCVisitor::print_compute_functions() { auto block = callback->get_node_to_solve().get(); print_derivimplicit_kernel(block); } + print_backend_compute_routine_decl(); print_net_send_buffering(); print_net_init(); print_watch_activate(); @@ -4681,10 +4444,6 @@ void CodegenCVisitor::setup(const Program& node) { logger->warn("CodegenCVisitor : MOD file uses non-thread safe constructs of NMODL"); } - codegen_float_variables = get_float_variables(); - codegen_int_variables = get_int_variables(); - codegen_shadow_variables = get_shadow_variables(); - update_index_semantics(); rename_function_arguments(); } diff 
--git a/src/codegen/codegen_c_visitor.hpp b/src/codegen/codegen_c_visitor.hpp index 3a4fc39c13..e419951a65 100644 --- a/src/codegen/codegen_c_visitor.hpp +++ b/src/codegen/codegen_c_visitor.hpp @@ -46,46 +46,6 @@ namespace codegen { * @{ */ -/** - * \enum BlockType - * \brief Helper to represent various block types - * - * Note: do not assign integers to these enums - * - */ -enum BlockType { - /// initial block - Initial, - - /// constructor block - Constructor, - - /// destructor block - Destructor, - - /// breakpoint block - Equation, - - /// ode_* routines block (not used) - Ode, - - /// derivative block - State, - - /// watch block - Watch, - - /// net_receive block - NetReceive, - - /// before / after block - BeforeAfter, - - /// fake ending block type for loops on the enums. Keep it at the end - BlockTypeEnd -}; - - /** * \enum MemberType * \brief Helper to represent various variables types @@ -105,57 +65,6 @@ enum class MemberType { thread }; - -/** - * \class IndexVariableInfo - * \brief Helper to represent information about index/int variables - * - */ -struct IndexVariableInfo { - /// symbol for the variable - const std::shared_ptr symbol; - - /// if variable reside in vdata field of NrnThread - /// typically true for bbcore pointer - bool is_vdata = false; - - /// if this is pure index (e.g. style_ion) variables is directly - /// index and shouldn't be printed with data/vdata - bool is_index = false; - - /// if this is an integer (e.g. 
tqitem, point_process) variable which - /// is printed as array accesses - bool is_integer = false; - - /// if the variable is qualified as constant (this is property of IndexVariable) - bool is_constant = false; - - IndexVariableInfo(std::shared_ptr symbol, - bool is_vdata = false, - bool is_index = false, - bool is_integer = false) - : symbol(std::move(symbol)) - , is_vdata(is_vdata) - , is_index(is_index) - , is_integer(is_integer) {} -}; - - -/** - * \class ShadowUseStatement - * \brief Represents ion write statement during code generation - * - * Ion update statement needs use of shadow vectors for certain backends - * as atomics operations are not supported on cpu backend. - * - * \todo If shadow_lhs is empty then we assume shadow statement not required - */ -struct ShadowUseStatement { - std::string lhs; - std::string op; - std::string rhs; -}; - /** @} */ // end of codegen_details @@ -219,11 +128,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { */ symtab::SymbolTable* program_symtab = nullptr; - /** - * All float variables for the model - */ - std::vector codegen_float_variables; - /** * All int variables for the model */ @@ -260,6 +164,11 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { */ int current_watch_statement = 0; + /** + * Bool to select whether procedures and functions should be printed in the generated file + */ + bool print_procedures_and_functions = true; + /** * Data type of floating point variables */ @@ -309,23 +218,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { return "\"" + text + "\""; } - - /** - * Operator for rhs vector update (matrix update) - */ - std::string operator_for_rhs() const noexcept { - return info.electrode_current ? "+=" : "-="; - } - - - /** - * Operator for diagonal vector update (matrix update) - */ - std::string operator_for_d() const noexcept { - return info.electrode_current ? 
"-=" : "+="; - } - - /** * Data type for the local variables */ @@ -357,6 +249,10 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { return codegen::naming::DEFAULT_INTEGER_TYPE; } + /** + * Instance Struct type name suffix + */ + std::string instance_struct_type_suffix = "Instance"; /** * Checks if given function name is \c net_send @@ -390,7 +286,7 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * Name of structure that wraps range variables */ std::string instance_struct() const { - return fmt::format("{}_Instance", info.mod_suffix); + return fmt::format("{}_{}", info.mod_suffix, instance_struct_type_suffix); } @@ -420,26 +316,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { } - /** - * Constructs a shadow variable name - * \param name The name of the variable - * \return The name of the variable prefixed with \c shadow_ - */ - std::string shadow_varname(const std::string& name) const { - return "shadow_" + name; - } - - - /** - * Creates a temporary symbol - * \param name The name of the symbol - * \return A symbol based on the given name - */ - SymbolType make_symbol(const std::string& name) const { - return std::make_shared(name, ModToken()); - } - - /** * Checks if the given variable name belongs to a state variable * \param name The variable name @@ -448,36 +324,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { bool state_variable(const std::string& name) const; - /** - * Check if net receive/send buffering kernels required - */ - bool net_receive_buffering_required() const noexcept; - - - /** - * Check if nrn_state function is required - */ - bool nrn_state_required() const noexcept; - - - /** - * Check if nrn_cur function is required - */ - bool nrn_cur_required() const noexcept; - - - /** - * Check if net_receive function is required - */ - bool net_receive_required() const noexcept; - - - /** - * Check if net_send_buffer is required - */ - bool net_send_buffer_required() const noexcept; - - /** * Check if 
setup_range_variable function is required * \return @@ -485,18 +331,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { bool range_variable_setup_required() const noexcept; - /** - * Check if net_receive node exist - */ - bool net_receive_exist() const noexcept; - - - /** - * Check if breakpoint node exist - */ - bool breakpoint_exist() const noexcept; - - /** * Check if given method is defined in this model * \param name The name of the method to check @@ -664,27 +498,6 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { void update_index_semantics(); - /** - * Determine all \c float variables required during code generation - * \return A \c vector of \c float variables - */ - std::vector get_float_variables(); - - - /** - * Determine all \c int variables required during code generation - * \return A \c vector of \c int variables - */ - std::vector get_int_variables(); - - - /** - * Determine all ion write variables that require shadow vectors during code generation - * \return A \c vector of ion variables - */ - std::vector get_shadow_variables(); - - /** * Print the items in a vector as a list * @@ -1262,6 +1075,18 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { void print_net_event_call(const ast::FunctionCall& node); + /** + * Print the for loop statement going through all the mechanism instances + */ + void print_channel_iteration_loop(const std::string& start, const std::string& end); + + + /** + * Print backend compute routines declaration for various backends + */ + virtual void print_backend_compute_routine_decl(); + + /** * Print block start for tiling on channel iteration */ @@ -1791,19 +1616,19 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * \param skip_init_check \c true if we want the generated code to execute the initialization * conditionally */ - void print_nrn_init(bool skip_init_check = true); + virtual void print_nrn_init(bool skip_init_check = true); /** * Print nrn_state / state update function 
definition */ - void print_nrn_state(); + virtual void print_nrn_state(); /** * Print nrn_cur / current update function definition */ - void print_nrn_cur(); + virtual void print_nrn_cur(); /** * Print fast membrane current calculation code @@ -1895,12 +1720,12 @@ class CodegenCVisitor: public visitor::ConstAstVisitor { * @param print_initialisers Whether or not default values for variables * be included in the struct declaration. */ - void print_mechanism_range_var_structure(bool print_initialisers); + virtual void print_mechanism_range_var_structure(bool print_initialisers); /** * Print the function that initialize instance structure */ - void print_instance_variable_setup(); + virtual void print_instance_variable_setup(); void visit_binary_expression(const ast::BinaryExpression& node) override; void visit_binary_operator(const ast::BinaryOperator& node) override; diff --git a/src/codegen/codegen_cuda_visitor.cpp b/src/codegen/codegen_cuda_visitor.cpp index 8f58f04917..1c390ab0c9 100644 --- a/src/codegen/codegen_cuda_visitor.cpp +++ b/src/codegen/codegen_cuda_visitor.cpp @@ -94,8 +94,8 @@ void CodegenCudaVisitor::print_device_method_annotation() { void CodegenCudaVisitor::print_nrn_cur_matrix_shadow_update() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + auto rhs_op = info.operator_for_rhs(); + auto d_op = info.operator_for_d(); stringutils::remove_character(rhs_op, '='); stringutils::remove_character(d_op, '='); print_atomic_op("vec_rhs[node_id]", rhs_op, "rhs"); @@ -107,8 +107,8 @@ void CodegenCudaVisitor::print_fast_imem_calculation() { return; } - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + auto rhs_op = info.operator_for_rhs(); + auto d_op = info.operator_for_d(); stringutils::remove_character(rhs_op, '='); stringutils::remove_character(d_op, '='); printer->start_block("if (nt->nrn_fast_imem)"); diff --git a/src/codegen/codegen_driver.cpp b/src/codegen/codegen_driver.cpp new file mode 100644 index 
0000000000..abec225214 --- /dev/null +++ b/src/codegen/codegen_driver.cpp @@ -0,0 +1,284 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include + +#include "codegen/codegen_driver.hpp" +#include "codegen/codegen_transform_visitor.hpp" +#include "codegen_compatibility_visitor.hpp" +#include "utils/logger.hpp" +#include "visitors/after_cvode_to_cnexp_visitor.hpp" +#include "visitors/ast_visitor.hpp" +#include "visitors/constant_folder_visitor.hpp" +#include "visitors/global_var_visitor.hpp" +#include "visitors/implicit_argument_visitor.hpp" +#include "visitors/inline_visitor.hpp" +#include "visitors/ispc_rename_visitor.hpp" +#include "visitors/kinetic_block_visitor.hpp" +#include "visitors/local_to_assigned_visitor.hpp" +#include "visitors/local_var_rename_visitor.hpp" +#include "visitors/localize_visitor.hpp" +#include "visitors/loop_unroll_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/nmodl_visitor.hpp" +#include "visitors/perf_visitor.hpp" +#include "visitors/semantic_analysis_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/steadystate_visitor.hpp" +#include "visitors/sympy_conductance_visitor.hpp" +#include "visitors/sympy_solver_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "visitors/units_visitor.hpp" +#include "visitors/verbatim_var_rename_visitor.hpp" + +using namespace nmodl; +using namespace codegen; +using namespace visitor; + +bool CodegenDriver::prepare_mod(std::shared_ptr node, const std::string& modfile) { + /// whether to update existing symbol table or create new + /// one whenever we run symtab visitor. 
+ bool update_symtab = false; + + const auto scratch_dir = cfg.scratch_dir; + auto filepath = [scratch_dir, modfile](const std::string& suffix, const std::string& ext) { + static int count = 0; + return fmt::format( + "{}/{}.{}.{}.{}", scratch_dir, modfile, std::to_string(count++), suffix, ext); + }; + + /// just visit the ast + AstVisitor().visit_program(*node); + + /// construct symbol table + { + logger->info("Running symtab visitor"); + SymtabVisitor(update_symtab).visit_program(*node); + } + + /// Check some rules that ast should follow + { + logger->info("Running semantic analysis visitor"); + if (SemanticAnalysisVisitor().check(*node)) { + return false; + } + } + + /// use cnexp instead of after_cvode solve method + { + logger->info("Running CVode to cnexp visitor"); + AfterCVodeToCnexpVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("after_cvode_to_cnexp", "mod")); + } + + /// Rename variables that match ISPC compiler double constants + if (cfg.ispc_backend) { + logger->info("Running ISPC variables rename visitor"); + IspcRenameVisitor(node).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("ispc_double_rename", "mod")); + } + + /// GLOBAL to RANGE rename visitor + if (cfg.nmodl_global_to_range) { + // make sure to run perf visitor because code generator + // looks for read/write counts const/non-const declaration + PerfVisitor().visit_program(*node); + // make sure to run the GlobalToRange visitor after all the + // reinitializations of Symtab + logger->info("Running GlobalToRange visitor"); + GlobalToRangeVisitor(*node).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("ispc_double_rename", "mod")); + } + + /// LOCAL to ASSIGNED visitor + if (cfg.nmodl_local_to_range) { + logger->info("Running LOCAL to ASSIGNED visitor"); + PerfVisitor().visit_program(*node); + LocalToAssignedVisitor().visit_program(*node); + 
SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("global_to_range", "mod")); + } + + { + // Compatibility Checking + logger->info("Running code compatibility checker"); + // run perfvisitor to update read/write counts + PerfVisitor().visit_program(*node); + + auto ast_has_unhandled_nodes = CodegenCompatibilityVisitor().find_unhandled_ast_nodes( + *node); + // If we want to just check compatibility we return the result + if (cfg.only_check_compatibility) { + return !ast_has_unhandled_nodes; // negate since this function returns false on failure + } + + // If there is an incompatible construct and code generation is not forced exit NMODL + if (ast_has_unhandled_nodes && !cfg.force_codegen) { + return false; + } + } + + ast_to_nmodl(*node, filepath("ast", "mod")); + ast_to_json(*node, filepath("ast", "json")); + + if (cfg.verbatim_rename) { + logger->info("Running verbatim rename visitor"); + VerbatimVarRenameVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("verbatim_rename", "mod")); + } + + if (cfg.nmodl_const_folding) { + logger->info("Running nmodl constant folding visitor"); + ConstantFolderVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("constfold", "mod")); + } + + if (cfg.nmodl_unroll) { + logger->info("Running nmodl loop unroll visitor"); + LoopUnrollVisitor().visit_program(*node); + ConstantFolderVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("unroll", "mod")); + SymtabVisitor(update_symtab).visit_program(*node); + } + + /// note that we can not symtab visitor in update mode as we + /// replace kinetic block with derivative block of same name + /// in global scope + { + logger->info("Running KINETIC block visitor"); + auto kineticBlockVisitor = KineticBlockVisitor(); + kineticBlockVisitor.visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + const auto filename = filepath("kinetic", "mod"); + ast_to_nmodl(*node, filename); + if (cfg.nmodl_ast && 
kineticBlockVisitor.get_conserve_statement_count()) { + logger->warn( + fmt::format("{} presents non-standard CONSERVE statements in DERIVATIVE blocks. " + "Use it only for debugging/developing", + filename)); + } + } + + { + logger->info("Running STEADYSTATE visitor"); + SteadystateVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("steadystate", "mod")); + } + + /// Parsing units fron "nrnunits.lib" and mod files + { + logger->info("Parsing Units"); + UnitsVisitor(cfg.units_dir).visit_program(*node); + } + + /// once we start modifying (especially removing) older constructs + /// from ast then we should run symtab visitor in update mode so + /// that old symbols (e.g. prime variables) are not lost + update_symtab = true; + +#ifdef NMODL_LLVM_BACKEND + if (cfg.nmodl_inline || cfg.llvm_ir) { +#else + if (cfg.nmodl_inline) { +#endif + logger->info("Running nmodl inline visitor"); + InlineVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("inline", "mod")); + } + + if (cfg.local_rename) { + logger->info("Running local variable rename visitor"); + LocalVarRenameVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("local_rename", "mod")); + } + + if (cfg.nmodl_localize) { + // localize pass must follow rename pass to avoid conflict + logger->info("Running localize visitor"); + LocalizeVisitor(cfg.localize_verbatim).visit_program(*node); + LocalVarRenameVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("localize", "mod")); + } + + if (cfg.sympy_conductance) { + logger->info("Running sympy conductance visitor"); + SympyConductanceVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("sympy_conductance", "mod")); + } + + if (cfg.sympy_analytic || sparse_solver_exists(*node)) { + if (!cfg.sympy_analytic) { + 
logger->info( + "Automatically enable sympy_analytic because it exists solver of type sparse"); + } + logger->info("Running sympy solve visitor"); + SympySolverVisitor(cfg.sympy_pade, cfg.sympy_cse).visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("sympy_solve", "mod")); + } + + { + logger->info("Running cnexp visitor"); + NeuronSolveVisitor().visit_program(*node); + ast_to_nmodl(*node, filepath("cnexp", "mod")); + } + + { + SolveBlockVisitor().visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + ast_to_nmodl(*node, filepath("solveblock", "mod")); + } + + if (cfg.json_perfstat) { + std::string file{scratch_dir}; + file.append("/"); + file.append(modfile); + file.append(".perf.json"); + logger->info("Writing performance statistics to {}", file); + PerfVisitor(file).visit_program(*node); + } + + { + // Add implicit arguments (like celsius, nt) to NEURON functions (like + // nrn_ghk, at_time) whose signatures we have to massage. 
+ ImplicitArgumentVisitor{}.visit_program(*node); + SymtabVisitor(update_symtab).visit_program(*node); + } + + { + // make sure to run perf visitor because code generator + // looks for read/write counts const/non-const declaration + PerfVisitor().visit_program(*node); + } + + { + CodegenTransformVisitor{}.visit_program(*node); + ast_to_nmodl(*node, filepath("TransformVisitor", "mod")); + SymtabVisitor(update_symtab).visit_program(*node); + } + return true; +} + +void CodegenDriver::ast_to_nmodl(Program& ast, const std::string& filepath) const { + if (cfg.nmodl_ast) { + NmodlPrintVisitor(filepath).visit_program(ast); + logger->info("AST to NMODL transformation written to {}", filepath); + } +}; + +void CodegenDriver::ast_to_json(ast::Program& ast, const std::string& filepath) const { + if (cfg.json_ast) { + JSONVisitor(filepath).write(ast); + logger->info("AST to JSON transformation written to {}", filepath); + } +}; diff --git a/src/codegen/codegen_driver.hpp b/src/codegen/codegen_driver.hpp new file mode 100644 index 0000000000..14d8ed76ab --- /dev/null +++ b/src/codegen/codegen_driver.hpp @@ -0,0 +1,165 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + + +#pragma once + +#include +#include + +#include "ast/program.hpp" +#include "config/config.h" + +namespace nmodl { +namespace codegen { + +struct CodeGenConfig { + /// true if serial c code to be generated + bool c_backend = true; + + /// true if c code with openmp to be generated + bool omp_backend = false; + + /// true if ispc code to be generated + bool ispc_backend = false; + + /// true if c code with openacc to be generated + bool oacc_backend = false; + + /// true if cuda code to be generated + bool cuda_backend = false; + + /// true if sympy should be used for solving ODEs analytically + bool sympy_analytic = false; + + /// true if Pade approximation to be used + bool sympy_pade = false; + + /// true if CSE (temp variables) to be used + bool sympy_cse = false; + + /// true if conductance keyword can be added to breakpoint + bool sympy_conductance = false; + + /// true if inlining at nmodl level to be done + bool nmodl_inline = false; + + /// true if unroll at nmodl level to be done + bool nmodl_unroll = false; + + /// true if perform constant folding at nmodl level to be done + bool nmodl_const_folding = false; + + /// true if range variables to be converted to local + bool nmodl_localize = false; + + /// true if global variables to be converted to range + bool nmodl_global_to_range = false; + + /// true if top level local variables to be converted to range + bool nmodl_local_to_range = false; + + /// true if localize variables even if verbatim block is used + bool localize_verbatim = false; + + /// true if local variables to be renamed + bool local_rename = true; + + /// true if inline even if verbatim block exist + bool verbatim_inline = false; + + /// true if verbatim blocks + bool verbatim_rename = true; + + /// true if code generation is forced to happen even if there + /// is any incompatibility + bool force_codegen = false; + + /// true if we want to only check 
compatibility without generating code + bool only_check_compatibility = false; + + /// true if ion variable copies should be avoided + bool optimize_ionvar_copies_codegen = false; + + /// directory where code will be generated + std::string output_dir = "."; + + /// directory where intermediate file will be generated + std::string scratch_dir = "tmp"; + + /// directory where units lib file is located + std::string units_dir = NrnUnitsLib::get_path(); + + /// floating point data type + std::string data_type = "double"; + + /// true if ast should be converted to nmodl + bool nmodl_ast = false; + + /// true if ast should be converted to json + bool json_ast = false; + + /// true if performance stats should be converted to json + bool json_perfstat = false; + +#ifdef NMODL_LLVM_BACKEND + /// generate llvm IR + bool llvm_ir = false; + + /// use single precision floating-point types + bool llvm_float_type = false; + + /// optimisation level for IR generation + int llvm_opt_level_ir = 0; + + /// math library name + std::string llvm_math_library = "none"; + + /// disable debug information generation for the IR + bool llvm_no_debug = false; + + /// fast math flags for LLVM backend + std::vector llvm_fast_math_flags; + + /// traget CPU platform name + std::string llvm_cpu_name = "default"; + + /// traget GPU platform name + std::string llvm_gpu_name = "default"; + + /// GPU target architecture + std::string llvm_gpu_target_architecture = "sm_70"; + + /// llvm vector width if generating code for CPUs + int llvm_vector_width = 1; + + /// optimisation level for machine code generation + int llvm_opt_level_codegen = 0; + + /// list of shared libraries to link against in JIT + std::vector shared_lib_paths; +#endif +}; + +class CodegenDriver { + public: + explicit CodegenDriver(CodeGenConfig _cfg) + : cfg(std::move(_cfg)) {} + + bool prepare_mod(std::shared_ptr node, const std::string& modfile); + + private: + CodeGenConfig cfg; + + + /// write ast to nmodl + void 
ast_to_nmodl(ast::Program& ast, const std::string& filepath) const; + void ast_to_json(ast::Program& ast, const std::string& filepath) const; +}; + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/codegen_helper_visitor.cpp b/src/codegen/codegen_helper_visitor.cpp index 45a0c5c53f..4e81e1f0a9 100644 --- a/src/codegen/codegen_helper_visitor.cpp +++ b/src/codegen/codegen_helper_visitor.cpp @@ -24,6 +24,7 @@ using namespace ast; using symtab::syminfo::NmodlType; using symtab::syminfo::Status; + /** * How symbols are stored in NEURON? See notes written in markdown file. * @@ -285,6 +286,7 @@ void CodegenHelperVisitor::find_non_range_variables() { // clang-format on } + /** * Find range variables i.e. ones that are belong to per instance allocation * @@ -711,6 +713,9 @@ void CodegenHelperVisitor::visit_program(const ast::Program& node) { find_non_range_variables(); find_table_variables(); find_neuron_global_variables(); + info.get_int_variables(); + info.get_shadow_variables(); + info.get_float_variables(); } diff --git a/src/codegen/codegen_helper_visitor.hpp b/src/codegen/codegen_helper_visitor.hpp index 614f93732e..611eccb788 100644 --- a/src/codegen/codegen_helper_visitor.hpp +++ b/src/codegen/codegen_helper_visitor.hpp @@ -76,6 +76,16 @@ class CodegenHelperVisitor: public visitor::ConstAstVisitor { void find_neuron_global_variables(); static void sort_with_mod2c_symbol_order(std::vector& symbols); + /** + * Check if breakpoint node exist + */ + bool breakpoint_exist() const noexcept; + + /** + * Check if net_receive node exist + */ + bool net_receive_exist() const noexcept; + public: CodegenHelperVisitor() = default; diff --git a/src/codegen/codegen_info.cpp b/src/codegen/codegen_info.cpp index fb96b26e59..dc461d6399 100644 --- a/src/codegen/codegen_info.cpp +++ b/src/codegen/codegen_info.cpp @@ -8,6 +8,7 @@ #include "codegen/codegen_info.hpp" #include "ast/all.hpp" +#include "utils/logger.hpp" #include "visitors/var_usage_visitor.hpp" 
#include "visitors/visitor_utils.hpp" @@ -15,8 +16,19 @@ namespace nmodl { namespace codegen { +using symtab::syminfo::NmodlType; using visitor::VarUsageVisitor; +SymbolType make_symbol(const std::string& name) { + return std::make_shared(name, ModToken()); +} + + +std::string shadow_varname(const std::string& name) { + return "shadow_" + name; +} + + /// if any ion has write variable bool CodegenInfo::ion_has_write_variable() const { return std::any_of(ions.begin(), ions.end(), [](auto const& ion) { @@ -106,5 +118,303 @@ bool CodegenInfo::is_voltage_used_by_watch_statements() const { }); } +bool CodegenInfo::state_variable(const std::string& name) const { + // clang-format off + auto result = std::find_if(state_vars.begin(), + state_vars.end(), + [&name](const SymbolType& sym) { + return name == sym->get_name(); + } + ); + // clang-format on + return result != state_vars.end(); +} + +std::pair CodegenInfo::read_ion_variable_name( + const std::string& name) const { + return {name, "ion_" + name}; +} + + +std::pair CodegenInfo::write_ion_variable_name( + const std::string& name) const { + return {"ion_" + name, name}; +} + + +/** + * \details Current variable used in breakpoint block could be local variable. + * In this case, neuron has already renamed the variable name by prepending + * "_l". In our implementation, the variable could have been renamed by + * one of the pass. And hence, we search all local variables and check if + * the variable is renamed. Note that we have to look into the symbol table + * of statement block and not breakpoint. 
+ */ +std::string CodegenInfo::breakpoint_current(std::string current) const { + auto& breakpoint = breakpoint_node; + if (breakpoint == nullptr) { + return current; + } + const auto& symtab = breakpoint->get_statement_block()->get_symbol_table(); + const auto& variables = symtab->get_variables_with_properties(NmodlType::local_var); + for (const auto& var: variables) { + std::string renamed_name = var->get_name(); + std::string original_name = var->get_original_name(); + if (current == original_name) { + current = renamed_name; + break; + } + } + return current; +} + + +bool CodegenInfo::is_an_instance_variable(const std::string& varname) const { + /// check if symbol of given name exist + auto check_symbol = [](const std::string& name, const std::vector& symbols) { + for (auto& symbol: symbols) { + if (symbol->get_name() == name) { + return true; + } + } + return false; + }; + + /// check if variable exist into all possible types + if (check_symbol(varname, assigned_vars) || check_symbol(varname, state_vars) || + check_symbol(varname, range_parameter_vars) || check_symbol(varname, range_assigned_vars) || + check_symbol(varname, range_state_vars)) { + return true; + } + return false; +} + + +/** + * IndexVariableInfo has following constructor arguments: + * - symbol + * - is_vdata (false) + * - is_index (false + * - is_integer (false) + * + * Which variables are constant qualified? 
+ * + * - node area is read only + * - read ion variables are read only + * - style_ionname is index / offset + */ +void CodegenInfo::get_int_variables() { + if (point_process) { + codegen_int_variables.emplace_back(make_symbol(naming::NODE_AREA_VARIABLE)); + codegen_int_variables.back().is_constant = true; + /// note that this variable is not printed in neuron implementation + if (artificial_cell) { + codegen_int_variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), true); + } else { + codegen_int_variables.emplace_back(make_symbol(naming::POINT_PROCESS_VARIABLE), + false, + false, + true); + codegen_int_variables.back().is_constant = true; + } + } + + for (const auto& ion: ions) { + bool need_style = false; + std::unordered_map ion_vars; // used to keep track of the variables to + // not have doubles between read/write. Same + // name variables are allowed + for (const auto& var: ion.reads) { + const std::string name = naming::ION_VARNAME_PREFIX + var; + codegen_int_variables.emplace_back(make_symbol(name)); + codegen_int_variables.back().is_constant = true; + ion_vars[name] = codegen_int_variables.size() - 1; + } + + /// symbol for di_ion_dv var + std::shared_ptr ion_di_dv_var = nullptr; + + for (const auto& var: ion.writes) { + const std::string name = naming::ION_VARNAME_PREFIX + var; + + const auto ion_vars_it = ion_vars.find(name); + if (ion_vars_it != ion_vars.end()) { + codegen_int_variables[ion_vars_it->second].is_constant = false; + } else { + codegen_int_variables.emplace_back(make_symbol(naming::ION_VARNAME_PREFIX + var)); + } + if (ion.is_ionic_current(var)) { + ion_di_dv_var = make_symbol(std::string(naming::ION_VARNAME_PREFIX) + "di" + + ion.name + "dv"); + } + if (ion.is_intra_cell_conc(var) || ion.is_extra_cell_conc(var)) { + need_style = true; + } + } + + /// insert after read/write variables but before style ion variable + if (ion_di_dv_var != nullptr) { + codegen_int_variables.emplace_back(ion_di_dv_var); + } + + if (need_style) { 
+ codegen_int_variables.emplace_back(make_symbol("style_" + ion.name), false, true); + codegen_int_variables.back().is_constant = true; + } + } + + for (const auto& var: pointer_variables) { + auto name = var->get_name(); + if (var->has_any_property(NmodlType::pointer_var)) { + codegen_int_variables.emplace_back(make_symbol(name)); + } else { + codegen_int_variables.emplace_back(make_symbol(name), true); + } + } + + if (diam_used) { + codegen_int_variables.emplace_back(make_symbol(naming::DIAM_VARIABLE)); + } + + if (area_used) { + codegen_int_variables.emplace_back(make_symbol(naming::AREA_VARIABLE)); + } + + // for non-artificial cell, when net_receive buffering is enabled + // then tqitem is an offset + if (net_send_used) { + if (artificial_cell) { + codegen_int_variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), true); + } else { + codegen_int_variables.emplace_back(make_symbol(naming::TQITEM_VARIABLE), + false, + false, + true); + codegen_int_variables.back().is_constant = true; + } + tqitem_index = codegen_int_variables.size() - 1; + } + + /** + * \note Variables for watch statements : there is one extra variable + * used in coreneuron compared to actual watch statements for compatibility + * with neuron (which uses one extra Datum variable) + */ + if (!watch_statements.empty()) { + for (int i = 0; i < watch_statements.size() + 1; i++) { + codegen_int_variables.emplace_back(make_symbol(fmt::format("watch{}", i)), + false, + false, + true); + } + } +} + + +/** + * \details When we enable fine level parallelism at channel level, we have to do updates + to ion variables in atomic way. As cpus don't have atomic instructions in + simd loop, we have to use shadow vectors for every ion variable. Here + we return list of all such variables.
+ * + * \todo If conductances are specified, we don't need all below variables + */ +void CodegenInfo::get_shadow_variables() { + for (const auto& ion: ions) { + for (const auto& var: ion.writes) { + codegen_shadow_variables.push_back( + {make_symbol(shadow_varname(naming::ION_VARNAME_PREFIX + var))}); + if (ion.is_ionic_current(var)) { + codegen_shadow_variables.push_back({make_symbol(shadow_varname( + std::string(naming::ION_VARNAME_PREFIX) + "di" + ion.name + "dv"))}); + } + } + } + codegen_shadow_variables.push_back({make_symbol("ml_rhs")}); + codegen_shadow_variables.push_back({make_symbol("ml_d")}); +} + + +void CodegenInfo::get_float_variables() { + // sort with definition order + auto comparator = [](const SymbolType& first, const SymbolType& second) -> bool { + return first->get_definition_order() < second->get_definition_order(); + }; + + auto assigned = assigned_vars; + auto states = state_vars; + + // each state variable has corresponding Dstate variable + for (auto& state: states) { + auto name = "D" + state->get_name(); + auto symbol = make_symbol(name); + if (state->is_array()) { + symbol->set_as_array(state->get_length()); + } + symbol->set_definition_order(state->get_definition_order()); + assigned.push_back(symbol); + } + std::sort(assigned.begin(), assigned.end(), comparator); + + codegen_float_variables = range_parameter_vars; + codegen_float_variables.insert(codegen_float_variables.end(), + range_assigned_vars.begin(), + range_assigned_vars.end()); + codegen_float_variables.insert(codegen_float_variables.end(), + range_state_vars.begin(), + range_state_vars.end()); + codegen_float_variables.insert(codegen_float_variables.end(), assigned.begin(), assigned.end()); + + if (vectorize) { + codegen_float_variables.push_back(make_symbol(naming::VOLTAGE_UNUSED_VARIABLE)); + } + + if (breakpoint_exist()) { + std::string name = vectorize ? 
naming::CONDUCTANCE_UNUSED_VARIABLE + : naming::CONDUCTANCE_VARIABLE; + + // make sure conductance variable like `g` is not already defined + if (auto r = std::find_if(codegen_float_variables.cbegin(), + codegen_float_variables.cend(), + [&](const auto& s) { return name == s->get_name(); }); + r == codegen_float_variables.cend()) { + codegen_float_variables.push_back(make_symbol(name)); + } + } + + if (net_receive_exist()) { + codegen_float_variables.push_back(make_symbol(naming::T_SAVE_VARIABLE)); + } +} + +/** + * \details Certain statements like unit, comment, solve can/need to be skipped + * during code generation. Note that solve block is wrapped in expression + * statement and hence we have to check inner expression. It's also true + * for the initial block defined inside net receive block. + */ +bool CodegenInfo::statement_to_skip(const ast::Statement& node) const { + // clang-format off + if (node.is_unit_state() + || node.is_line_comment() + || node.is_block_comment() + || node.is_solve_block() + || node.is_conductance_hint() + || node.is_table_statement()) { + return true; + } + // clang-format on + if (node.is_expression_statement()) { + auto expression = dynamic_cast(&node)->get_expression(); + if (expression->is_solve_block()) { + return true; + } + if (expression->is_initial_block()) { + return true; + } + } + return false; +} + } // namespace codegen } // namespace nmodl diff --git a/src/codegen/codegen_info.hpp b/src/codegen/codegen_info.hpp index 9ca2409dbe..fc96db6e17 100644 --- a/src/codegen/codegen_info.hpp +++ b/src/codegen/codegen_info.hpp @@ -16,11 +16,62 @@ #include #include "ast/ast.hpp" +#include "codegen/codegen_naming.hpp" #include "symtab/symbol_table.hpp" namespace nmodl { namespace codegen { +using SymbolType = std::shared_ptr; + +/** + * Creates a temporary symbol + * \param name The name of the symbol + * \return A symbol based on the given name + */ +SymbolType make_symbol(const std::string& name); + +/** + * Constructs a shadow 
variable name + * \param name The name of the variable + * \return The name of the variable prefixed with \c shadow_ + */ +std::string shadow_varname(const std::string& name); + +/** + * \class IndexVariableInfo + * \brief Helper to represent information about index/int variables + * + */ +struct IndexVariableInfo { + /// symbol for the variable + const std::shared_ptr symbol; + + /// if variable reside in vdata field of NrnThread + /// typically true for bbcore pointer + bool is_vdata = false; + + /// if this is pure index (e.g. style_ion) variables is directly + /// index and shouldn't be printed with data/vdata + bool is_index = false; + + /// if this is an integer (e.g. tqitem, point_process) variable which + /// is printed as array accesses + bool is_integer = false; + + /// if the variable is qualified as constant (this is property of IndexVariable) + bool is_constant = false; + + IndexVariableInfo(std::shared_ptr symbol, + bool is_vdata = false, + bool is_index = false, + bool is_integer = false) + : symbol(std::move(symbol)) + , is_vdata(is_vdata) + , is_index(is_index) + , is_integer(is_integer) {} +}; + /** * @addtogroup codegen_details * @{ @@ -134,6 +185,66 @@ struct IndexSemantics { , size(size) {} }; +/** + * \enum BlockType + * \brief Helper to represent various block types + * + * Note: do not assign integers to these enums + * + */ +enum BlockType { + /// initial block + Initial, + + /// constructor block + Constructor, + + /// destructor block + Destructor, + + /// breakpoint block + Equation, + + /// ode_* routines block (not used) + Ode, + + /// derivative block + State, + + /// watch block + Watch, + + /// net_receive block + NetReceive, + + /// before / after block + BeforeAfter, + + /// fake ending block type for loops on the enums. 
Keep it at the end + BlockTypeEnd +}; + + +/** + * \class ShadowUseStatement + * \brief Represents ion write statement during code generation + * + * Ion update statement needs use of shadow vectors for certain backends + * as atomics operations are not supported on cpu backend. + * + * \todo Currently `nrn_wrote_conc` is also added to shadow update statements + * list as it's corresponding to ion update statement in INITIAL block. This + * needs to be factored out. + * \todo This can be represented as AST node (like ast::CodegenAtomicStatement) + * but currently C backend use this same implementation. So we are using this + * same structure and then converting to ast::CodegenAtomicStatement for LLVM + * visitor. + */ +struct ShadowUseStatement { + std::string lhs; + std::string op; + std::string rhs; +}; /** * \class CodegenInfo @@ -335,6 +446,15 @@ struct CodegenInfo { /// new one used in print_ion_types std::vector use_ion_variables; + /// all int variables for the model + std::vector codegen_int_variables; + + /// all ion variables that could be possibly written + std::vector codegen_shadow_variables; + + /// all float variables for the model + std::vector codegen_float_variables; + /// this is the order in which they appear in derivative block /// this is required while printing them in initlist function std::vector prime_variables_by_order; @@ -422,8 +542,154 @@ struct CodegenInfo { /// true if WatchStatement uses voltage v variable bool is_voltage_used_by_watch_statements() const; + /** + * Check if net_send_buffer is required + */ + bool net_send_buffer_required() const noexcept { + if (net_receive_required() && !artificial_cell) { + if (net_event_used || net_send_used || is_watch_used()) { + return true; + } + } + return false; + } + + /** + * Check if net receive/send buffering kernels required + */ + bool net_receive_buffering_required() const noexcept { + return point_process && !artificial_cell && net_receive_node != nullptr; + } + + /** + * Check if 
nrn_state function is required + */ + bool nrn_state_required() const noexcept { + if (artificial_cell) { + return false; + } + return nrn_state_block != nullptr || breakpoint_exist(); + } + + /** + * Check if nrn_cur function is required + */ + bool nrn_cur_required() const noexcept { + return breakpoint_node != nullptr && !currents.empty(); + } + + /** + * Check if net_receive node exist + */ + bool net_receive_exist() const noexcept { + return net_receive_node != nullptr; + } + + /** + * Check if breakpoint node exist + */ + bool breakpoint_exist() const noexcept { + return breakpoint_node != nullptr; + } + + + /** + * Operator for rhs vector update (matrix update) + * + * Note that we only rely on following two syntax for + * increment and decrement. Code generation backends + * are relying on this convention. + */ + std::string operator_for_rhs() const noexcept { + return electrode_current ? "+=" : "-="; + } + + + /** + * Operator for diagonal vector update (matrix update) + * + * Note that we only rely on following two syntax for + * increment and decrement. Code generation backends + * are relying on this convention. + */ + std::string operator_for_d() const noexcept { + return electrode_current ? 
"-=" : "+="; + } + + /** + * Check if net_receive function is required + */ + bool net_receive_required() const noexcept { + return net_receive_exist(); + } + + /** + * Checks if the given variable name belongs to a state variable + * \param name The variable name + * \return \c true if the variable is a state variable + */ + bool state_variable(const std::string& name) const; + + /** + * Return ion variable name and corresponding ion read variable name + * \param name The ion variable name + * \return The ion read variable name + */ + std::pair read_ion_variable_name(const std::string& name) const; + + /** + * Return ion variable name and corresponding ion write variable name + * \param name The ion variable name + * \return The ion write variable name + */ + std::pair write_ion_variable_name(const std::string& name) const; + + /** + * Determine the variable name for the "current" used in breakpoint block taking into account + * intermediate code transformations. + * \param current The variable name for the current used in the model + * \return The name for the current to be printed in C + */ + std::string breakpoint_current(std::string current) const; + + /** + * Check if variable with given name is an instance variable + * + * Instance variables are local to each mechanism instance and + need to be accessed with an array index. Such variables are + assigned, range, parameter+range etc.
+ * @param varname Name of the variable + * @return True if variable is per mechanism instance + */ + bool is_an_instance_variable(const std::string& varname) const; + /// if we need a call back to wrote_conc in neuron/coreneuron bool require_wrote_conc = false; + + /** + * Determine all \c int variables required during code generation + * \return A \c vector of \c int variables + */ + void get_int_variables(); + + /** + * Determine all ion write variables that require shadow vectors during code generation + * \return A \c vector of ion variables + */ + void get_shadow_variables(); + + /** + * Determine all \c float variables required during code generation + * \return A \c vector of \c float variables + */ + void get_float_variables(); + + /** + * Check if statement should be skipped for code generation + * @param node Statement to be checked for code generation + * @return True if statement should be skipped otherwise false + */ + bool statement_to_skip(const ast::Statement& node) const; }; /** @} */ // end of codegen_backends diff --git a/src/codegen/codegen_ispc_visitor.cpp b/src/codegen/codegen_ispc_visitor.cpp index fa1621178c..e9c482bc66 100644 --- a/src/codegen/codegen_ispc_visitor.cpp +++ b/src/codegen/codegen_ispc_visitor.cpp @@ -245,8 +245,8 @@ void CodegenIspcVisitor::print_atomic_op(const std::string& lhs, void CodegenIspcVisitor::print_nrn_cur_matrix_shadow_reduction() { - auto rhs_op = operator_for_rhs(); - auto d_op = operator_for_d(); + const auto& rhs_op = info.operator_for_rhs(); + const auto& d_op = info.operator_for_d(); if (info.point_process) { printer->add_line("uniform int node_id = node_index[id];"); printer->fmt_line("vec_rhs[node_id] {} shadow_rhs[id];", rhs_op); @@ -435,7 +435,7 @@ void CodegenIspcVisitor::print_ion_variable() { /****************************************************************************************/ void CodegenIspcVisitor::print_net_receive_buffering_wrapper() { - if (!net_receive_required() || info.artificial_cell) 
{ + if (!info.net_receive_required() || info.artificial_cell) { return; } printer->add_newline(2); @@ -509,17 +509,17 @@ void CodegenIspcVisitor::print_backend_compute_routine_decl() { printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (nrn_cur_required() && !emit_fallback[BlockType::Equation]) { + if (info.nrn_cur_required() && !emit_fallback[BlockType::Equation]) { compute_function = compute_method_name(BlockType::Equation); printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (nrn_state_required() && !emit_fallback[BlockType::State]) { + if (info.nrn_state_required() && !emit_fallback[BlockType::State]) { compute_function = compute_method_name(BlockType::State); printer->fmt_line("extern \"C\" void {}({});", compute_function, get_parameter_str(params)); } - if (net_receive_required()) { + if (info.net_receive_required()) { auto net_recv_params = ParamVector(); net_recv_params.emplace_back("", fmt::format("{}*", instance_struct()), "", "inst"); net_recv_params.emplace_back("", "NrnThread*", "", "nt"); @@ -540,7 +540,7 @@ bool CodegenIspcVisitor::check_incompatibilities() { }; // instance vars - if (check_incompatible_var_name(codegen_float_variables, + if (check_incompatible_var_name(info.codegen_float_variables, get_name_from_symbol_type_vector)) { return true; } @@ -607,11 +607,11 @@ bool CodegenIspcVisitor::check_incompatibilities() { visitor::calls_function(*info.net_receive_node, "net_send"))); emit_fallback[BlockType::Equation] = emit_fallback[BlockType::Equation] || - (nrn_cur_required() && info.breakpoint_node && + (info.nrn_cur_required() && info.breakpoint_node && has_incompatible_nodes(*info.breakpoint_node)); emit_fallback[BlockType::State] = emit_fallback[BlockType::State] || - (nrn_state_required() && info.nrn_state_block && + (info.nrn_state_required() && info.nrn_state_block && has_incompatible_nodes(*info.nrn_state_block)); @@ -668,7 +668,7 @@ void 
CodegenIspcVisitor::print_block_wrappers_initial_equation_state() { print_wrapper_routine(naming::NRN_INIT_METHOD, BlockType::Initial); } - if (nrn_cur_required()) { + if (info.nrn_cur_required()) { if (emit_fallback[BlockType::Equation]) { logger->warn("Falling back to C backend for emitting breakpoint block"); fallback_codegen.print_nrn_cur(); @@ -677,7 +677,7 @@ void CodegenIspcVisitor::print_block_wrappers_initial_equation_state() { } } - if (nrn_state_required()) { + if (info.nrn_state_required()) { if (emit_fallback[BlockType::State]) { logger->warn("Falling back to C backend for emitting state block"); fallback_codegen.print_nrn_state(); diff --git a/src/codegen/codegen_ispc_visitor.hpp b/src/codegen/codegen_ispc_visitor.hpp index f97d0085c6..4e35819962 100644 --- a/src/codegen/codegen_ispc_visitor.hpp +++ b/src/codegen/codegen_ispc_visitor.hpp @@ -168,7 +168,7 @@ class CodegenIspcVisitor: public CodegenCVisitor { void print_procedure(const ast::ProcedureBlock& node) override; - void print_backend_compute_routine_decl(); + void print_backend_compute_routine_decl() override; /// print wrapper function that calls ispc kernel diff --git a/src/codegen/codegen_naming.hpp b/src/codegen/codegen_naming.hpp index 9739285bc4..cdd6f73d6d 100644 --- a/src/codegen/codegen_naming.hpp +++ b/src/codegen/codegen_naming.hpp @@ -80,6 +80,9 @@ static constexpr char VOLTAGE_UNUSED_VARIABLE[] = "v_unused"; /// variable t indicating last execution time of net receive block static constexpr char T_SAVE_VARIABLE[] = "tsave"; +/// global variable second_order +static constexpr char SECOND_ORDER_VARIABLE[] = "secondorder"; + /// shadow rhs variable in neuron thread structure static constexpr char NTHREAD_RHS_SHADOW[] = "_shadow_rhs"; @@ -92,6 +95,12 @@ static constexpr char CELSIUS_VARIABLE[] = "celsius"; /// instance struct member pointing to the global variable structure static constexpr char INST_GLOBAL_MEMBER[] = "global"; +/// rhs variable in neuron thread structure +static 
constexpr char NTHREAD_RHS[] = "vec_rhs"; + +/// d variable in neuron thread structure +static constexpr char NTHREAD_D[] = "vec_d"; + /// t variable in neuron thread structure static constexpr char NTHREAD_T_VARIABLE[] = "t"; @@ -164,6 +173,12 @@ static constexpr char THREAD_ARGS_PROTO[] = "_threadargsproto_"; /// prefix for ion variable static constexpr char ION_VARNAME_PREFIX[] = "ion_"; +/// name of the mechanism instance parameter in LLVM IR +static constexpr char MECH_INSTANCE_VAR[] = "mech"; +static constexpr char MECH_NODECOUNT_VAR[] = "node_count"; + +/// name of induction variable used in the kernel. +static constexpr char INDUCTION_VAR[] = "id"; /// commonly used variables in verbatim block and how they /// should be mapped to new code generation backends diff --git a/src/codegen/llvm/CMakeLists.txt b/src/codegen/llvm/CMakeLists.txt new file mode 100644 index 0000000000..828e48fb80 --- /dev/null +++ b/src/codegen/llvm/CMakeLists.txt @@ -0,0 +1,58 @@ +# ============================================================================= +# Codegen sources +# ============================================================================= +set(LLVM_CODEGEN_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/annotation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/annotation.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_visitor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/codegen_llvm_helper_visitor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_debug_builder.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_ir_builder.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_utils.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/replace_with_lib_functions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/replace_with_lib_functions.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/target_platform.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/target_platform.hpp) + +# ============================================================================= +# LLVM codegen library and executable +# ============================================================================= + +include_directories(${LLVM_INCLUDE_DIRS}) +add_library(llvm_codegen OBJECT ${LLVM_CODEGEN_SOURCE_FILES}) +set_property(TARGET llvm_codegen PROPERTY POSITION_INDEPENDENT_CODE ON) + +add_dependencies(llvm_codegen lexer util visitor) +target_link_libraries(llvm_codegen PRIVATE util) + +if(NOT NMODL_AS_SUBPROJECT) + add_executable(nmodl_llvm_runner main.cpp) + + target_link_libraries( + nmodl_llvm_runner + CLI11::CLI11 + llvm_benchmark + llvm_codegen + codegen + visitor + symtab + lexer + util + test_util + printer + ${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) +endif() + +# ============================================================================= +# Install executable +# ============================================================================= + +if(NOT NMODL_AS_SUBPROJECT) + install(TARGETS nmodl_llvm_runner DESTINATION ${NMODL_INSTALL_DIR_SUFFIX}bin) +endif() diff --git a/src/codegen/llvm/annotation.cpp b/src/codegen/llvm/annotation.cpp new file mode 100644 index 0000000000..1427b8cd1a --- /dev/null +++ b/src/codegen/llvm/annotation.cpp @@ -0,0 +1,105 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/target_platform.hpp" + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +static constexpr const char nmodl_annotations[] = "nmodl.annotations"; +static constexpr const char nmodl_compute_kernel[] = "nmodl.compute-kernel"; + +namespace nmodl { +namespace custom { + +void Annotator::add_nmodl_compute_kernel_annotation(llvm::Function& function) { + llvm::LLVMContext& context = function.getContext(); + llvm::MDNode* node = llvm::MDNode::get(context, + llvm::MDString::get(context, nmodl_compute_kernel)); + function.setMetadata(nmodl_annotations, node); +} + +bool Annotator::has_nmodl_compute_kernel_annotation(llvm::Function& function) { + if (!function.hasMetadata(nmodl_annotations)) + return false; + + llvm::MDNode* node = function.getMetadata(nmodl_annotations); + std::string type = llvm::cast(node->getOperand(0))->getString().str(); + return type == nmodl_compute_kernel; +} + +void DefaultCPUAnnotator::annotate(llvm::Function& function) const { + // By convention, the compute kernel does not free memory and does not + // throw exceptions. + function.setDoesNotFreeMemory(); + function.setDoesNotThrow(); + + // We also want to specify that the pointers that instance struct holds + // do not alias, unless specified otherwise. In order to do that, we + // add a `noalias` attribute to the argument. As per Clang's + // specification: + // > The `noalias` attribute indicates that the only memory accesses + // > inside function are loads and stores from objects pointed to by + // > its pointer-typed arguments, with arbitrary offsets. 
+ function.addParamAttr(0, llvm::Attribute::NoAlias); + + // Finally, specify that the mechanism data struct pointer does not + // capture and is read-only. + function.addParamAttr(0, llvm::Attribute::NoCapture); + function.addParamAttr(0, llvm::Attribute::ReadOnly); +} + +void CUDAAnnotator::annotate(llvm::Function& function) const { + llvm::LLVMContext& context = function.getContext(); + llvm::Module* m = function.getParent(); + + auto one = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 1); + llvm::Metadata* metadata[] = {llvm::ValueAsMetadata::get(&function), + llvm::MDString::get(context, "kernel"), + llvm::ValueAsMetadata::get(one)}; + llvm::MDNode* node = llvm::MDNode::get(context, metadata); + + m->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(node); +} +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Annotator; +namespace llvm { + +char AnnotationPass::ID = 0; + +bool AnnotationPass::runOnModule(Module& module) { + bool modified = false; + + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration() && Annotator::has_nmodl_compute_kernel_annotation(function)) { + annotator->annotate(function); + modified = true; + } + } + + return modified; +} + +void AnnotationPass::getAnalysisUsage(AnalysisUsage& au) const { + au.setPreservesCFG(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); +} +} // namespace llvm diff --git a/src/codegen/llvm/annotation.hpp b/src/codegen/llvm/annotation.hpp new file mode 100644 index 0000000000..b7349a147f --- /dev/null +++ b/src/codegen/llvm/annotation.hpp @@ -0,0 +1,77 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" + +namespace nmodl { +namespace custom { + +/** + * \class Annotator + * \brief Base class that can be overriden to specify function annotations. + */ +class Annotator { + public: + virtual void annotate(llvm::Function& function) const = 0; + virtual ~Annotator() = default; + + /// Marks LLVM function as NMODL compute kernel. + static void add_nmodl_compute_kernel_annotation(llvm::Function& function); + + /// Returns true if LLVM function is marked as NMODL compute kernel. + static bool has_nmodl_compute_kernel_annotation(llvm::Function& function); +}; + +/** + * \class DefaultAnnotator + * \brief Specifies how LLVM IR functions for CPU platforms are annotated. Used + * by default. + */ +class DefaultCPUAnnotator: public Annotator { + public: + void annotate(llvm::Function& function) const override; +}; + +/** + * \class CUDAAnnotator + * \brief Specifies how LLVM IR functions for CUDA platforms are annotated. This + * includes marking functions with "kernel" or "device" attributes. + */ +class CUDAAnnotator: public Annotator { + public: + void annotate(llvm::Function& function) const override; +}; +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Annotator; +namespace llvm { + +/** + * \class AnnotationPass + * \brief LLVM module pass that annotates NMODL compute kernels. + */ +class AnnotationPass: public ModulePass { + private: + // Underlying annotator that is applied to each LLVM function. 
+ const Annotator* annotator; + + public: + static char ID; + + AnnotationPass(Annotator* annotator) + : ModulePass(ID) + , annotator(annotator) {} + + bool runOnModule(Module& module) override; + + void getAnalysisUsage(AnalysisUsage& au) const override; +}; +} // namespace llvm diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.cpp b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp new file mode 100644 index 0000000000..af28ee3e2e --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.cpp @@ -0,0 +1,1128 @@ + +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen_llvm_helper_visitor.hpp" + +#include "ast/all.hpp" +#include "codegen/codegen_helper_visitor.hpp" +#include "symtab/symbol_table.hpp" +#include "utils/logger.hpp" +#include "visitors/rename_visitor.hpp" +#include "visitors/visitor_utils.hpp" + +namespace nmodl { +namespace codegen { + +using symtab::syminfo::Status; + +/// initialize static member variables +const ast::AstNodeType CodegenLLVMHelperVisitor::INTEGER_TYPE = ast::AstNodeType::INTEGER; +const std::string CodegenLLVMHelperVisitor::NODECOUNT_VAR = "node_count"; +const std::string CodegenLLVMHelperVisitor::VOLTAGE_VAR = "voltage"; +const std::string CodegenLLVMHelperVisitor::NODE_INDEX_VAR = "node_index"; + +static constexpr const char epilogue_variable_prefix[] = "epilogue_"; + +/// Create asr::Varname node with given a given variable name +static ast::VarName* create_varname(const std::string& varname) { + return new ast::VarName(new ast::Name(new ast::String(varname)), nullptr, nullptr); +} + +/** + * Create initialization expression + * @param code Usually "id = 0" as a string + * @return Expression representing 
code + * \todo : we can not use `create_statement_as_expression` function because + * NMODL parser is using `ast::Double` type to represent all variables + * including Integer. See #542. + */ +static std::shared_ptr int_initialization_expression( + const std::string& induction_var, + int value = 0) { + // create id = 0 + const auto& id = create_varname(induction_var); + const auto& zero = new ast::Integer(value, nullptr); + return std::make_shared(id, ast::BinaryOperator(ast::BOP_ASSIGN), zero); +} + +/** + * \brief Create variable definition statement + * + * `LOCAL` variables in NMODL don't have type. These variables need + * to be defined with float type. Same for index, loop iteration and + * local variables. This helper function function is used to create + * type specific local variable. + * + * @param names Name of the variables to be defined + * @param type Type of the variables + * @return Statement defining variables + */ +static std::shared_ptr create_local_variable_statement( + const std::vector& names, + ast::AstNodeType type) { + /// create variables for the given name + ast::CodegenVarVector variables; + for (const auto& name: names) { + auto varname = new ast::Name(new ast::String(name)); + variables.emplace_back(new ast::CodegenVar(0, varname)); + } + auto var_type = new ast::CodegenVarType(type); + /// construct statement and return it + return std::make_shared(var_type, variables); +} + +/** + * \brief Create expression for a given NMODL code statement + * @param code NMODL code statement + * @return Expression representing given NMODL code + */ +static std::shared_ptr create_statement_as_expression(const std::string& code) { + const auto& statement = visitor::create_statement(code); + auto expr_statement = std::dynamic_pointer_cast(statement); + auto expr = expr_statement->get_expression()->clone(); + return std::make_shared(expr); +} + +/** + * \brief Create an expression for a given NMODL expression in string form + * @param code NMODL code 
expression + * @return Expression ast node representing NMODL code + */ +static ast::Expression* create_expression(const std::string& code) { + /// as provided code is only expression and not a full statement, create + /// a temporary assignment statement + const auto& wrapped_expr = create_statement_as_expression("some_var = " + code); + /// now extract RHS (representing original code) and return it as expression + auto expr = std::dynamic_pointer_cast(wrapped_expr)->get_expression(); + auto rhs = std::dynamic_pointer_cast(expr)->get_rhs(); + return new ast::WrappedExpression(rhs->clone()); +} + +CodegenFunctionVector CodegenLLVMHelperVisitor::get_codegen_functions(const ast::Program& node) { + const_cast(node).accept(*this); + return codegen_functions; +} + +/** + * \brief Add code generation function for FUNCTION or PROCEDURE block + * @param node AST node representing FUNCTION or PROCEDURE + * + * When we have a PROCEDURE or FUNCTION like + * + * \code{.mod} + * FUNCTION sum(x,y) { + * LOCAL res + * res = x + y + * sum = res + * } + * \endcode + * + * this gets typically converted to C/C++ code as: + * + * \code{.cpp} + * double sum(double x, double y) { + * double res; + * double ret_sum; + * res = x + y; + * ret_sum = res; + * return ret_sum; + * \endcode + * + * We perform following transformations so that code generation backends + * will have minimum logic: + * - Add type for the function arguments + * - Define variables and return variable + * - Add return type (int for PROCEDURE and double for FUNCTION) + */ +void CodegenLLVMHelperVisitor::create_function_for_node(ast::Block& node) { + /// name of the function from the node + std::string function_name = node.get_node_name(); + auto name = new ast::Name(new ast::String(function_name)); + + /// return variable name has "ret_" prefix + std::string return_var_name = fmt::format("ret_{}", function_name); + auto return_var = new ast::Name(new ast::String(return_var_name)); + + /// return type based on node 
type + ast::CodegenVarType* ret_var_type = nullptr; + if (node.get_node_type() == ast::AstNodeType::FUNCTION_BLOCK) { + ret_var_type = new ast::CodegenVarType(fp_type); + } else { + ret_var_type = new ast::CodegenVarType(INTEGER_TYPE); + } + + /// function body and it's statement, copy original block + auto block = node.get_statement_block()->clone(); + const auto& statements = block->get_statements(); + + /// convert local statement to codegenvar statement + convert_local_statement(*block); + + if (node.get_node_type() == ast::AstNodeType::PROCEDURE_BLOCK) { + block->insert_statement(statements.begin(), + std::make_shared( + int_initialization_expression(return_var_name))); + } + /// insert return variable at the start of the block + ast::CodegenVarVector codegen_vars; + codegen_vars.emplace_back(new ast::CodegenVar(0, return_var->clone())); + auto statement = std::make_shared(ret_var_type, codegen_vars); + block->insert_statement(statements.begin(), statement); + + /// add return statement + auto return_statement = new ast::CodegenReturnStatement(return_var); + block->emplace_back_statement(return_statement); + + /// prepare function arguments based original node arguments + ast::CodegenVarWithTypeVector arguments; + for (const auto& param: node.get_parameters()) { + /// create new type and name for creating new ast node + auto type = new ast::CodegenVarType(fp_type); + auto var = param->get_name()->clone(); + arguments.emplace_back(new ast::CodegenVarWithType(type, /*is_pointer=*/0, var)); + } + + /// return type of the function is same as return variable type + ast::CodegenVarType* fun_ret_type = ret_var_type->clone(); + + /// we have all information for code generation function, create a new node + /// which will be inserted later into AST + auto function = std::make_shared( + fun_ret_type, name, arguments, block, /*is_kernel=*/0); + if (node.get_token()) { + function->set_token(*node.get_token()->clone()); + } + codegen_functions.push_back(function); +} + 
+/** + * \note : Order of variables is not important but we assume all pointers + * are added first and then scalar variables like t, dt, second_order etc. + * This order is assumed when we allocate data for integration testing + * and benchmarking purpose. See CodegenDataHelper::create_data(). + */ +std::shared_ptr CodegenLLVMHelperVisitor::create_instance_struct() { + ast::CodegenVarWithTypeVector codegen_vars; + + auto add_var_with_type = + [&](const std::string& name, const ast::AstNodeType type, int is_pointer) { + auto var_name = new ast::Name(new ast::String(name)); + auto var_type = new ast::CodegenVarType(type); + auto codegen_var = new ast::CodegenVarWithType(var_type, is_pointer, var_name); + codegen_vars.emplace_back(codegen_var); + }; + + /// float variables are standard pointers to float vectors + for (const auto& float_var: info.codegen_float_variables) { + add_var_with_type(float_var->get_name(), fp_type, /*is_pointer=*/1); + } + + /// int variables are pointers to indexes for other vectors + for (const auto& int_var: info.codegen_int_variables) { + add_var_with_type(int_var.symbol->get_name(), fp_type, /*is_pointer=*/1); + } + + // for integer variables, there should be index + for (const auto& int_var: info.codegen_int_variables) { + std::string var_name = int_var.symbol->get_name() + "_index"; + add_var_with_type(var_name, INTEGER_TYPE, /*is_pointer=*/1); + } + + // add voltage and node index + add_var_with_type(VOLTAGE_VAR, fp_type, /*is_pointer=*/1); + add_var_with_type(NODE_INDEX_VAR, INTEGER_TYPE, /*is_pointer=*/1); + + // As we do not have `NrnThread` object as an argument, we store points to rhs + // and d to in the instance struct as well. Also need their respective shadow variables + // in case of point process mechanism. + // Note: shadow variables are not used at the moment because reduction will be taken care + // by LLVM backend (even on CPU via sequential add like ISPC). 
+ add_var_with_type(naming::NTHREAD_RHS, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_RHS_SHADOW, fp_type, /*is_pointer=*/1); + add_var_with_type(naming::NTHREAD_D_SHADOW, fp_type, /*is_pointer=*/1); + + // NOTE: All the pointer variables should be declared before the scalar variables otherwise + // the allocation of memory for the variables in the InstanceStruct and their offsets will be + // wrong + + // add dt, t, celsius + add_var_with_type(naming::NTHREAD_T_VARIABLE, fp_type, /*is_pointer=*/0); + add_var_with_type(naming::NTHREAD_DT_VARIABLE, fp_type, /*is_pointer=*/0); + add_var_with_type(naming::CELSIUS_VARIABLE, fp_type, /*is_pointer=*/0); + add_var_with_type(naming::SECOND_ORDER_VARIABLE, INTEGER_TYPE, /*is_pointer=*/0); + add_var_with_type(naming::MECH_NODECOUNT_VAR, INTEGER_TYPE, /*is_pointer=*/0); + + return std::make_shared(codegen_vars); +} + +/** + * Append all code specific statements from StatementBlock to given StatementVector + * @param statements Statement vector to which statements to be added + * @param block Statement block from which statetments should be appended + * @param info CodegenInfo object with necessary data and helper functions + */ +static void append_statements_from_block(ast::StatementVector& statements, + const std::shared_ptr block, + const codegen::CodegenInfo& info) { + for (const auto& statement: block->get_statements()) { + if (!info.statement_to_skip(*statement)) { + statements.emplace_back(statement->clone()); + } + } +} + +/** + * Create atomic statement for given expression of the form a[i] += expression + * @param var Name of the variable on the LHS (it's an array), e.g. `a` + * @param var_index Name of the index variable to access variable `var` e.g. 
`i` + * @param op_str Operators like =, += or -= + * @param rhs_str expression that will be added or subtracted from `var[var_index]` + * @return A statement representing atomic operation using `ast::CodegenAtomicStatement` + */ +static std::shared_ptr create_atomic_statement( + const std::string& var, + const std::string& var_index, + const std::string& op_str, + const std::string& rhs_str) { + // create lhs expression + auto varname = new ast::Name(new ast::String(var)); + auto index = new ast::Name(new ast::String(var_index)); + auto lhs = new ast::VarName(new ast::IndexedName(varname, index), + /*at=*/nullptr, + /*index=*/nullptr); + + auto op = ast::BinaryOperator(ast::string_to_binaryop(op_str)); + auto rhs = create_expression(rhs_str); + return std::make_shared(lhs, op, rhs); +} + +/** + * For a given block type, add read ion statements + * + * Depending upon the block type, we have to update read ion variables + * during code generation. Depending on block/procedure being printed, + * this method adds necessary read ion variable statements and also + * corresponding index calculation statements. Note that index statements + * are added separately at the beginning for just readability purpose. + * + * @param type The type of code block being generated + * @param int_variables Index variables to be created + * @param double_variables Floating point variables to be created + * @param index_statements Statements for loading indexes (typically for ions, rhs, d) + * @param body_statements main compute/update statements + * + * \todo After looking into mod2c and neuron implementation, it seems like + * Ode block type is not used. Need to look into implementation details. + * + * \todo Ion copy optimization is not implemented yet. This is currently + * implemented in C backend using `ion_read_statements_optimized()`. 
+ */ +void CodegenLLVMHelperVisitor::ion_read_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + /// create read ion and corresponding index statements + auto create_read_statements = [&](std::pair variable_names) { + // variable in current mechanism instance + std::string& varname = variable_names.first; + // ion variable to be read + std::string& ion_varname = variable_names.second; + // index for reading ion variable + std::string index_varname = fmt::format("{}_id", varname); + // first load the index + std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); + // now assign the value + std::string read_statement = + fmt::format("{} = {}[{}]", varname, ion_varname, index_varname); + // push index definition, index statement and actual read statement + int_variables.push_back(index_varname); + index_statements.push_back(visitor::create_statement(index_statement)); + body_statements.push_back(visitor::create_statement(read_statement)); + }; + + /// iterate over all ions and create statements for given block type + for (const auto& ion: info.ions) { + const std::string& name = ion.name; + for (const auto& var: ion.reads) { + if (type == BlockType::Ode && ion.is_ionic_conc(var) && info.state_variable(var)) { + continue; + } + auto variable_names = info.read_ion_variable_name(var); + create_read_statements(variable_names); + } + for (const auto& var: ion.writes) { + if (type == BlockType::Ode && ion.is_ionic_conc(var) && info.state_variable(var)) { + continue; + } + if (ion.is_ionic_conc(var)) { + auto variable_names = info.read_ion_variable_name(var); + create_read_statements(variable_names); + } + } + } +} + +/** + * For a given block type, add write ion statements + * + * Depending upon the block type, we have to update write ion variables + * during code generation. 
Depending on block/procedure being printed, + * this method adds necessary write ion variable statements and also + * corresponding index calculation statements. Note that index statements + * are added separately at the beginning for just readability purpose. + * + * @param type The type of code block being generated + * @param int_variables Index variables to be created + * @param double_variables Floating point variables to be created + * @param index_statements Statements for loading indexes (typically for ions) + * @param body_statements main compute/update statements + * + * \todo If intra or extra cellular ionic concentration is written + * then it requires call to `nrn_wrote_conc`. In C backend this is + * implemented in `ion_write_statements()` itself but this is not + * handled yet. + */ +void CodegenLLVMHelperVisitor::ion_write_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + /// create write ion and corresponding index statements + auto create_write_statements = [&](std::string ion_varname, std::string op, std::string rhs) { + // index for writing ion variable + std::string index_varname = fmt::format("{}_id", ion_varname); + // load index + std::string index_statement = fmt::format("{} = {}_index[id]", index_varname, ion_varname); + // push index definition, index statement and actual write statement + int_variables.push_back(index_varname); + index_statements.push_back(visitor::create_statement(index_statement)); + + // pass ion variable to write and its index + body_statements.push_back(create_atomic_statement(ion_varname, index_varname, op, rhs)); + }; + + /// iterate over all ions and create write ion statements for given block type + for (const auto& ion: info.ions) { + std::string concentration; + std::string name = ion.name; + for (const auto& var: ion.writes) { + auto variable_names = info.write_ion_variable_name(var); 
+ /// ionic currents are accumulated + if (ion.is_ionic_current(var)) { + if (type == BlockType::Equation) { + std::string current = info.breakpoint_current(var); + std::string lhs = variable_names.first; + std::string op = "+="; + std::string rhs = current; + // for synapse type + if (info.point_process) { + auto area = codegen::naming::NODE_AREA_VARIABLE; + rhs += fmt::format("*(1.e2/{0}[{0}_id])", area); + } + create_write_statements(lhs, op, rhs); + } + } else { + if (!ion.is_rev_potential(var)) { + concentration = var; + } + std::string lhs = variable_names.first; + std::string op = "="; + std::string rhs = variable_names.second; + create_write_statements(lhs, op, rhs); + } + } + + /// still need to handle, need to define easy to use API + if (type == BlockType::Initial && !concentration.empty()) { + int index = 0; + if (ion.is_intra_cell_conc(concentration)) { + index = 1; + } else if (ion.is_extra_cell_conc(concentration)) { + index = 2; + } else { + /// \todo Unhandled case also in neuron implementation + throw std::logic_error(fmt::format("codegen error for {} ion", ion.name)); + } + std::string ion_type_name = fmt::format("{}_type", ion.name); + std::string lhs = fmt::format("int {}", ion_type_name); + std::string op = "="; + std::string rhs = ion_type_name; + create_write_statements(lhs, op, rhs); + logger->error("conc_write_statement() call is required but it's not supported"); + } + } +} + +/** + * Convert variables in given node to instance variables + * + * For code generation, variables of type range, assigned, state or parameter+range + * needs to be converted to instance variable i.e. they need to be accessed with + * loop index variable. For example, `h` variables needs to be converted to `h[id]`. 
+ * + * @param node Ast node under which variables to be converted to instance type + */ +void CodegenLLVMHelperVisitor::convert_to_instance_variable(ast::Node& node, + const std::string& index_var) { + /// collect all variables in the node of type ast::VarName + auto variables = collect_nodes(node, {ast::AstNodeType::VAR_NAME}); + for (const auto& v: variables) { + auto variable = std::dynamic_pointer_cast(v); + auto variable_name = variable->get_node_name(); + + /// all instance variables defined in the mod file should be converted to + /// indexed variables based on the loop iteration variable + if (info.is_an_instance_variable(variable_name)) { + auto name = variable->get_name()->clone(); + auto index = new ast::Name(new ast::String(index_var)); + auto indexed_name = std::make_shared(name, index); + variable->set_name(indexed_name); + } + + /// instance_var_helper check of instance variables from mod file as well + /// as extra variables like ion index variables added for code generation + if (instance_var_helper.is_an_instance_variable(variable_name)) { + auto name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); + auto var = std::make_shared(name, variable->clone()); + variable->set_name(var); + } + } +} + +/** + * \brief Visit StatementBlock and convert Local statement for code generation + * @param node AST node representing Statement block + * + * Statement blocks can have LOCAL statement and if it exist it's typically + * first statement in the vector. We have to remove LOCAL statement and convert + * it to CodegenVarListStatement that will represent all variables as double. 
+ */ +void CodegenLLVMHelperVisitor::convert_local_statement(ast::StatementBlock& node) { + /// collect all local statement block + const auto& statements = collect_nodes(node, {ast::AstNodeType::LOCAL_LIST_STATEMENT}); + + /// iterate over all statements and replace each with codegen variable + for (const auto& statement: statements) { + const auto& local_statement = std::dynamic_pointer_cast(statement); + + /// create codegen variables from local variables + /// clone variable to make new independent statement + ast::CodegenVarVector variables; + for (const auto& var: local_statement->get_variables()) { + variables.emplace_back(new ast::CodegenVar(0, var->get_name()->clone())); + } + + /// remove local list statement now + std::unordered_set to_delete({local_statement.get()}); + /// local list statement is enclosed in statement block + const auto& parent_node = dynamic_cast(local_statement->get_parent()); + parent_node->erase_statement(to_delete); + + /// create new codegen variable statement and insert at the beginning of the block + auto type = new ast::CodegenVarType(fp_type); + auto new_statement = std::make_shared(type, variables); + const auto& statements = parent_node->get_statements(); + parent_node->insert_statement(statements.begin(), new_statement); + } +} + +/** + * \brief Visit StatementBlock and rename all LOCAL variables + * @param node AST node representing Statement block + * + * Statement block in remainder loop will have same LOCAL variables from + * main loop. In order to avoid conflict during lookup, rename each local + * variable by appending unique number. The number used as suffix is just + * a counter used for Statement block. 
+ */ +void CodegenLLVMHelperVisitor::rename_local_variables(ast::StatementBlock& node) { + /// local block counter just to append unique number + static int local_block_counter = 1; + + /// collect all local statement block + const auto& statements = collect_nodes(node, {ast::AstNodeType::LOCAL_LIST_STATEMENT}); + + /// iterate over each statement and rename all variables + for (const auto& statement: statements) { + const auto& local_statement = std::dynamic_pointer_cast(statement); + + /// rename local variable in entire statement block + for (auto& var: local_statement->get_variables()) { + std::string old_name = var->get_node_name(); + std::string new_name = fmt::format("{}_{}", old_name, local_block_counter); + visitor::RenameVisitor(old_name, new_name).visit_statement_block(node); + } + } + + /// make it unique for next statement block + local_block_counter++; +} + + +void CodegenLLVMHelperVisitor::visit_procedure_block(ast::ProcedureBlock& node) { + node.visit_children(*this); + create_function_for_node(node); +} + +void CodegenLLVMHelperVisitor::visit_function_block(ast::FunctionBlock& node) { + node.visit_children(*this); + create_function_for_node(node); +} + +std::shared_ptr CodegenLLVMHelperVisitor::loop_initialization_expression( + const std::string& induction_var, + bool is_remainder_loop) { + if (platform.is_gpu()) { + const auto& id = create_varname(induction_var); + const auto& tid = new ast::CodegenThreadId(); + return std::make_shared(id, + ast::BinaryOperator(ast::BOP_ASSIGN), + tid); + } + + // Otherwise, platfrom is CPU. Since the loop can be a remainder loop, check if + // we need to initialize at all. + if (is_remainder_loop) + return nullptr; + return int_initialization_expression(induction_var); +} + +std::shared_ptr CodegenLLVMHelperVisitor::loop_increment_expression( + const std::string& induction_var, + bool is_remainder_loop) { + const auto& id = create_varname(induction_var); + + // For GPU platforms, increment by grid stride. 
+ if (platform.is_gpu()) { + const auto& stride = new ast::CodegenGridStride(); + const auto& inc_expr = + new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), stride); + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_ASSIGN), + inc_expr); + } + + // Otherwise, proceed with increment for CPU loop. + const int width = is_remainder_loop ? 1 : platform.get_instruction_width(); + const auto& inc = new ast::Integer(width, nullptr); + const auto& inc_expr = + new ast::BinaryExpression(id, ast::BinaryOperator(ast::BOP_ADDITION), inc); + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_ASSIGN), + inc_expr); +} + +std::shared_ptr CodegenLLVMHelperVisitor::loop_count_expression( + const std::string& induction_var, + const std::string& node_count, + bool is_remainder_loop) { + const int width = is_remainder_loop ? 1 : platform.get_instruction_width(); + const auto& id = create_varname(induction_var); + const auto& mech_node_count = create_varname(node_count); + + // For non-vectorised loop, the condition is id < mech->node_count + if (width == 1) { + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_LESS), + mech_node_count); + } + + // For vectorised loop, the condition is id < mech->node_count - width + 1 + const auto& remainder = new ast::Integer(width - 1, /*macro=*/nullptr); + const auto& count = new ast::BinaryExpression(mech_node_count, + ast::BinaryOperator(ast::BOP_SUBTRACTION), + remainder); + return std::make_shared(id->clone(), + ast::BinaryOperator(ast::BOP_LESS), + count); +} + +/** + * \brief Convert ast::NrnStateBlock to corresponding code generation function nrn_state + * @param node AST node representing ast::NrnStateBlock + * + * Solver passes converts DERIVATIVE block from MOD into ast::NrnStateBlock node + * that represent `nrn_state` function in the generated CPP code. 
To help this + * code generation, we perform various transformation on ast::NrnStateBlock and + * create new code generation function. + */ +void CodegenLLVMHelperVisitor::visit_nrn_state_block(ast::NrnStateBlock& node) { + // create vectors of local variables that would be used in compute part + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + + // statements to load indexes for gather/scatter like variables + ast::StatementVector index_statements; + + // statements for the main body of nrn_state + ast::StatementVector body_statements; + + // prepare main body of the compute function + { + /// access node index and corresponding voltage + index_statements.push_back(visitor::create_statement( + fmt::format("node_id = node_index[{}]", naming::INDUCTION_VAR))); + body_statements.push_back( + visitor::create_statement(fmt::format("v = {}[node_id]", VOLTAGE_VAR))); + + /// read ion variables + ion_read_statements( + BlockType::State, int_variables, double_variables, index_statements, body_statements); + + /// main compute node : extract solution expressions from the derivative block + const auto& solutions = collect_nodes(node, {ast::AstNodeType::SOLUTION_EXPRESSION}); + for (const auto& statement: solutions) { + const auto& solution = std::dynamic_pointer_cast(statement); + const auto& block = std::dynamic_pointer_cast( + solution->get_node_to_solve()); + append_statements_from_block(body_statements, block, info); + } + + /// add breakpoint block if no current + if (info.currents.empty() && info.breakpoint_node != nullptr) { + auto block = info.breakpoint_node->get_statement_block(); + append_statements_from_block(body_statements, block, info); + } + + /// write ion statements + ion_write_statements( + BlockType::State, int_variables, double_variables, index_statements, body_statements); + + // \todo handle process_shadow_update_statement and wrote_conc_call yet + } + + /// create target-specific compute body + ast::StatementVector 
compute_body; + compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); + compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); + + /// statements for new function to be generated + ast::StatementVector function_statements; + + std::vector induction_variables{naming::INDUCTION_VAR}; + function_statements.push_back( + create_local_variable_statement(induction_variables, INTEGER_TYPE)); + + if (platform.is_gpu()) { + create_gpu_compute_body(compute_body, function_statements, int_variables, double_variables); + } else { + create_cpu_compute_body(compute_body, function_statements, int_variables, double_variables); + } + + /// new block for the function + auto function_block = new ast::StatementBlock(function_statements); + + /// name of the function and it's return type + std::string function_name = "nrn_state_" + stringutils::tolower(info.mod_suffix); + auto name = new ast::Name(new ast::String(function_name)); + auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); + + // argument to function: currently only instance structure + ast::CodegenVarWithTypeVector code_arguments; + auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); + auto instance_var_name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); + auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); + code_arguments.emplace_back(instance_var); + + /// finally, create new function + auto function = std::make_shared( + return_type, name, code_arguments, function_block, /*is_kernel=*/1); + codegen_functions.push_back(function); + + // todo: remove this, temporary + std::cout << nmodl::to_nmodl(*function) << std::endl; +} + +void CodegenLLVMHelperVisitor::create_gpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables) { + auto kernel_block = 
std::make_shared(body); + + // dispatch loop creation with right parameters + create_compute_body_loop(kernel_block, function_statements, int_variables, double_variables); +} + +void CodegenLLVMHelperVisitor::create_cpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables) { + auto loop_block = std::make_shared(body); + create_compute_body_loop(loop_block, function_statements, int_variables, double_variables); + if (platform.is_cpu_with_simd()) + create_compute_body_loop(loop_block, + function_statements, + int_variables, + double_variables, + /*is_remainder_loop=*/true); +} + +void CodegenLLVMHelperVisitor::create_compute_body_loop(std::shared_ptr& block, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables, + bool is_remainder_loop) { + const auto& initialization = loop_initialization_expression(naming::INDUCTION_VAR, + is_remainder_loop); + const auto& condition = + loop_count_expression(naming::INDUCTION_VAR, NODECOUNT_VAR, is_remainder_loop); + const auto& increment = loop_increment_expression(naming::INDUCTION_VAR, is_remainder_loop); + + // Clone the statement block if needed since it can be used by the remainder loop. + auto loop_block = (is_remainder_loop || !platform.is_cpu_with_simd()) + ? block + : std::shared_ptr(block->clone()); + + // Convert local statement to use CodegenVar statements and create a FOR loop node. Also, if + // creating a remainder loop then rename variables to avoid conflicts. + if (is_remainder_loop) + rename_local_variables(*loop_block); + convert_local_statement(*loop_block); + auto for_loop = std::make_shared(initialization, + condition, + increment, + loop_block); + + // Convert all variables inside loop body to be instance variables. + convert_to_instance_variable(*for_loop, naming::INDUCTION_VAR); + + // Rename variables if processing remainder loop. 
+ if (is_remainder_loop) { + const auto& loop_statements = for_loop->get_statement_block(); + auto rename = [&](std::vector& vars) { + for (int i = 0; i < vars.size(); ++i) { + std::string old_name = vars[i]; + std::string new_name = epilogue_variable_prefix + vars[i]; + vars[i] = new_name; + visitor::RenameVisitor v(old_name, new_name); + loop_statements->accept(v); + } + }; + rename(int_variables); + rename(double_variables); + } + + // Push variables and the loop to the function statements vector. + function_statements.push_back(create_local_variable_statement(int_variables, INTEGER_TYPE)); + function_statements.push_back(create_local_variable_statement(double_variables, fp_type)); + function_statements.push_back(for_loop); +} + +void CodegenLLVMHelperVisitor::remove_inlined_nodes(ast::Program& node) { + auto program_symtab = node.get_model_symbol_table(); + const auto& func_proc_nodes = + collect_nodes(node, {ast::AstNodeType::FUNCTION_BLOCK, ast::AstNodeType::PROCEDURE_BLOCK}); + std::unordered_set nodes_to_erase; + for (const auto& ast_node: func_proc_nodes) { + if (program_symtab->lookup(ast_node->get_node_name()) + .get() + ->has_all_status(Status::inlined)) { + nodes_to_erase.insert(static_cast(ast_node.get())); + } + } + node.erase_node(nodes_to_erase); +} + +/** + * Print `nrn_cur` kernel with`CONDUCTANCE` statements in the BREAKPOINT block + * @param node Ast node representing BREAKPOINT block + * @param int_variables Vector of integer variables in the kernel being generated + * @param double_variables Vector of double variables in the kernel being generated + * @param index_statements Statements for loading indexes (typically for ions, rhs, d) + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + */ +void CodegenLLVMHelperVisitor::print_nrn_cur_conductance_kernel( + const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + 
ast::StatementVector& body_statements) { + // TODO: this is not used by default but only with sympy --conductance option. This should be + // implemented later and hence just throw an error for now. + throw std::runtime_error( + "BREAKPOINT block with CONDUCTANCE statements is not supported in the LLVM backend yet"); +} + +/** + * Print `nrn_current` function that is typically generated as part of `nrn_cur()` + * @param node Ast node representing BREAKPOINT block + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + * @param variable Variable to which computed current will be assigned + */ +void CodegenLLVMHelperVisitor::print_nrn_current_body(const ast::BreakpointBlock& node, + ast::StatementVector& body_statements, + const std::string& variable) { + ast::StatementVector statements; + + // starts with current initialized to 0 + statements.emplace_back(visitor::create_statement("current = 0")); + + // append compatible code statements from the breakpoint block + append_statements_from_block(statements, node.get_statement_block(), info); + + // sum now all currents + for (auto& current: info.currents) { + statements.emplace_back( + visitor::create_statement(fmt::format("current = current + {}", current))); + } + + // assign computed current to the given variable + statements.emplace_back(visitor::create_statement(fmt::format("{} = current", variable))); + + // create StatementBlock for better readability of the generated code and add that to the main + // body statements + body_statements.emplace_back(new ast::ExpressionStatement(new ast::StatementBlock(statements))); +} + +/** + * Print `nrn_cur` kernel without `CONDUCTANCE` statements in the BREAKPOINT block + * @param node Ast node representing BREAKPOINT block + * @param int_variables Vector of integer variables in the kernel being generated + * @param double_variables Vector of double variables in the kernel being generated + * @param index_statements Statements for 
loading indexes (typically for ions, rhs, d) + * @param body_statements Vector of statements representing loop body of the `nrn_cur` kernel + */ +void CodegenLLVMHelperVisitor::print_nrn_cur_non_conductance_kernel( + const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements) { + // add double variables needed in the local scope + double_variables.emplace_back("g"); + double_variables.emplace_back("rhs"); + double_variables.emplace_back("v_org"); + double_variables.emplace_back("current"); + + // store original voltage value as we are going to calculate current with v + 0.001 + body_statements.emplace_back(visitor::create_statement("v_org = v")); + + // first current calculation with v+0.001 and assign it to variable g + body_statements.emplace_back(visitor::create_statement("v = v + 0.001")); + print_nrn_current_body(node, body_statements, "g"); + + // now store all ionic currents to local variable + for (const auto& ion: info.ions) { + for (const auto& var: ion.writes) { + if (ion.is_ionic_current(var)) { + // also create local variable + std::string name{fmt::format("di{}", ion.name)}; + double_variables.emplace_back(name); + body_statements.emplace_back( + visitor::create_statement(fmt::format("{} = {}", name, var))); + } + } + } + + // now restore original v and calculate current and store it in rhs + body_statements.emplace_back(visitor::create_statement("v = v_org")); + print_nrn_current_body(node, body_statements, "rhs"); + + // calculate g + body_statements.emplace_back(visitor::create_statement("g = (g-rhs)/0.001")); + + // in case of point process we need to load area from another vector. 
+ if (info.point_process) { + // create integer variable for index and then load value from area_index vector + int_variables.emplace_back(fmt::format("{}_id", naming::NODE_AREA_VARIABLE)); + index_statements.emplace_back(visitor::create_statement( + fmt::format(" {0}_id = {0}_index[id]", naming::NODE_AREA_VARIABLE))); + } + + // update all ionic currents now + for (const auto& ion: info.ions) { + for (const auto& var: ion.writes) { + if (ion.is_ionic_current(var)) { + // variable on the lhs + std::string lhs{fmt::format("{}di{}dv", naming::ION_VARNAME_PREFIX, ion.name)}; + + // expression on the rhs + std::string rhs{fmt::format("(di{}-{})/0.001", ion.name, var)}; + if (info.point_process) { + rhs += fmt::format("*1.e2/{0}[{0}_id]", naming::NODE_AREA_VARIABLE); + } + + // load the index for lhs variable + int_variables.emplace_back(lhs + "_id"); + std::string index_statement{fmt::format("{}_id = {}_index[id]", lhs, lhs)}; + index_statements.emplace_back(visitor::create_statement(index_statement)); + + // add statement that actually updates the lhs ion variable + body_statements.emplace_back(visitor::create_statement( + fmt::format("{0}[{0}_id] = {0}[{0}_id] + {1}", lhs, rhs))); + } + } + } +} + +/** + * \brief Convert ast::BreakpointBlock to corresponding code generation function nrn_cur + * @param node AST node representing ast::BreakpointBlock + * + * The BREAKPOINT block from MOD file (ast::BreakpointBlock node in the AST) is converted + * to `nrn_cur` function in the generated CPP code via various transformations. Here we + * perform those transformations and create new codegen node in the AST. 
+ */ +void CodegenLLVMHelperVisitor::visit_breakpoint_block(ast::BreakpointBlock& node) { + // no-op in case there are no currents or breakpoint block doesn't exist + if (!info.nrn_cur_required()) { + return; + } + + /// local variables in the function scope for integer and double variables + std::vector int_variables{"node_id"}; + std::vector double_variables{"v"}; + + /// statements to load indexes for gather/scatter like expressions + ast::StatementVector index_statements; + + /// statements for the rest of compute body + ast::StatementVector body_statements; + + /// prepare all function statements + { + /// access node index and corresponding voltage + index_statements.push_back(visitor::create_statement( + fmt::format("node_id = node_index[{}]", naming::INDUCTION_VAR))); + body_statements.push_back( + visitor::create_statement(fmt::format("v = {}[node_id]", VOLTAGE_VAR))); + + /// read ion variables + ion_read_statements(BlockType::Equation, + int_variables, + double_variables, + index_statements, + body_statements); + + /// print main current kernel based on whether conductance statements exist or not + if (info.conductances.empty()) { + print_nrn_cur_non_conductance_kernel( + node, int_variables, double_variables, index_statements, body_statements); + } else { + print_nrn_cur_conductance_kernel( + node, int_variables, double_variables, index_statements, body_statements); + } + + /// add write ion statements + ion_write_statements(BlockType::Equation, + int_variables, + double_variables, + index_statements, + body_statements); + + /// in case of point process, we have to scale values based on the area + if (info.point_process) { + double_variables.emplace_back("mfactor"); + body_statements.emplace_back(visitor::create_statement( + fmt::format("mfactor = 1.e2/{0}[{0}_id]", naming::NODE_AREA_VARIABLE))); + body_statements.emplace_back(visitor::create_statement("g = g*mfactor")); + body_statements.emplace_back(visitor::create_statement("rhs = rhs*mfactor")); + } + + /// as 
multiple point processes can exist at same node, with simd or gpu execution we have + /// to create atomic statements that will be handled by llvm ir generation + // \todo note that we are not creating rhs and d updates based on the shadow vectors. This + // is because llvm backend for cpu as well as gpu is going to take care for + // reductions. if these codegen functions will be used for C backend then we will need + // to implement separate reduction loop like mod2c or nmodl's c backend. + if (info.point_process && (platform.is_gpu() || platform.is_cpu_with_simd())) { + body_statements.emplace_back(create_atomic_statement( + naming::NTHREAD_RHS, "node_id", info.operator_for_rhs(), "rhs")); + body_statements.emplace_back(create_atomic_statement( + naming::NTHREAD_D, "node_id", info.operator_for_rhs(), "g")); + } else { + auto rhs_op(info.operator_for_rhs()); + auto d_op(info.operator_for_d()); + + // convert a += b to a = a + b, see BlueBrain/nmodl/issues/851 + // hence write update of rhs and de in the form of assignment statements + stringutils::remove_character(rhs_op, '='); + stringutils::remove_character(d_op, '='); + + body_statements.emplace_back(visitor::create_statement( + fmt::format("vec_rhs[node_id] = vec_rhs[node_id] {} rhs", rhs_op))); + body_statements.emplace_back(visitor::create_statement( + fmt::format("vec_d[node_id] = vec_d[node_id] {} g", d_op))); + } + } + + /// now create codegen function + { + /// compute body, index loading statements at the begining and then compute functions + ast::StatementVector compute_body; + compute_body.insert(compute_body.end(), index_statements.begin(), index_statements.end()); + compute_body.insert(compute_body.end(), body_statements.begin(), body_statements.end()); + + /// statements for new function to be generated + ast::StatementVector function_statements; + + std::vector induction_variables{naming::INDUCTION_VAR}; + function_statements.push_back( + create_local_variable_statement(induction_variables, 
INTEGER_TYPE)); + + if (platform.is_gpu()) { + create_gpu_compute_body(compute_body, + function_statements, + int_variables, + double_variables); + } else { + create_cpu_compute_body(compute_body, + function_statements, + int_variables, + double_variables); + } + + /// new block for the function + auto function_block = new ast::StatementBlock(function_statements); + + /// name of the function and it's return type + std::string function_name = "nrn_cur_" + stringutils::tolower(info.mod_suffix); + auto name = new ast::Name(new ast::String(function_name)); + auto return_type = new ast::CodegenVarType(ast::AstNodeType::VOID); + + /// only instance struct as an argument for now + ast::CodegenVarWithTypeVector code_arguments; + auto instance_var_type = new ast::CodegenVarType(ast::AstNodeType::INSTANCE_STRUCT); + auto instance_var_name = new ast::Name(new ast::String(naming::MECH_INSTANCE_VAR)); + auto instance_var = new ast::CodegenVarWithType(instance_var_type, 1, instance_var_name); + code_arguments.emplace_back(instance_var); + + /// finally, create new function + auto function = std::make_shared(return_type, + name, + code_arguments, + function_block, + /*is_kernel=*/1); + codegen_functions.push_back(function); + + // todo: remove this, temporary + std::cout << nmodl::to_nmodl(*function) << std::endl; + } +} + +void CodegenLLVMHelperVisitor::visit_program(ast::Program& node) { + /// run codegen helper visitor to collect information + CodegenHelperVisitor v; + info = v.analyze(node); + + instance_var_helper.instance = create_instance_struct(); + node.emplace_back_node(instance_var_helper.instance); + + logger->info("Running CodegenLLVMHelperVisitor"); + remove_inlined_nodes(node); + node.visit_children(*this); + for (auto& fun: codegen_functions) { + node.emplace_back_node(fun); + } +} + + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_helper_visitor.hpp b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp new file mode 100644 
index 0000000000..e41d60b0f6 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_helper_visitor.hpp @@ -0,0 +1,230 @@ +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \file + * \brief \copybrief nmodl::codegen::CodegenLLVMHelperVisitor + */ + +#include + +#include "ast/instance_struct.hpp" +#include "codegen/codegen_info.hpp" +#include "codegen/llvm/target_platform.hpp" +#include "symtab/symbol_table.hpp" +#include "utils/logger.hpp" +#include "visitors/ast_visitor.hpp" + +namespace nmodl { +namespace codegen { + +typedef std::vector> CodegenFunctionVector; + +/** + * @addtogroup llvm_codegen_details + * @{ + */ + +/** + * \class InstanceVarHelper + * \brief Helper to query instance variables information + * + * For LLVM IR generation we need to know the variable, it's type and + * location in the instance structure. This helper provides convenient + * functions to query this information. 
+ */ +struct InstanceVarHelper { + /// pointer to instance node in the AST + std::shared_ptr instance; + + /// find variable with given name and return the iterator + ast::CodegenVarWithTypeVector::const_iterator find_variable( + const ast::CodegenVarWithTypeVector& vars, + const std::string& name) { + return find_if(vars.begin(), + vars.end(), + [&](const std::shared_ptr& v) { + return v->get_node_name() == name; + }); + } + + /// check if given variable is instance variable + bool is_an_instance_variable(const std::string& name) { + const auto& vars = instance->get_codegen_vars(); + return find_variable(vars, name) != vars.end(); + } + + /// return codegen variable with a given name + const std::shared_ptr& get_variable(const std::string& name) { + const auto& vars = instance->get_codegen_vars(); + auto it = find_variable(vars, name); + if (it == vars.end()) { + throw std::runtime_error(fmt::format("Can not find variable with name {}", name)); + } + return *it; + } + + /// return position of the variable in the instance structure + int get_variable_index(const std::string& name) { + const auto& vars = instance->get_codegen_vars(); + auto it = find_variable(vars, name); + if (it == vars.end()) { + throw std::runtime_error( + fmt::format("Can not find codegen variable with name {}", name)); + } + return (it - vars.begin()); + } +}; + + +/** + * \class CodegenLLVMHelperVisitor + * \brief Helper visitor for AST information to help code generation backends + * + * Code generation backends convert NMODL AST to C++ code. But during this + * C++ code generation, various transformations happen and the final code generated + * is quite different / larger than the actual kernel represented in the MOD file or + * NMODL AST. + * + * Currently, these transformations are embedded into code generation backends + * like ast::CodegenCVisitor. If we have to generate code for a new simulator, there + * will be duplication of these transformations. 
Also, for completely new + * backends like NEURON simulator or SIMD library, we will have code duplication. + * + * In order to avoid this, we perform maximum transformations in this visitor. + * Currently we focus on transformations that will help LLVM backend but later + * these will be common across all backends. + */ +class CodegenLLVMHelperVisitor: public visitor::AstVisitor { + /// target platform + Platform platform; + + /// newly generated code generation specific functions + CodegenFunctionVector codegen_functions; + + /// ast information for code generation + codegen::CodegenInfo info; + + /// mechanism data helper + InstanceVarHelper instance_var_helper; + + /// create new function for FUNCTION or PROCEDURE block + void create_function_for_node(ast::Block& node); + + /// create new InstanceStruct + std::shared_ptr create_instance_struct(); + + private: + /// floating-point type + ast::AstNodeType fp_type; + + public: + /// default integer type + static const ast::AstNodeType INTEGER_TYPE; + + // node count, voltage and node index variables + static const std::string NODECOUNT_VAR; + static const std::string VOLTAGE_VAR; + static const std::string NODE_INDEX_VAR; + + CodegenLLVMHelperVisitor(Platform& platform) + : platform(platform) { + fp_type = platform.is_single_precision() ? 
ast::AstNodeType::FLOAT + : ast::AstNodeType::DOUBLE; + } + + const InstanceVarHelper& get_instance_var_helper() { + return instance_var_helper; + } + + std::string get_kernel_id() { + return naming::INDUCTION_VAR; + } + + /// run visitor and return code generation functions + CodegenFunctionVector get_codegen_functions(const ast::Program& node); + + void ion_read_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + + void ion_write_statements(BlockType type, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + + void convert_to_instance_variable(ast::Node& node, const std::string& index_var); + + void convert_local_statement(ast::StatementBlock& node); + void rename_local_variables(ast::StatementBlock& node); + + /// Remove Function and Procedure blocks from the node since they are already inlined + void remove_inlined_nodes(ast::Program& node); + + void visit_procedure_block(ast::ProcedureBlock& node) override; + void visit_function_block(ast::FunctionBlock& node) override; + void visit_nrn_state_block(ast::NrnStateBlock& node) override; + + /** + * \brief Convert ast::BreakpointBlock to corresponding code generation function nrn_cur + * @param node AST node representing ast::BreakpointBlock + */ + void visit_breakpoint_block(ast::BreakpointBlock& node) override; + + void visit_program(ast::Program& node) override; + + private: + /// Methods to create target-specific loop constructs. 
+ std::shared_ptr loop_initialization_expression( + const std::string& induction_var, + bool is_remainder_loop); + std::shared_ptr loop_count_expression(const std::string& induction_var, + const std::string& node_count, + bool is_remainder_loop); + std::shared_ptr loop_increment_expression(const std::string& induction_var, + bool is_remainder_loop); + + /// Methods to populate`function_statements` with necessary AST constructs to form + /// a kernel for a specific target. + void create_gpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables); + void create_cpu_compute_body(ast::StatementVector& body, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables); + void create_compute_body_loop(std::shared_ptr& block, + ast::StatementVector& function_statements, + std::vector& int_variables, + std::vector& double_variables, + bool is_remainder_loop = false); + + void print_nrn_current_body(const ast::BreakpointBlock& node, + ast::StatementVector& body_statements, + const std::string& variable); + void print_nrn_cur_non_conductance_kernel(const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); + void print_nrn_cur_conductance_kernel(const ast::BreakpointBlock& node, + std::vector& int_variables, + std::vector& double_variables, + ast::StatementVector& index_statements, + ast::StatementVector& body_statements); +}; + +/** @} */ // end of llvm_codegen_details + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.cpp b/src/codegen/llvm/codegen_llvm_visitor.cpp new file mode 100644 index 0000000000..a114150491 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_visitor.cpp @@ -0,0 +1,1192 @@ +/************************************************************************* 
+ * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/llvm_utils.hpp" + +#include "ast/all.hpp" +#include "utils/logger.hpp" +#include "visitors/rename_visitor.hpp" +#include "visitors/visitor_utils.hpp" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" + +namespace nmodl { +namespace codegen { + + +/****************************************************************************************/ +/* Helper routines */ +/****************************************************************************************/ + +/// A utility to check for supported Statement AST nodes. +static bool is_supported_statement(const ast::Statement& statement) { + return statement.is_codegen_atomic_statement() || statement.is_codegen_for_statement() || + statement.is_if_statement() || statement.is_codegen_return_statement() || + statement.is_codegen_var_list_statement() || statement.is_expression_statement() || + statement.is_while_statement(); +} + +/// A utility to check that the kernel body can be vectorised. +static bool can_vectorize(const ast::CodegenForStatement& statement, symtab::SymbolTable* sym_tab) { + // Check that function calls are made to external methods only. + const auto& function_calls = collect_nodes(statement, {ast::AstNodeType::FUNCTION_CALL}); + for (const auto& call: function_calls) { + const auto& name = call->get_node_name(); + auto symbol = sym_tab->lookup(name); + if (symbol && !symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) + return false; + } + + // Check for simple supported control flow in the kernel (single if/else statement). 
+ const std::vector supported_control_flow = {ast::AstNodeType::IF_STATEMENT}; + const auto& supported = collect_nodes(statement, supported_control_flow); + + // Check for unsupported control flow statements. + const std::vector unsupported_nodes = {ast::AstNodeType::ELSE_IF_STATEMENT}; + const auto& unsupported = collect_nodes(statement, unsupported_nodes); + + return unsupported.empty() && supported.size() <= 1; +} + +llvm::Value* CodegenLLVMVisitor::accept_and_get(const std::shared_ptr& node) { + node->accept(*this); + return ir_builder.pop_last_value(); +} + +void CodegenLLVMVisitor::create_external_function_call(const std::string& name, + const ast::ExpressionVector& arguments) { + if (name == "printf") { + create_printf_call(arguments); + return; + } + + ValueVector argument_values; + TypeVector argument_types; + for (const auto& arg: arguments) { + llvm::Value* value = accept_and_get(arg); + llvm::Type* type = value->getType(); + argument_types.push_back(type); + argument_values.push_back(value); + } + ir_builder.create_intrinsic(name, argument_values, argument_types); +} + +void CodegenLLVMVisitor::create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments) { + // Check that function is called with the expected number of arguments. + if (!func->isVarArg() && arguments.size() != func->arg_size()) { + throw std::runtime_error("Error: Incorrect number of arguments passed"); + } + + // Pack function call arguments to vector and create a call instruction. + ValueVector argument_values; + argument_values.reserve(arguments.size()); + create_function_call_arguments(arguments, argument_values); + ir_builder.create_function_call(func, argument_values); +} + +void CodegenLLVMVisitor::create_function_call_arguments(const ast::ExpressionVector& arguments, + ValueVector& arg_values) { + for (const auto& arg: arguments) { + if (arg->is_string()) { + // If the argument is a string, create a global i8* variable with it. 
+ auto string_arg = std::dynamic_pointer_cast(arg); + arg_values.push_back(ir_builder.create_global_string(*string_arg)); + } else { + llvm::Value* value = accept_and_get(arg); + arg_values.push_back(value); + } + } +} + +void CodegenLLVMVisitor::create_function_declaration(const ast::CodegenFunction& node) { + const auto& name = node.get_node_name(); + const auto& arguments = node.get_arguments(); + + TypeVector arg_types; + if (wrap_kernel_functions && node.get_is_kernel()) { + // We are wrapping NMODL compute kernels as a function taking void*. Thus, + // ignore struct pointer argument type and create a function signature with + // void* - actual conversion to struct pointer is done when generating + // the function body! + arg_types.push_back(ir_builder.get_i8_ptr_type()); + } else { + // Otherwise, process argument types as usual. + for (size_t i = 0; i < arguments.size(); ++i) + arg_types.push_back(get_codegen_var_type(*arguments[i]->get_type())); + } + llvm::Type* return_type = get_codegen_var_type(*node.get_return_type()); + + // Create a function that is automatically inserted into module's symbol table. + auto func = + llvm::Function::Create(llvm::FunctionType::get(return_type, arg_types, /*isVarArg=*/false), + llvm::Function::ExternalLinkage, + name, + *module); + + // Add function debug information, with location information if it exists. + if (add_debug_information) { + if (node.get_token()) { + Location loc{node.get_token()->start_line(), node.get_token()->start_column()}; + debug_builder.add_function_debug_info(func, &loc); + } else { + debug_builder.add_function_debug_info(func); + } + } +} + +void CodegenLLVMVisitor::create_printf_call(const ast::ExpressionVector& arguments) { + // First, create printf declaration or insert it if it does not exit. 
+ std::string name = "printf"; + llvm::Function* printf = module->getFunction(name); + if (!printf) { + llvm::FunctionType* printf_type = llvm::FunctionType::get(ir_builder.get_i32_type(), + ir_builder.get_i8_ptr_type(), + /*isVarArg=*/true); + + printf = + llvm::Function::Create(printf_type, llvm::Function::ExternalLinkage, name, *module); + } + + // Create a call instruction. + ValueVector argument_values; + argument_values.reserve(arguments.size()); + create_function_call_arguments(arguments, argument_values); + ir_builder.create_function_call(printf, argument_values, /*use_result=*/false); +} + +void CodegenLLVMVisitor::create_vectorized_control_flow_block(const ast::IfStatement& node) { + // Get the true mask from the condition statement. + llvm::Value* true_mask = accept_and_get(node.get_condition()); + + // Process the true block. + ir_builder.set_mask(true_mask); + node.get_statement_block()->accept(*this); + + // Note: by default, we do not support kernels with complicated control flow. This is checked + // prior to visiting 'CodegenForStatement`. + const auto& elses = node.get_elses(); + if (elses) { + // If `else` statement exists, invert the mask and proceed with code generation. + ir_builder.invert_mask(); + elses->get_statement_block()->accept(*this); + } + + // Clear the mask value. 
+ ir_builder.clear_mask(); +} + +void CodegenLLVMVisitor::find_kernel_names(std::vector& container) { + auto& functions = module->getFunctionList(); + for (auto& func: functions) { + const std::string name = func.getName().str(); + if (Annotator::has_nmodl_compute_kernel_annotation(func)) { + container.push_back(name); + } + } +} + +llvm::Type* CodegenLLVMVisitor::get_codegen_var_type(const ast::CodegenVarType& node) { + switch (node.get_type()) { + case ast::AstNodeType::BOOLEAN: + return ir_builder.get_boolean_type(); + case ast::AstNodeType::FLOAT: + case ast::AstNodeType::DOUBLE: + return ir_builder.get_fp_type(); + case ast::AstNodeType::INSTANCE_STRUCT: + return get_instance_struct_type(); + case ast::AstNodeType::INTEGER: + return ir_builder.get_i32_type(); + case ast::AstNodeType::VOID: + return ir_builder.get_void_type(); + default: + throw std::runtime_error("Error: expecting a type in CodegenVarType node\n"); + } +} + +llvm::Value* CodegenLLVMVisitor::get_index(const ast::IndexedName& node) { + // In NMODL, the index is either an integer expression or a named constant, such as "id". + llvm::Value* index_value = node.get_length()->is_name() + ? ir_builder.create_load(node.get_length()->get_node_name()) + : accept_and_get(node.get_length()); + return ir_builder.create_index(index_value); +} + +llvm::Type* CodegenLLVMVisitor::get_instance_struct_type() { + TypeVector member_types; + for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { + // Get the type information of the codegen variable. + const auto& is_pointer = variable->get_is_pointer(); + const auto& nmodl_type = variable->get_type()->get_type(); + + // Create the corresponding LLVM type. + switch (nmodl_type) { + case ast::AstNodeType::FLOAT: + case ast::AstNodeType::DOUBLE: + member_types.push_back(is_pointer ? ir_builder.get_fp_ptr_type() + : ir_builder.get_fp_type()); + break; + case ast::AstNodeType::INTEGER: + member_types.push_back(is_pointer ? 
ir_builder.get_i32_ptr_type() + : ir_builder.get_i32_type()); + break; + default: + throw std::runtime_error("Error: unsupported type found in instance struct\n"); + } + } + + return ir_builder.get_struct_ptr_type(instance_struct(), member_types); +} + +int CodegenLLVMVisitor::get_num_elements(const ast::IndexedName& node) { + // First, verify if the length is an integer value. + const auto& integer = std::dynamic_pointer_cast(node.get_length()); + if (!integer) + throw std::runtime_error("Error: only integer length is supported\n"); + + // Check if the length value is a constant. + if (!integer->get_macro()) + return integer->get_value(); + + // Otherwise, the length is taken from the macro. + const auto& macro = program_symtab->lookup(integer->get_macro()->get_node_name()); + return static_cast(*macro->get_value()); +} + +/** + * Currently, functions are identified as compute kernels if they satisfy the following: + * 1. They have a void return type + * 2. They have a single argument + * 3. The argument is a struct type pointer + * This is not robust, and hence it would be better to find what functions are kernels on the NMODL + * AST side (e.g. via a flag, or via names list). + * + * \todo identify kernels on NMODL AST side. + */ +bool CodegenLLVMVisitor::is_kernel_function(const std::string& function_name) { + llvm::Function* function = module->getFunction(function_name); + if (!function) + throw std::runtime_error("Error: function " + function_name + " does not exist\n"); + + // By convention, only kernel functions have a return type of void and single argument. The + // number of arguments check is needed to avoid LLVM void intrinsics to be considered as + // kernels. + if (!function->getReturnType()->isVoidTy() || !llvm::hasSingleElement(function->args())) + return false; + + // Kernel's argument is a pointer to the instance struct type. 
+ llvm::Type* arg_type = function->getArg(0)->getType(); + if (auto pointer_type = llvm::dyn_cast(arg_type)) { + if (pointer_type->getElementType()->isStructTy()) + return true; + } + return false; +} + +llvm::Value* CodegenLLVMVisitor::read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, + llvm::Value* maybe_value_to_store) { + const auto& instance_name = node.get_instance_var()->get_node_name(); + const auto& member_node = node.get_member_var(); + const auto& member_name = member_node->get_node_name(); + + if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); + + // Load the instance struct by its name. + llvm::Value* instance_ptr = ir_builder.create_load(instance_name); + + // Get the pointer to the specified member. + int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); + + // Check if the member is scalar. Load the value or store to it straight away. Otherwise, we + // need some extra handling. + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) { + if (maybe_value_to_store) { + ir_builder.create_store(member_ptr, maybe_value_to_store); + return nullptr; + } else { + return ir_builder.create_load(member_ptr); + } + } + + // Check that the member is an indexed name indeed, and that it is indexed by a named constant + // (e.g. "id"). 
+ const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + // Get the index to the member and the id used to index it. + llvm::Value* i64_index = get_index(*member_indexed_name); + const std::string id = member_indexed_name->get_length()->get_node_name(); + + // Load the member of the instance struct. + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + + // Create a pointer to the specified element of the struct member. + return ir_builder.load_to_or_store_from_array(id, + i64_index, + instance_member, + maybe_value_to_store); +} + +llvm::Value* CodegenLLVMVisitor::read_variable(const ast::VarName& node) { + const auto& identifier = node.get_name(); + + if (identifier->is_name()) { + return ir_builder.create_load(node.get_node_name(), + /*masked=*/ir_builder.generates_predicated_ir()); + } + + if (identifier->is_indexed_name()) { + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + llvm::Value* index = get_index(*indexed_name); + return ir_builder.create_load_from_array(node.get_node_name(), index); + } + + if (identifier->is_codegen_instance_var()) { + const auto& instance_var = std::dynamic_pointer_cast(identifier); + return read_from_or_write_to_instance(*instance_var); + } + + throw std::runtime_error("Error: the type of '" + node.get_node_name() + + "' is not supported\n"); +} + +void CodegenLLVMVisitor::write_to_variable(const ast::VarName& node, llvm::Value* value) { + const auto& identifier = node.get_name(); + if (!identifier->is_name() && !identifier->is_indexed_name() && + !identifier->is_codegen_instance_var()) { + throw 
std::runtime_error("Error: the type of '" + node.get_node_name() + + "' is not supported\n"); + } + + if (identifier->is_name()) { + ir_builder.create_store(node.get_node_name(), value); + } + + if (identifier->is_indexed_name()) { + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + llvm::Value* index = get_index(*indexed_name); + ir_builder.create_store_to_array(node.get_node_name(), index, value); + } + + if (identifier->is_codegen_instance_var()) { + const auto& instance_var = std::dynamic_pointer_cast(identifier); + read_from_or_write_to_instance(*instance_var, value); + } +} + +/****************************************************************************************/ +/* Overloaded visitor routines */ +/****************************************************************************************/ + + +void CodegenLLVMVisitor::visit_binary_expression(const ast::BinaryExpression& node) { + const auto& op = node.get_op().get_value(); + + // Process rhs first, since lhs is handled differently for assignment and binary + // operators. + llvm::Value* rhs = accept_and_get(node.get_rhs()); + if (op == ast::BinaryOp::BOP_ASSIGN) { + auto var = dynamic_cast(node.get_lhs().get()); + if (!var) + throw std::runtime_error("Error: only 'VarName' assignment is supported\n"); + + write_to_variable(*var, rhs); + return; + } + + llvm::Value* lhs = accept_and_get(node.get_lhs()); + ir_builder.create_binary_op(lhs, rhs, op); +} + +void CodegenLLVMVisitor::visit_statement_block(const ast::StatementBlock& node) { + const auto& statements = node.get_statements(); + for (const auto& statement: statements) { + if (is_supported_statement(*statement)) + statement->accept(*this); + } +} + +void CodegenLLVMVisitor::visit_boolean(const ast::Boolean& node) { + ir_builder.create_boolean_constant(node.get_value()); +} + +void CodegenLLVMVisitor::visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) { + // Get the variable node that need an atomic update. 
+ const auto& var = std::dynamic_pointer_cast(node.get_lhs()); + if (!var) + throw std::runtime_error("Error: only 'VarName' update is supported\n"); + + // Evaluate RHS of the update. + llvm::Value* rhs = accept_and_get(node.get_rhs()); + + // First, check if it is an atomic write only and we can return early. + // Otherwise, extract what kind of atomic update we want to make. + ast::BinaryOp atomic_op = node.get_atomic_op().get_value(); + if (atomic_op == ast::BinaryOp::BOP_ASSIGN) { + write_to_variable(*var, rhs); + return; + } + ast::BinaryOp op = ir_builder.extract_atomic_op(atomic_op); + + // For different platforms, we handle atomic updates differently! + + // For non-SIMD CPUs (or any scalar code on SIMD CPUs), updates don't have to be atomic at all! + const bool non_SIMD_cpu = platform.is_cpu() && !platform.is_cpu_with_simd(); + if (non_SIMD_cpu || (platform.is_cpu_with_simd() && !ir_builder.vectorizing())) { + llvm::Value* lhs = accept_and_get(node.get_lhs()); + ir_builder.create_binary_op(lhs, rhs, op); + llvm::Value* result = ir_builder.pop_last_value(); + + write_to_variable(*var, result); + return; + } + + // Otherwise, we either have a GPU or a SIMD CPU. Double-check to be sure. + if (!platform.is_gpu() && !platform.is_cpu_with_simd()) + throw std::runtime_error("Error: unknown platform - " + platform.get_name() + "\n"); + + const auto& identifier = var->get_name(); + if (!identifier->is_codegen_instance_var()) + throw std::runtime_error("Error: atomic updates for non-instance variable\n"); + + const auto& codegen_intance_node = std::dynamic_pointer_cast( + identifier); + const auto& instance_name = codegen_intance_node->get_instance_var()->get_node_name(); + const auto& member_node = codegen_intance_node->get_member_var(); + const auto& member_name = member_node->get_node_name(); + + // Sanity checks. Note that there is a bit of duplication with `read_from_or_write_to_instance` + // but this is not crucial for now. + // TODO: remove this duplication! 
+ if (!instance_var_helper.is_an_instance_variable(member_name)) + throw std::runtime_error("Error: " + member_name + + " is not a member of the instance variable\n"); + auto codegen_var_with_type = instance_var_helper.get_variable(member_name); + if (!codegen_var_with_type->get_is_pointer()) + throw std::runtime_error("Error: atomic updates are allowed on pointer variables only\n"); + const auto& member_var_name = std::dynamic_pointer_cast(member_node); + if (!member_var_name->get_name()->is_indexed_name()) + throw std::runtime_error("Error: " + member_name + " is not an IndexedName\n"); + const auto& member_indexed_name = std::dynamic_pointer_cast( + member_var_name->get_name()); + if (!member_indexed_name->get_length()->is_name()) + throw std::runtime_error("Error: " + member_name + " must be indexed with a variable!"); + + // First, load the pointer variable from instance struct and process its index. + llvm::Value* instance_ptr = ir_builder.create_load(instance_name); + const int member_index = instance_var_helper.get_variable_index(member_name); + llvm::Value* member_ptr = ir_builder.get_struct_member_ptr(instance_ptr, member_index); + llvm::Value* instance_member = ir_builder.create_load(member_ptr); + llvm::Value* i64_index = get_index(*member_indexed_name); + + // For GPUs, we just need to create atomic add/subtract. + if (platform.is_gpu()) { + llvm::Value* ptr = ir_builder.create_inbounds_gep(instance_member, i64_index); + ir_builder.create_atomic_op(ptr, rhs, op); + } else { + // SIMD case is more elaborate. We will create a scalar block that will perform necessary + // update. The overall structure will be + // +---------------------------+ + // | | + // | | + // | br %atomic | + // +---------------------------+ + // | + // V + // +-----------------------------+ + // | | + // | %cmp = ... 
|<------+ + // | cond_br %cmp, %atomic, %rem | | + // +-----------------------------+ | + // | | | + // | +---------------+ + // V + // +---------------------------+ + // | | + // | | + // +---------------------------+ + + // Step 1: Create a vector of (replicated) starting addresses of the given member. + llvm::Value* start = ir_builder.create_member_addresses(instance_member); + + // Step 2: Create a vector alloca that will store addresses of member values. Then also + // create an array of these addresses (as pointers). While this can be moved to `IRBuilder`, + // the amount of code is rather negligible and thus can be left here. + const int vector_width = platform.get_instruction_width(); + llvm::Type* vi64_type = llvm::FixedVectorType::get(ir_builder.get_i64_type(), vector_width); + llvm::Type* array_type = llvm::ArrayType::get(ir_builder.get_fp_ptr_type(), vector_width); + + llvm::Value* ptrs_vec = ir_builder.create_alloca(/*name=*/"ptrs", vi64_type); + llvm::Value* ptrs_arr = + ir_builder.create_bitcast(ptrs_vec, + llvm::PointerType::get(array_type, /*AddressSpace=*/0)); + + // Step 3: Calculate offsets of the values in the member by: + // offset = start + (index * sizeof(fp_type)) + // Store this vector to a temporary for later reuse. + llvm::Value* offsets = ir_builder.create_member_offsets(start, i64_index); + ir_builder.create_store(ptrs_vec, offsets); + + // Step 4: Create a new block that will be used for atomic code generation. 
+ llvm::BasicBlock* body_bb = ir_builder.get_current_block(); + llvm::BasicBlock* cond_bb = body_bb->getNextNode(); + llvm::Function* func = body_bb->getParent(); + llvm::BasicBlock* atomic_bb = + llvm::BasicBlock::Create(*context, /*Name=*/"atomic.update", func, cond_bb); + llvm::BasicBlock* remaining_body_bb = + llvm::BasicBlock::Create(*context, /*Name=*/"for.body.remaining", func, cond_bb); + ir_builder.create_br_and_set_insertion_point(atomic_bb); + + // Step 5: Generate code for the atomic update: go through each element in the vector + // performing the computation. + llvm::Value* cmp = ir_builder.create_atomic_loop(ptrs_arr, rhs, op); + + // Create branch to close the loop and restore the insertion point. + ir_builder.create_cond_br(cmp, remaining_body_bb, atomic_bb); + ir_builder.set_insertion_point(remaining_body_bb); + } +} + +// Generating FOR loop in LLVM IR creates the following structure: +// +// +---------------------------+ +// | | +// | | +// | br %cond | +// +---------------------------+ +// | +// V +// +-----------------------------+ +// | | +// | %cond = ... |<------+ +// | cond_br %cond, %body, %exit | | +// +-----------------------------+ | +// | | | +// | V | +// | +------------------------+ | +// | | | | +// | | br %inc | | +// | +------------------------+ | +// | | | +// | V | +// | +------------------------+ | +// | | | | +// | | br %cond | | +// | +------------------------+ | +// | | | +// | +---------------+ +// V +// +---------------------------+ +// | | +// +---------------------------+ +void CodegenLLVMVisitor::visit_codegen_for_statement(const ast::CodegenForStatement& node) { + // Condition and increment blocks must be scalar. + ir_builder.generate_scalar_ir(); + + // Get the current and the next blocks within the function. 
+ llvm::BasicBlock* curr_block = ir_builder.get_current_block(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Create the basic blocks for FOR loop. + llvm::BasicBlock* for_cond = + llvm::BasicBlock::Create(*context, /*Name=*/"for.cond", func, next); + llvm::BasicBlock* for_body = + llvm::BasicBlock::Create(*context, /*Name=*/"for.body", func, next); + llvm::BasicBlock* for_inc = llvm::BasicBlock::Create(*context, /*Name=*/"for.inc", func, next); + llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"for.exit", func, next); + + // First, initialize the loop in the same basic block. If processing the remainder of the loop, + // no initialization happens. + const auto& main_loop_initialization = node.get_initialization(); + if (main_loop_initialization) + main_loop_initialization->accept(*this); + + // Branch to condition basic block and insert condition code there. + ir_builder.create_br_and_set_insertion_point(for_cond); + + // Extract the condition to decide whether to branch to the loop body or loop exit. + llvm::Value* cond = accept_and_get(node.get_condition()); + llvm::BranchInst* loop_br = ir_builder.create_cond_br(cond, for_body, exit); + ir_builder.set_insertion_point(for_body); + + // If not processing remainder of the loop, start vectorization. + if (platform.is_cpu_with_simd() && main_loop_initialization) + ir_builder.generate_vector_ir(); + + // Generate code for the loop body and create the basic block for the increment. + const auto& statement_block = node.get_statement_block(); + statement_block->accept(*this); + ir_builder.generate_scalar_ir(); + ir_builder.create_br_and_set_insertion_point(for_inc); + + // Process the increment. + node.get_increment()->accept(*this); + + // Create a branch to condition block, then generate exit code out of the loop. 
+ ir_builder.create_br(for_cond); + ir_builder.set_insertion_point(exit); +} + + +void CodegenLLVMVisitor::visit_codegen_function(const ast::CodegenFunction& node) { + const auto& name = node.get_node_name(); + const auto& arguments = node.get_arguments(); + + // Create the entry basic block of the function/procedure and point the local named values table + // to the symbol table. + llvm::Function* func = module->getFunction(name); + ir_builder.create_block_and_set_insertion_point(func); + ir_builder.set_function(func); + + // When processing a function, it returns a value named in NMODL. Therefore, we + // first run RenameVisitor to rename it into ret_. This will aid in avoiding + // symbolic conflicts. + std::string return_var_name = "ret_" + name; + const auto& block = node.get_statement_block(); + visitor::RenameVisitor v(name, return_var_name); + block->accept(v); + + // Allocate parameters on the stack and add them to the symbol table. + if (wrap_kernel_functions && node.get_is_kernel()) { + // If we wrap NMODL compute kernel, the parameter will be void*! Hence, + // we get the actual struct pointer type and allocate parameters on the + // stack with additional bitcast. + llvm::Type* struct_ty = get_codegen_var_type(*arguments[0]->get_type()); + ir_builder.allocate_and_wrap_kernel_arguments(func, arguments, struct_ty); + } else { + // Otherwise, nothing specific needed. + ir_builder.allocate_function_arguments(func, arguments); + } + + // Process function or procedure body. If the function is a compute kernel, enable + // vectorization. If this is the case, the return statement is handled in a + // separate visitor. 
+ if (node.get_is_kernel()) { + if (platform.is_cpu_with_simd()) { + ir_builder.generate_vector_ir(); + block->accept(*this); + ir_builder.generate_scalar_ir(); + } else if (platform.is_gpu()) { + block->accept(*this); + } else { // scalar + block->accept(*this); + } + } else { + block->accept(*this); + } + + // If function is a compute kernel, add a void terminator explicitly, since there is no + // `CodegenReturnVar` node. Also, set the necessary attributes. + if (node.get_is_kernel()) { + custom::Annotator::add_nmodl_compute_kernel_annotation(*func); + ir_builder.create_return(); + } + + // Clear local values stack and remove the pointer to the local symbol table. + ir_builder.clear_function(); +} + +void CodegenLLVMVisitor::visit_codegen_grid_stride(const ast::CodegenGridStride& node) { + ir_builder.create_grid_stride(); +} + +void CodegenLLVMVisitor::visit_codegen_return_statement(const ast::CodegenReturnStatement& node) { + if (!node.get_statement()->is_name()) + throw std::runtime_error("Error: CodegenReturnStatement must contain a name node\n"); + + std::string ret = "ret_" + ir_builder.get_current_function_name(); + llvm::Value* ret_value = ir_builder.create_load(ret); + ir_builder.create_return(ret_value); +} + +void CodegenLLVMVisitor::visit_codegen_thread_id(const ast::CodegenThreadId& node) { + ir_builder.create_thread_id(); +} + +void CodegenLLVMVisitor::visit_codegen_var_list_statement( + const ast::CodegenVarListStatement& node) { + llvm::Type* scalar_type = get_codegen_var_type(*node.get_var_type()); + for (const auto& variable: node.get_variables()) { + const auto& identifier = variable->get_name(); + std::string name = variable->get_node_name(); + + // Local variable can be a scalar (Node AST class) or an array (IndexedName AST class). For + // each case, create memory allocations. 
+ if (identifier->is_indexed_name()) { + const auto& indexed_name = std::dynamic_pointer_cast(identifier); + int length = get_num_elements(*indexed_name); + ir_builder.create_array_alloca(name, scalar_type, length); + } else if (identifier->is_name()) { + ir_builder.create_scalar_or_vector_alloca(name, scalar_type); + } else { + throw std::runtime_error("Error: unsupported local variable type\n"); + } + } +} + +void CodegenLLVMVisitor::visit_double(const ast::Double& node) { + ir_builder.create_fp_constant(node.get_value()); +} + +void CodegenLLVMVisitor::visit_float(const ast::Float& node) { + ir_builder.create_fp_constant(node.get_value()); +} + +void CodegenLLVMVisitor::visit_function_block(const ast::FunctionBlock& node) { + // do nothing. \todo: remove old function blocks from ast. +} + +void CodegenLLVMVisitor::visit_function_call(const ast::FunctionCall& node) { + const auto& name = node.get_node_name(); + llvm::Function* func = module->getFunction(name); + if (func) { + create_function_call(func, name, node.get_arguments()); + } else { + auto symbol = program_symtab->lookup(name); + if (symbol && symbol->has_any_property(symtab::syminfo::NmodlType::extern_method)) { + create_external_function_call(name, node.get_arguments()); + } else { + throw std::runtime_error("Error: unknown function name: " + name + "\n"); + } + } +} + +void CodegenLLVMVisitor::visit_if_statement(const ast::IfStatement& node) { + // If vectorizing the compute kernel with control flow, process it separately. + if (platform.is_cpu_with_simd() && ir_builder.vectorizing()) { + create_vectorized_control_flow_block(node); + return; + } + + // Get the current and the next blocks within the function. + llvm::BasicBlock* curr_block = ir_builder.get_current_block(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Add a true block and a merge block where the control flow merges. 
+ llvm::BasicBlock* true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + + // Add condition to the current block. + llvm::Value* cond = accept_and_get(node.get_condition()); + + // Process the true block. + ir_builder.set_insertion_point(true_block); + node.get_statement_block()->accept(*this); + ir_builder.create_br(merge_block); + + // Save the merge block and proceed with codegen for `else if` statements. + llvm::BasicBlock* exit = merge_block; + for (const auto& else_if: node.get_elseifs()) { + // Link the current block to the true and else blocks. + llvm::BasicBlock* else_block = + llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + ir_builder.set_insertion_point(curr_block); + ir_builder.create_cond_br(cond, true_block, else_block); + + // Process else block. + ir_builder.set_insertion_point(else_block); + cond = accept_and_get(else_if->get_condition()); + + // Reassign true and merge blocks respectively. Note that the new merge block has to be + // connected to the old merge block (tmp). + true_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + llvm::BasicBlock* tmp = merge_block; + merge_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + ir_builder.set_insertion_point(merge_block); + ir_builder.create_br(tmp); + + // Process true block. + ir_builder.set_insertion_point(true_block); + else_if->get_statement_block()->accept(*this); + ir_builder.create_br(merge_block); + curr_block = else_block; + } + + // Finally, generate code for `else` statement if it exists. 
+ const auto& elses = node.get_elses(); + llvm::BasicBlock* else_block; + if (elses) { + else_block = llvm::BasicBlock::Create(*context, /*Name=*/"", func, merge_block); + ir_builder.set_insertion_point(else_block); + elses->get_statement_block()->accept(*this); + ir_builder.create_br(merge_block); + } else { + else_block = merge_block; + } + ir_builder.set_insertion_point(curr_block); + ir_builder.create_cond_br(cond, true_block, else_block); + ir_builder.set_insertion_point(exit); +} + +void CodegenLLVMVisitor::visit_integer(const ast::Integer& node) { + ir_builder.create_i32_constant(node.get_value()); +} + +void CodegenLLVMVisitor::visit_program(const ast::Program& node) { + // Before generating LLVM: + // - convert function and procedure blocks into CodegenFunctions + // - gather information about AST. For now, information about functions + // and procedures is used only. + CodegenLLVMHelperVisitor v{platform}; + const auto& functions = v.get_codegen_functions(node); + instance_var_helper = v.get_instance_var_helper(); + program_symtab = node.get_symbol_table(); + std::string kernel_id = v.get_kernel_id(); + + // Initialize the builder for this NMODL program. + ir_builder.initialize(*program_symtab, kernel_id); + + // Create compile unit if adding debug information to the module. + if (add_debug_information) { + debug_builder.create_compile_unit(*module, module->getModuleIdentifier(), output_dir); + } + + // For every function, generate its declaration. Thus, we can look up + // `llvm::Function` in the symbol table in the module. + for (const auto& func: functions) { + create_function_declaration(*func); + } + + // Set the AST symbol table. + program_symtab = node.get_symbol_table(); + + // Proceed with code generation. Right now, we do not do + // node.visit_children(*this); + // The reason is that the node may contain AST nodes for which the visitor functions have been + // defined. 
In our implementation we assume that the code generation is happening within the + function scope. To avoid generating code outside of functions, visit only them for now. + // \todo: Handle what is mentioned here. + for (const auto& func: functions) { + visit_codegen_function(*func); + } + + // Finalize the debug information. + if (add_debug_information) { + debug_builder.finalize(); + } + + // Verify the generated LLVM IR module. + std::string error; + llvm::raw_string_ostream ostream(error); + if (verifyModule(*module, &ostream)) { + throw std::runtime_error("Error: incorrect IR has been generated!\n" + ostream.str()); + } + + // Handle optimization passes for GPUs separately. + if (platform.is_cpu() && opt_level_ir) { + logger->info("Running LLVM optimisation passes for CPU platforms"); + utils::initialise_optimisation_passes(); + utils::optimise_module(*module, opt_level_ir); + } + + // Pass 1: replace LLVM math intrinsics with library calls. + utils::replace_with_lib_functions(platform, *module); + + // Pass 2: annotate NMODL compute kernels. + utils::annotate(platform, *module); + + // Handle GPU optimizations (CUDA platforms only for now). + if (platform.is_gpu()) { + // We only support CUDA backends anyway, so this works for now. + utils::initialise_nvptx_passes(); + + std::string target_asm; + utils::optimise_module_for_nvptx(platform, *module, opt_level_ir, target_asm); + + logger->debug("Dumping generated IR...\n" + dump_module()); + logger->debug("Dumping generated PTX...\n" + target_asm); + } else { + // Workaround for debug outputs. 
+ logger->debug("Dumping generated IR...\n" + dump_module()); + } + + // Setup CodegenHelper for C++ wrapper file + setup(node); + // Print C++ wrapper file + print_wrapper_routines(); + // Print LLVM IR module to .ll file + utils::save_ir_to_ll_file(*module, output_dir + "/" + mod_filename); +} + +void CodegenLLVMVisitor::print_mechanism_range_var_structure(bool) { + printer->add_newline(2); + printer->add_line("/** Instance Struct passed as argument to LLVM IR kernels */"); + printer->start_block(fmt::format("struct {} ", instance_struct())); + for (const auto& variable: instance_var_helper.instance->get_codegen_vars()) { + auto is_pointer = variable->get_is_pointer(); + auto name = to_nmodl(variable->get_name()); + auto qualifier = is_constant_variable(name) ? "const " : ""; + auto nmodl_type = variable->get_type()->get_type(); + auto pointer = is_pointer ? "*" : ""; + auto var_name = variable->get_node_name(); + switch (nmodl_type) { +#define DISPATCH(type, c_type) \ + case type: \ + printer->add_line(fmt::format("{}{}{} {}{};", \ + qualifier, \ + c_type, \ + pointer, \ + is_pointer ? 
ptr_type_qualifier() : "", \ + var_name)); \ + break; + + DISPATCH(ast::AstNodeType::FLOAT, "float"); + DISPATCH(ast::AstNodeType::DOUBLE, "double"); + DISPATCH(ast::AstNodeType::INTEGER, "int"); + +#undef DISPATCH + default: + throw std::runtime_error("Error: unsupported type found in instance struct"); + } + } + printer->end_block(); + printer->add_text(";"); + printer->add_newline(); +} + +void CodegenLLVMVisitor::print_instance_variable_setup() { + if (range_variable_setup_required()) { + print_setup_range_variable(); + } + + printer->add_newline(2); + printer->add_line("/** initialize mechanism instance variables */"); + printer->start_block("static inline void setup_instance(NrnThread* nt, Memb_list* ml) "); + printer->add_line( + fmt::format("{0}* inst = ({0}*) mem_alloc(1, sizeof({0}));", instance_struct())); + + std::string stride; + printer->add_line("int pnodecount = ml->_nodecount_padded;"); + stride = "*pnodecount"; + + printer->add_line("Datum* indexes = ml->pdata;"); + + std::string float_type = default_float_data_type(); + std::string int_type = default_int_data_type(); + std::string float_type_pointer = float_type + "*"; + std::string int_type_pointer = int_type + "*"; + + int id = 0; + std::vector variables_to_free; + + for (auto& var: info.codegen_float_variables) { + auto name = var->get_name(); + auto range_var_type = get_range_var_float_type(var); + if (float_type == range_var_type) { + auto variable = fmt::format("ml->data+{}{}", id, stride); + printer->add_line(fmt::format("inst->{} = {};", name, variable)); + } else { + printer->add_line(fmt::format( + "inst->{} = setup_range_variable(ml->data+{}{}, pnodecount);", name, id, stride)); + variables_to_free.push_back(name); + } + id += var->get_length(); + } + + for (auto& var: info.codegen_int_variables) { + auto name = var.symbol->get_name(); + std::string variable = name; + std::string type = ""; + if (var.is_index || var.is_integer) { + variable = "ml->pdata"; + type = int_type_pointer; + } 
else if (var.is_vdata) { + variable = "nt->_vdata"; + type = "void**"; + } else { + variable = "nt->_data"; + type = info.artificial_cell ? "void*" : float_type_pointer; + } + printer->add_line(fmt::format("inst->{} = {};", name, variable)); + } + + int index_id = 0; + // for integer variables, there should be index + for (const auto& int_var: info.codegen_int_variables) { + std::string var_name = int_var.symbol->get_name() + "_index"; + // Create for loop that instantiates the ion__index with + // indexes[*pnodecount] + printer->add_line(fmt::format("inst->{} = indexes+{}*pnodecount;", var_name, index_id)); + index_id++; + } + + // Pass voltage pointer to the instance struct + printer->add_line("inst->voltage = nt->_actual_v;"); + + // Pass ml->nodeindices pointer to node_index + printer->add_line("inst->node_index = ml->nodeindices;"); + + // Setup rhs, d and their shadow vectors + printer->add_line(fmt::format("inst->{} = nt->_actual_rhs;", naming::NTHREAD_RHS)); + printer->add_line(fmt::format("inst->{} = nt->_actual_d;", naming::NTHREAD_D)); + printer->add_line(fmt::format("inst->{} = nt->_shadow_rhs;", naming::NTHREAD_RHS_SHADOW)); + printer->add_line(fmt::format("inst->{} = nt->_shadow_d;", naming::NTHREAD_D_SHADOW)); + + // Setup global variables + printer->add_line(fmt::format("inst->{0} = nt->{0};", naming::NTHREAD_T_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = nt->{0};", naming::NTHREAD_DT_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = {0};", naming::CELSIUS_VARIABLE)); + printer->add_line(fmt::format("inst->{0} = {0};", naming::SECOND_ORDER_VARIABLE)); + printer->add_line(fmt::format("inst->{} = ml->nodecount;", naming::MECH_NODECOUNT_VAR)); + + printer->add_line("ml->instance = inst;"); + printer->end_block(3); + + printer->add_line("/** cleanup mechanism instance variables */"); + printer->start_block("static inline void cleanup_instance(Memb_list* ml) "); + printer->add_line(fmt::format("{0}* inst = ({0}*) ml->instance;", 
instance_struct())); + if (range_variable_setup_required()) { + for (auto& var: variables_to_free) { + printer->add_line(fmt::format("mem_free((void*)inst->{});", var)); + } + } + printer->add_line("mem_free((void*)inst);"); + printer->end_block(1); +} + +CodegenLLVMVisitor::ParamVector CodegenLLVMVisitor::get_compute_function_parameter() { + auto params = ParamVector(); + params.emplace_back(param_type_qualifier(), + fmt::format("{}*", instance_struct()), + ptr_type_qualifier(), + "inst"); + return params; +} + +void CodegenLLVMVisitor::print_backend_compute_routine_decl() { + auto params = get_compute_function_parameter(); + auto compute_function = compute_method_name(BlockType::Initial); + + printer->add_newline(2); + printer->add_line( + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); + + if (info.nrn_cur_required()) { + compute_function = compute_method_name(BlockType::Equation); + printer->add_line( + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); + } + + if (info.nrn_state_required()) { + compute_function = compute_method_name(BlockType::State); + printer->add_line( + fmt::format("extern void {}({});", compute_function, get_parameter_str(params))); + } +} + +// Copied from CodegenIspcVisitor +void CodegenLLVMVisitor::print_wrapper_routine(const std::string& wrapper_function, + BlockType type) { + static const auto args = "NrnThread* nt, Memb_list* ml, int type"; + const auto function_name = method_name(wrapper_function); + auto compute_function = compute_method_name(type); + + printer->add_newline(2); + printer->start_block(fmt::format("void {}({})", function_name, args)); + printer->add_line("int nodecount = ml->nodecount;"); + // clang-format off + printer->add_line(fmt::format("{0}* {1}inst = ({0}*) ml->instance;", instance_struct(), ptr_type_qualifier())); + // clang-format on + + if (type == BlockType::Initial) { + printer->add_newline(); + printer->add_line("setup_instance(nt, ml);"); 
+ printer->add_newline(); + printer->start_block("if (_nrn_skip_initmodel)"); + printer->add_line("return;"); + printer->end_block(); + printer->add_newline(); + } + + printer->add_line(fmt::format("{}(inst);", compute_function)); + printer->end_block(); + printer->add_newline(); +} + +void CodegenLLVMVisitor::print_nrn_init(bool skip_init_check) { + print_wrapper_routine(naming::NRN_INIT_METHOD, BlockType::Initial); +} + +void CodegenLLVMVisitor::print_nrn_cur() { + print_wrapper_routine(naming::NRN_CUR_METHOD, BlockType::Equation); +} + +void CodegenLLVMVisitor::print_nrn_state() { + print_wrapper_routine(naming::NRN_STATE_METHOD, BlockType::State); +} + +void CodegenLLVMVisitor::print_wrapper_routines() { + printer = wrapper_printer; + wrapper_codegen = true; + CodegenCVisitor::print_codegen_routines(); +} + +void CodegenLLVMVisitor::visit_procedure_block(const ast::ProcedureBlock& node) { + // do nothing. \todo: remove old procedures from ast. +} + +void CodegenLLVMVisitor::visit_unary_expression(const ast::UnaryExpression& node) { + ast::UnaryOp op = node.get_op().get_value(); + llvm::Value* value = accept_and_get(node.get_expression()); + ir_builder.create_unary_op(value, op); +} + +void CodegenLLVMVisitor::visit_var_name(const ast::VarName& node) { + llvm::Value* value = read_variable(node); + ir_builder.maybe_replicate_value(value); +} + +void CodegenLLVMVisitor::visit_while_statement(const ast::WhileStatement& node) { + // Get the current and the next blocks within the function. + llvm::BasicBlock* curr_block = ir_builder.get_current_block(); + llvm::BasicBlock* next = curr_block->getNextNode(); + llvm::Function* func = curr_block->getParent(); + + // Add a header and the body blocks. 
+ llvm::BasicBlock* header = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* body = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + llvm::BasicBlock* exit = llvm::BasicBlock::Create(*context, /*Name=*/"", func, next); + + ir_builder.create_br_and_set_insertion_point(header); + + + // Generate code for condition and create branch to the body block. + llvm::Value* condition = accept_and_get(node.get_condition()); + ir_builder.create_cond_br(condition, body, exit); + + ir_builder.set_insertion_point(body); + node.get_statement_block()->accept(*this); + ir_builder.create_br(header); + + ir_builder.set_insertion_point(exit); +} + +// for the llvm backend we only support breakpoint and derivative blocks +void CodegenLLVMVisitor::print_compute_functions() { + print_nrn_cur(); + print_nrn_state(); +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/codegen_llvm_visitor.hpp b/src/codegen/llvm/codegen_llvm_visitor.hpp new file mode 100644 index 0000000000..43b0cbd3c7 --- /dev/null +++ b/src/codegen/llvm/codegen_llvm_visitor.hpp @@ -0,0 +1,367 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief LLVM based code generation backend implementation for CoreNEURON + * + * \file + * \brief \copybrief nmodl::codegen::CodegenLLVMVisitor + */ + +#include +#include + +#include "codegen/codegen_c_visitor.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "codegen/llvm/llvm_debug_builder.hpp" +#include "codegen/llvm/llvm_ir_builder.hpp" +#include "symtab/symbol_table.hpp" +#include "visitors/ast_visitor.hpp" + +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" + +namespace nmodl { +namespace codegen { + +/** + * @defgroup llvm LLVM Based Code Generation Implementation + * @brief Implementations of LLVM based code generation + * + * @defgroup llvm_backends LLVM Codegen Backend + * @ingroup llvm + * @brief Code generation backends for NMODL AST to LLVM IR + * @{ + */ + + +/** + * \class CodegenLLVMVisitor + * \brief %Visitor for transforming NMODL AST to LLVM IR + */ +class CodegenLLVMVisitor: public CodegenCVisitor { + /// Name of mod file (without .mod suffix). + std::string mod_filename; + + /// Output directory for code generation. + std::string output_dir; + + /// flag to indicate if visitor should print the the wrapper code + bool wrapper_codegen = false; + + private: + /// Underlying LLVM context. + std::unique_ptr context = std::make_unique(); + + /// Underlying LLVM module. + std::unique_ptr module = std::make_unique(mod_filename, *context); + + /// LLVM IR builder. + IRBuilder ir_builder; + + /// Debug information builder. + DebugBuilder debug_builder; + + /// Add debug information to the module. + bool add_debug_information; + + /// Instance variable helper. + InstanceVarHelper instance_var_helper; + + /// Optimisation level for LLVM IR transformations. + int opt_level_ir; + + /// Target platform for the code generation. 
+ Platform platform; + + /// Wrap calls to NMODL compute kernels by taking void* and casting to + /// appropriate struct type pointer. Used when executing kernels with JIT. + bool wrap_kernel_functions; + + public: + CodegenLLVMVisitor(const std::string& mod_filename, + const std::string& output_dir, + Platform& platform, + int opt_level_ir, + bool add_debug_information = false, + std::vector fast_math_flags = {}, + bool wrap_kernel_functions = false) + : CodegenCVisitor(mod_filename, + output_dir, + platform.is_single_precision() ? "float" : "double", + false, + ".ll", + ".cpp") + , mod_filename(mod_filename) + , output_dir(output_dir) + , platform(platform) + , opt_level_ir(opt_level_ir) + , add_debug_information(add_debug_information) + , ir_builder(*context, platform, fast_math_flags) + , debug_builder(*module) + , wrap_kernel_functions(wrap_kernel_functions) { + instance_struct_type_suffix = "_instance_var__type"; + print_procedures_and_functions = false; + } + + CodegenLLVMVisitor(const std::string& mod_filename, + std::ostream& stream, + Platform& platform, + int opt_level_ir, + bool add_debug_information = false, + std::vector fast_math_flags = {}, + bool wrap_kernel_functions = false) + : CodegenCVisitor(mod_filename, + stream, + platform.is_single_precision() ? "float" : "double", + false, + ".ll", + ".cpp") + , mod_filename(mod_filename) + , output_dir(".") + , platform(platform) + , opt_level_ir(opt_level_ir) + , add_debug_information(add_debug_information) + , ir_builder(*context, platform, fast_math_flags) + , debug_builder(*module) + , wrap_kernel_functions(wrap_kernel_functions) { + instance_struct_type_suffix = "_instance_var__type"; + print_procedures_and_functions = false; + } + + /// Dumps the generated LLVM IR module to string. 
+ std::string dump_module() const { + std::string str; + llvm::raw_string_ostream os(str); + os << *module; + os.flush(); + return str; + } + + /// Fills the container with the names of kernel functions from the MOD file. + void find_kernel_names(std::vector& container); + + /// Returns underlying module. + std::unique_ptr get_module() { + return std::move(module); + } + + /// Returns shared_ptr to generated ast::InstanceStruct. + std::shared_ptr get_instance_struct_ptr() { + return instance_var_helper.instance; + } + + /// Returns InstanceVarHelper for the given MOD file. + InstanceVarHelper get_instance_var_helper() { + return instance_var_helper; + } + + /// Returns vector width + int get_vector_width() const { + return platform.get_instruction_width(); + } + + // Visitors. + void visit_binary_expression(const ast::BinaryExpression& node) override; + void visit_boolean(const ast::Boolean& node) override; + void visit_codegen_atomic_statement(const ast::CodegenAtomicStatement& node) override; + void visit_codegen_for_statement(const ast::CodegenForStatement& node) override; + void visit_codegen_function(const ast::CodegenFunction& node) override; + void visit_codegen_grid_stride(const ast::CodegenGridStride& node) override; + void visit_codegen_return_statement(const ast::CodegenReturnStatement& node) override; + void visit_codegen_thread_id(const ast::CodegenThreadId& node) override; + void visit_codegen_var_list_statement(const ast::CodegenVarListStatement& node) override; + void visit_double(const ast::Double& node) override; + void visit_float(const ast::Float& node) override; + void visit_function_block(const ast::FunctionBlock& node) override; + void visit_function_call(const ast::FunctionCall& node) override; + void visit_if_statement(const ast::IfStatement& node) override; + void visit_integer(const ast::Integer& node) override; + void visit_procedure_block(const ast::ProcedureBlock& node) override; + void visit_program(const ast::Program& node) override; 
+ void visit_statement_block(const ast::StatementBlock& node) override; + void visit_unary_expression(const ast::UnaryExpression& node) override; + void visit_var_name(const ast::VarName& node) override; + void visit_while_statement(const ast::WhileStatement& node) override; + + /* + * Override functions from CodegenCVisitor to the ones from visitor::ConstAstVisitor as it was + * originally for CodegenLLVMVisitor + */ + void visit_binary_operator(const ast::BinaryOperator& node) override { + visitor::ConstAstVisitor::visit_binary_operator(node); + } + void visit_else_if_statement(const ast::ElseIfStatement& node) override { + visitor::ConstAstVisitor::visit_else_if_statement(node); + } + void visit_else_statement(const ast::ElseStatement& node) override { + visitor::ConstAstVisitor::visit_else_statement(node); + } + void visit_from_statement(const ast::FromStatement& node) override { + visitor::ConstAstVisitor::visit_from_statement(node); + } + void visit_eigen_newton_solver_block(const ast::EigenNewtonSolverBlock& node) override { + visitor::ConstAstVisitor::visit_eigen_newton_solver_block(node); + } + void visit_eigen_linear_solver_block(const ast::EigenLinearSolverBlock& node) override { + visitor::ConstAstVisitor::visit_eigen_linear_solver_block(node); + } + void visit_indexed_name(const ast::IndexedName& node) override { + visitor::ConstAstVisitor::visit_indexed_name(node); + } + void visit_local_list_statement(const ast::LocalListStatement& node) override { + visitor::ConstAstVisitor::visit_local_list_statement(node); + } + void visit_name(const ast::Name& node) override { + visitor::ConstAstVisitor::visit_name(node); + } + void visit_paren_expression(const ast::ParenExpression& node) override { + visitor::ConstAstVisitor::visit_paren_expression(node); + } + void visit_prime_name(const ast::PrimeName& node) override { + visitor::ConstAstVisitor::visit_prime_name(node); + } + void visit_string(const ast::String& node) override { + 
visitor::ConstAstVisitor::visit_string(node); + } + void visit_solution_expression(const ast::SolutionExpression& node) override { + visitor::ConstAstVisitor::visit_solution_expression(node); + } + void visit_unary_operator(const ast::UnaryOperator& node) override { + visitor::ConstAstVisitor::visit_unary_operator(node); + } + void visit_unit(const ast::Unit& node) override { + visitor::ConstAstVisitor::visit_unit(node); + } + void visit_verbatim(const ast::Verbatim& node) override { + visitor::ConstAstVisitor::visit_verbatim(node); + } + void visit_watch_statement(const ast::WatchStatement& node) override { + visitor::ConstAstVisitor::visit_watch_statement(node); + } + void visit_derivimplicit_callback(const ast::DerivimplicitCallback& node) override { + visitor::ConstAstVisitor::visit_derivimplicit_callback(node); + } + void visit_for_netcon(const ast::ForNetcon& node) override { + visitor::ConstAstVisitor::visit_for_netcon(node); + } + + /* + * Functions related to printing the wrapper cpp file + */ + void print_wrapper_routines() override; + void print_wrapper_headers_include(); + void print_data_structures(); + void print_mechanism_range_var_structure(bool) override; + void print_instance_variable_setup() override; + + /** + * Print the \c nrn\_init function definition + * \param skip_init_check \c true if we want the generated code to execute the initialization + * conditionally + */ + void print_nrn_init(bool skip_init_check = true) override; + /** + * Print nrn_state / state update function definition + */ + void print_nrn_state() override; + /** + * Print nrn_cur / current update function definition + */ + void print_nrn_cur() override; + /* + * Declare the external compute functions (nrn_init, nrn_cur and nrn_state) + */ + void print_backend_compute_routine_decl() override; + /* + * Define the wrappers for the external compute functions (nrn_init, nrn_cur and nrn_state) + */ + void print_backend_compute_routine(); + /* + * Print the wrapper routine based 
on the parameters given + * \param wrapper_function The name of the function to wrap + * \param type The \c BlockType that this function is based on + */ + void print_wrapper_routine(const std::string& wrapper_function, BlockType type); + /* + * Function that returns a vector of Parameters needed to be passed to the compute routines. + * The first argument should be an object of \c mechanism_instance_struct_type_name + */ + CodegenLLVMVisitor::ParamVector get_compute_function_parameter(); + + /// print compute functions relevant for this backend + void print_compute_functions() override; + + private: + /// Annotates kernel function with NVVM metadata. + void annotate_kernel_with_nvvm(llvm::Function* kernel, const std::string& annotation); + + /// Handles NVVM function annotations when we create the wrapper functions. All original kernels + /// should be "device" functions and wrappers "kernel" functions + void annotate_wrapper_kernels_with_nvvm(); + + /// Accepts the given AST node and returns the processed value. + llvm::Value* accept_and_get(const std::shared_ptr& node); + + /// Creates a call to an external function (e.g pow, exp, etc.) + void create_external_function_call(const std::string& name, + const ast::ExpressionVector& arguments); + + /// Creates a call to NMODL function or procedure in the same MOD file. + void create_function_call(llvm::Function* func, + const std::string& name, + const ast::ExpressionVector& arguments); + + /// Fills values vector with processed NMODL function call arguments. + void create_function_call_arguments(const ast::ExpressionVector& arguments, + ValueVector& arg_values); + + /// Creates the function declaration for the given AST node. + void create_function_declaration(const ast::CodegenFunction& node); + + /// Creates a call to `printf` function. + void create_printf_call(const ast::ExpressionVector& arguments); + + /// Creates a vectorized version of the LLVM IR for the simple control flow statement. 
+ void create_vectorized_control_flow_block(const ast::IfStatement& node); + + /// Returns LLVM type for the given CodegenVarType AST node. + llvm::Type* get_codegen_var_type(const ast::CodegenVarType& node); + + /// Returns the index value from the IndexedName AST node. + llvm::Value* get_index(const ast::IndexedName& node); + + /// Returns an instance struct type. + llvm::Type* get_instance_struct_type(); + + /// Returns the number of elements in the array specified by the IndexedName AST node. + int get_num_elements(const ast::IndexedName& node); + + /// Returns whether the function is an NMODL compute kernel. + bool is_kernel_function(const std::string& function_name); + + /// If the value to store is specified, writes it to the instance. Otherwise, returns the + /// instance variable. + llvm::Value* read_from_or_write_to_instance(const ast::CodegenInstanceVar& node, + llvm::Value* maybe_value_to_store = nullptr); + + /// Reads the given variable and returns the processed value. + llvm::Value* read_variable(const ast::VarName& node); + + //// Writes the value to the given variable. + void write_to_variable(const ast::VarName& node, llvm::Value* value); +}; + +/** \} */ // end of llvm_backends + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_debug_builder.cpp b/src/codegen/llvm/llvm_debug_builder.cpp new file mode 100644 index 0000000000..5682a6e904 --- /dev/null +++ b/src/codegen/llvm/llvm_debug_builder.cpp @@ -0,0 +1,63 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "codegen/llvm/llvm_debug_builder.hpp" + +namespace nmodl { +namespace codegen { + + +static constexpr const char debug_version_key[] = "Debug Version"; + + +void DebugBuilder::add_function_debug_info(llvm::Function* function, Location* loc) { + // Create the function debug type (subroutine type). We are not interested in parameters and + // types, and therefore passing llvm::None as argument suffices for now. + llvm::DISubroutineType* subroutine_type = di_builder.createSubroutineType( + di_builder.getOrCreateTypeArray(llvm::None)); + llvm::DISubprogram::DISPFlags sp_flags = llvm::DISubprogram::SPFlagDefinition | + llvm::DISubprogram::SPFlagOptimized; + // If there is no location associated with the function, just use 0. + int line = loc ? loc->line : 0; + llvm::DISubprogram* program = di_builder.createFunction(compile_unit, + function->getName(), + function->getName(), + file, + line, + subroutine_type, + line, + llvm::DINode::FlagZero, + sp_flags); + function->setSubprogram(program); + di_builder.finalizeSubprogram(program); +} + +void DebugBuilder::create_compile_unit(llvm::Module& module, + const std::string& debug_filename, + const std::string& debug_output_dir) { + // Create the debug file and compile unit for the module. + file = di_builder.createFile(debug_filename, debug_output_dir); + compile_unit = di_builder.createCompileUnit(llvm::dwarf::DW_LANG_C, + file, + /*Producer=*/"NMODL-LLVM", + /*isOptimized=*/false, + /*Flags=*/"", + /*RV=*/0); + + // Add a flag to the module to specify that it has debug information. 
+ if (!module.getModuleFlag(debug_version_key)) { + module.addModuleFlag(llvm::Module::Warning, + debug_version_key, + llvm::DEBUG_METADATA_VERSION); + } +} + +void DebugBuilder::finalize() { + di_builder.finalize(); +} +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_debug_builder.hpp b/src/codegen/llvm/llvm_debug_builder.hpp new file mode 100644 index 0000000000..9322cd461a --- /dev/null +++ b/src/codegen/llvm/llvm_debug_builder.hpp @@ -0,0 +1,70 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + +namespace nmodl { +namespace codegen { + +/// A struct to store AST location information. +/// \todo Currently, not all AST nodes have location information. Moreover, +/// some may not have it as they were artificially introduced (e.g. +/// CodegenForStatement). This simple wrapper suffices for now, but in future +/// we may want to handle this properly. +struct Location { + /// Line in the file. + int line; + + /// Column in the file. + int column; +}; + + +/** + * \class DebugBuilder + * \brief A helper class to create debug information for LLVM IR module. + * \todo Only function debug information is supported. + */ +class DebugBuilder { + private: + /// Debug information builder. + llvm::DIBuilder di_builder; + + /// LLVM context. + llvm::LLVMContext& context; + + /// Debug compile unit for the module. + llvm::DICompileUnit* compile_unit = nullptr; + + /// Debug file pointer. 
+ llvm::DIFile* file = nullptr; + + public: + DebugBuilder(llvm::Module& module) + : di_builder(module) + , context(module.getContext()) {} + + /// Adds function debug information with an optional location. + void add_function_debug_info(llvm::Function* function, Location* loc = nullptr); + + /// Creates the compile unit for and sets debug flags for the module. + void create_compile_unit(llvm::Module& module, + const std::string& debug_filename, + const std::string& debug_output_dir); + + /// Finalizes the debug information. + void finalize(); +}; +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_ir_builder.cpp b/src/codegen/llvm/llvm_ir_builder.cpp new file mode 100644 index 0000000000..c8ad11c205 --- /dev/null +++ b/src/codegen/llvm/llvm_ir_builder.cpp @@ -0,0 +1,706 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "codegen/llvm/llvm_ir_builder.hpp" +#include "ast/all.hpp" + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/ValueSymbolTable.h" + +namespace nmodl { +namespace codegen { + + +/****************************************************************************************/ +/* LLVM type utilities */ +/****************************************************************************************/ + +llvm::Type* IRBuilder::get_boolean_type() { + return llvm::Type::getInt1Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i8_ptr_type() { + return llvm::Type::getInt8PtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i32_type() { + return llvm::Type::getInt32Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i32_ptr_type() { + return llvm::Type::getInt32PtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_i64_type() { + return llvm::Type::getInt64Ty(builder.getContext()); +} + +llvm::Type* IRBuilder::get_fp_type() { + if (platform.is_single_precision()) + return llvm::Type::getFloatTy(builder.getContext()); + return llvm::Type::getDoubleTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_fp_ptr_type() { + if (platform.is_single_precision()) + return llvm::Type::getFloatPtrTy(builder.getContext()); + return llvm::Type::getDoublePtrTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_void_type() { + return llvm::Type::getVoidTy(builder.getContext()); +} + +llvm::Type* IRBuilder::get_struct_ptr_type(const std::string& struct_type_name, + TypeVector& member_types) { + llvm::StructType* llvm_struct_type = llvm::StructType::getTypeByName(builder.getContext(), + struct_type_name); + + if (!llvm_struct_type) { + llvm_struct_type = llvm::StructType::create(builder.getContext(), struct_type_name); + llvm_struct_type->setBody(member_types); + } + + return 
llvm::PointerType::get(llvm_struct_type, /*AddressSpace=*/0); +} + + +/****************************************************************************************/ +/* LLVM value utilities */ +/****************************************************************************************/ + +llvm::Value* IRBuilder::lookup_value(const std::string& value_name) { + auto value = current_function->getValueSymbolTable()->lookup(value_name); + if (!value) + throw std::runtime_error("Error: variable " + value_name + " is not in the scope\n"); + return value; +} + +llvm::Value* IRBuilder::pop_last_value() { + // Check if the stack is empty. + if (value_stack.empty()) + throw std::runtime_error("Error: popping a value from the empty stack\n"); + + // Return the last added value and delete it from the stack. + llvm::Value* last = value_stack.back(); + value_stack.pop_back(); + return last; +} + +/****************************************************************************************/ +/* LLVM constants utilities */ +/****************************************************************************************/ + +void IRBuilder::create_boolean_constant(int value) { + if (platform.is_cpu_with_simd() && vectorize) { + value_stack.push_back(get_vector_constant(get_boolean_type(), value)); + } else { + value_stack.push_back(get_scalar_constant(get_boolean_type(), value)); + } +} + +void IRBuilder::create_fp_constant(const std::string& value) { + if (platform.is_cpu_with_simd() && vectorize) { + value_stack.push_back(get_vector_constant(get_fp_type(), value)); + } else { + value_stack.push_back(get_scalar_constant(get_fp_type(), value)); + } +} + +llvm::Value* IRBuilder::create_global_string(const ast::String& node) { + return builder.CreateGlobalStringPtr(node.get_value()); +} + +void IRBuilder::create_i32_constant(int value) { + if (platform.is_cpu_with_simd() && vectorize) { + value_stack.push_back(get_vector_constant(get_i32_type(), value)); + } else { + 
value_stack.push_back(get_scalar_constant(get_i32_type(), value)); + } +} + +template +llvm::Value* IRBuilder::get_scalar_constant(llvm::Type* type, V value) { + return C::get(type, value); +} + +template +llvm::Value* IRBuilder::get_vector_constant(llvm::Type* type, V value) { + int vector_width = platform.get_instruction_width(); + + ConstantVector constants; + for (unsigned i = 0; i < vector_width; ++i) { + const auto& element = C::get(type, value); + constants.push_back(element); + } + return llvm::ConstantVector::get(constants); +} + +/****************************************************************************************/ +/* LLVM function utilities */ +/****************************************************************************************/ + +void IRBuilder::allocate_function_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments) { + unsigned i = 0; + for (auto& arg: function->args()) { + std::string arg_name = nmodl_arguments[i++].get()->get_node_name(); + llvm::Type* arg_type = arg.getType(); + llvm::Value* alloca = create_alloca(arg_name, arg_type); + arg.setName(arg_name); + builder.CreateStore(&arg, alloca); + } +} + +void IRBuilder::allocate_and_wrap_kernel_arguments( + llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments, + llvm::Type* struct_type) { + // In theory, this should never happen but let's guard anyway. + if (nmodl_arguments.size() != 1) { + throw std::runtime_error("Error: NMODL computer kernel must have a single argument\n"); + } + + // Bitcast void* pointer provided as compute kernel argument to mechanism data type. 
+ llvm::Value* data_ptr = create_bitcast(function->getArg(0), struct_type); + + std::string arg_name = nmodl_arguments[0].get()->get_node_name(); + llvm::Value* alloca = create_alloca(arg_name, struct_type); + builder.CreateStore(data_ptr, alloca); +} + +std::string IRBuilder::get_current_function_name() { + return current_function->getName().str(); +} + +void IRBuilder::create_function_call(llvm::Function* callee, + ValueVector& arguments, + bool use_result) { + llvm::Value* call_instruction = builder.CreateCall(callee, arguments); + if (use_result) + value_stack.push_back(call_instruction); +} + +void IRBuilder::create_intrinsic(const std::string& name, + ValueVector& argument_values, + TypeVector& argument_types) { + // Process 'pow' call separately. + if (name == "pow") { + llvm::Value* pow_intrinsic = builder.CreateIntrinsic(llvm::Intrinsic::pow, + {argument_types.front()}, + argument_values); + value_stack.push_back(pow_intrinsic); + return; + } + + // Create other intrinsics. + unsigned intrinsic_id = llvm::StringSwitch(name) + .Case("ceil", llvm::Intrinsic::ceil) + .Case("cos", llvm::Intrinsic::cos) + .Case("exp", llvm::Intrinsic::exp) + .Case("fabs", llvm::Intrinsic::fabs) + .Case("floor", llvm::Intrinsic::floor) + .Case("log", llvm::Intrinsic::log) + .Case("log10", llvm::Intrinsic::log10) + .Case("sin", llvm::Intrinsic::sin) + .Case("sqrt", llvm::Intrinsic::sqrt) + .Default(llvm::Intrinsic::not_intrinsic); + if (intrinsic_id) { + llvm::Value* intrinsic = + builder.CreateIntrinsic(intrinsic_id, argument_types, argument_values); + value_stack.push_back(intrinsic); + } else { + throw std::runtime_error("Error: calls to " + name + " are not valid or not supported\n"); + } +} + +/****************************************************************************************/ +/* LLVM instruction utilities */ +/****************************************************************************************/ + +llvm::Value* IRBuilder::create_alloca(const std::string& name, 
llvm::Type* type) { + // If insertion point for `alloca` instructions is not set, then create the instruction in the + // entry block and set it to be the insertion point. + if (!alloca_ip) { + // Get the entry block and insert the `alloca` instruction there. + llvm::BasicBlock* current_block = builder.GetInsertBlock(); + llvm::BasicBlock& entry_block = current_block->getParent()->getEntryBlock(); + builder.SetInsertPoint(&entry_block); + llvm::Value* alloca = builder.CreateAlloca(type, /*ArraySize=*/nullptr, name); + + // Set the `alloca` instruction insertion point and restore the insertion point for the next + // set of instructions. + alloca_ip = llvm::cast(alloca); + builder.SetInsertPoint(current_block); + return alloca; + } + + // Create `alloca` instruction. + llvm::BasicBlock* alloca_block = alloca_ip->getParent(); + const auto& data_layout = alloca_block->getModule()->getDataLayout(); + auto* alloca = new llvm::AllocaInst(type, + data_layout.getAllocaAddrSpace(), + /*ArraySize=*/nullptr, + data_layout.getPrefTypeAlign(type), + name); + + // Insert `alloca` at the specified insertion point and reset it for the next instructions. 
+ alloca_block->getInstList().insertAfter(alloca_ip->getIterator(), alloca); + alloca_ip = alloca; + return alloca; +} + +void IRBuilder::create_array_alloca(const std::string& name, + llvm::Type* element_type, + int num_elements) { + llvm::Type* array_type = llvm::ArrayType::get(element_type, num_elements); + create_alloca(name, array_type); +} + +ast::BinaryOp IRBuilder::extract_atomic_op(ast::BinaryOp op) { + switch (op) { + case ast::BinaryOp::BOP_SUB_ASSIGN: + return ast::BinaryOp::BOP_SUBTRACTION; + case ast::BinaryOp::BOP_ADD_ASSIGN: + return ast::BinaryOp::BOP_ADDITION; + default: + throw std::runtime_error("Error: only atomic addition and subtraction is supported\n"); + } +} + +void IRBuilder::create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op) { + if (op == ast::BinaryOp::BOP_SUBTRACTION) { + update = builder.CreateFNeg(update); + } + builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, + ptr, + update, + llvm::MaybeAlign(), + llvm::AtomicOrdering::SequentiallyConsistent); +} + +llvm::Value* IRBuilder::create_member_addresses(llvm::Value* member_ptr) { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + + // Treat this member address as integer value. + llvm::Type* int_ptr_type = m->getDataLayout().getIntPtrType(builder.getContext()); + llvm::Value* ptr_to_int = builder.CreatePtrToInt(member_ptr, int_ptr_type); + + // Create a vector that has address at 0. + llvm::Type* vector_type = llvm::FixedVectorType::get(int_ptr_type, + platform.get_instruction_width()); + llvm::Value* zero = get_scalar_constant(get_i32_type(), 0); + llvm::Value* tmp = + builder.CreateInsertElement(llvm::UndefValue::get(vector_type), ptr_to_int, zero); + + // Finally, use `shufflevector` with zeroinitializer to replicate the 0th element. 
+ llvm::Value* select = llvm::Constant::getNullValue(vector_type); + return builder.CreateShuffleVector(tmp, llvm::UndefValue::get(vector_type), select); +} + +llvm::Value* IRBuilder::create_member_offsets(llvm::Value* start, llvm::Value* indices) { + llvm::Value* factor = get_vector_constant(get_i64_type(), + platform.get_precision() / 8); + llvm::Value* offset = builder.CreateMul(indices, factor); + return builder.CreateAdd(start, offset); +} + +llvm::Value* IRBuilder::create_atomic_loop(llvm::Value* ptrs_arr, + llvm::Value* rhs, + ast::BinaryOp op) { + const int vector_width = platform.get_instruction_width(); + llvm::BasicBlock* curr = get_current_block(); + llvm::BasicBlock* prev = curr->getPrevNode(); + llvm::BasicBlock* next = curr->getNextNode(); + + // Some constant values. + llvm::Value* false_value = get_scalar_constant(get_boolean_type(), 0); + llvm::Value* zero = get_scalar_constant(get_i64_type(), 0); + llvm::Value* one = get_scalar_constant(get_i64_type(), 1); + llvm::Value* minus_one = get_scalar_constant(get_i64_type(), -1); + + // First, we create a PHI node that holds the mask of active vector elements. + llvm::PHINode* mask = builder.CreatePHI(get_i64_type(), /*NumReservedValues=*/2); + + // Initially, all elements are active. + llvm::Value* init_value = get_scalar_constant(get_i64_type(), + ~((~0) << vector_width)); + + // Find the index of the next active element and update the mask. This can be easily computed + // with: + // index = cttz(mask) + // new_mask = mask & ((1 << index) ^ -1) + llvm::Value* index = + builder.CreateIntrinsic(llvm::Intrinsic::cttz, {get_i64_type()}, {mask, false_value}); + llvm::Value* new_mask = builder.CreateShl(one, index); + new_mask = builder.CreateXor(new_mask, minus_one); + new_mask = builder.CreateAnd(mask, new_mask); + + // Update PHI with appropriate values. 
+    mask->addIncoming(init_value, prev);
+    mask->addIncoming(new_mask, curr);
+
+    // Get the pointer to the current value, the value itself and the update.
+    llvm::Value* gep =
+        builder.CreateGEP(ptrs_arr->getType()->getPointerElementType(), ptrs_arr, {zero, index});
+    llvm::Value* ptr = create_load(gep);
+    llvm::Value* source = create_load(ptr);
+    llvm::Value* update = builder.CreateExtractElement(rhs, index);
+
+    // Perform the update and store the result back.
+    //     source = *ptr
+    //     *ptr = source + update
+    create_binary_op(source, update, op);
+    llvm::Value* result = pop_last_value();
+    create_store(ptr, result);
+
+    // Return condition to break out of atomic update loop.
+    return builder.CreateICmpEQ(new_mask, zero);
+}
+
+void IRBuilder::create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op) {
+    // Check that both lhs and rhs have the same types.
+    if (lhs->getType() != rhs->getType())
+        throw std::runtime_error(
+            "Error: lhs and rhs of the binary operator have different types\n");
+
+    llvm::Value* result;
+    switch (op) {
+#define DISPATCH(binary_op, fp_instruction, integer_instruction) \
+    case binary_op:                                              \
+        if (lhs->getType()->isIntOrIntVectorTy())                \
+            result = integer_instruction(lhs, rhs);              \
+        else                                                     \
+            result = fp_instruction(lhs, rhs);                   \
+        break;
+
+        // Arithmetic instructions.
+        DISPATCH(ast::BinaryOp::BOP_ADDITION, builder.CreateFAdd, builder.CreateAdd);
+        DISPATCH(ast::BinaryOp::BOP_DIVISION, builder.CreateFDiv, builder.CreateSDiv);
+        DISPATCH(ast::BinaryOp::BOP_MULTIPLICATION, builder.CreateFMul, builder.CreateMul);
+        DISPATCH(ast::BinaryOp::BOP_SUBTRACTION, builder.CreateFSub, builder.CreateSub);
+
+        // Comparison instructions.
+ DISPATCH(ast::BinaryOp::BOP_EXACT_EQUAL, builder.CreateFCmpOEQ, builder.CreateICmpEQ); + DISPATCH(ast::BinaryOp::BOP_GREATER, builder.CreateFCmpOGT, builder.CreateICmpSGT); + DISPATCH(ast::BinaryOp::BOP_GREATER_EQUAL, builder.CreateFCmpOGE, builder.CreateICmpSGE); + DISPATCH(ast::BinaryOp::BOP_LESS, builder.CreateFCmpOLT, builder.CreateICmpSLT); + DISPATCH(ast::BinaryOp::BOP_LESS_EQUAL, builder.CreateFCmpOLE, builder.CreateICmpSLE); + DISPATCH(ast::BinaryOp::BOP_NOT_EQUAL, builder.CreateFCmpONE, builder.CreateICmpNE); + +#undef DISPATCH + + // Separately replace ^ with the `pow` intrinsic. + case ast::BinaryOp::BOP_POWER: + result = builder.CreateIntrinsic(llvm::Intrinsic::pow, {lhs->getType()}, {lhs, rhs}); + break; + + // Logical instructions. + case ast::BinaryOp::BOP_AND: + result = builder.CreateAnd(lhs, rhs); + break; + case ast::BinaryOp::BOP_OR: + result = builder.CreateOr(lhs, rhs); + break; + + default: + throw std::runtime_error("Error: unsupported binary operator\n"); + } + value_stack.push_back(result); +} + +llvm::Value* IRBuilder::create_bitcast(llvm::Value* value, llvm::Type* dst_type) { + return builder.CreateBitCast(value, dst_type); +} + +llvm::Value* IRBuilder::create_inbounds_gep(const std::string& var_name, llvm::Value* index) { + llvm::Value* variable_ptr = lookup_value(var_name); + + // Since we index through the pointer, we need an extra 0 index in the indices list for GEP. 
+ ValueVector indices{llvm::ConstantInt::get(get_i64_type(), 0), index}; + llvm::Type* variable_type = variable_ptr->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(variable_type, variable_ptr, indices); +} + +llvm::Value* IRBuilder::create_inbounds_gep(llvm::Value* variable, llvm::Value* index) { + ValueVector indices{index}; + llvm::Type* variable_type = variable->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(variable_type, variable, indices); +} + +llvm::Value* IRBuilder::create_index(llvm::Value* value) { + // Check if index is a double. While it is possible to use casting from double to integer + // values, we choose not to support these cases. + llvm::Type* value_type = value->getType(); + if (!value_type->isIntOrIntVectorTy()) + throw std::runtime_error("Error: only integer indexing is supported\n"); + + // Conventionally, in LLVM array indices are 64 bit. + llvm::Type* i64_type = get_i64_type(); + if (auto index_type = llvm::dyn_cast(value_type)) { + if (index_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return value; + return builder.CreateSExtOrTrunc(value, i64_type); + } + + const auto& vector_type = llvm::cast(value_type); + const auto& element_type = llvm::cast(vector_type->getElementType()); + if (element_type->getBitWidth() == i64_type->getIntegerBitWidth()) + return value; + int vector_width = platform.get_instruction_width(); + return builder.CreateSExtOrTrunc(value, llvm::FixedVectorType::get(i64_type, vector_width)); +} + +llvm::Value* IRBuilder::create_load(const std::string& name, bool masked) { + llvm::Value* ptr = lookup_value(name); + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + + // Check if the generated IR is vectorized and masked. 
+ if (masked) { + builder.CreateMaskedLoad(loaded_type, ptr, llvm::Align(), mask); + } + llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); + value_stack.push_back(loaded); + return loaded; +} + +llvm::Value* IRBuilder::create_load(llvm::Value* ptr, bool masked) { + llvm::Type* loaded_type = ptr->getType()->getPointerElementType(); + + // Check if the generated IR is vectorized and masked. + if (masked) { + builder.CreateMaskedLoad(loaded_type, ptr, llvm::Align(), mask); + } + + llvm::Value* loaded = builder.CreateLoad(loaded_type, ptr); + value_stack.push_back(loaded); + return loaded; +} + +llvm::Value* IRBuilder::create_load_from_array(const std::string& name, llvm::Value* index) { + llvm::Value* element_ptr = create_inbounds_gep(name, index); + return create_load(element_ptr); +} + +void IRBuilder::create_store(const std::string& name, llvm::Value* value, bool masked) { + llvm::Value* ptr = lookup_value(name); + + // Check if the generated IR is vectorized and masked. + if (masked) { + builder.CreateMaskedStore(value, ptr, llvm::Align(), mask); + return; + } + builder.CreateStore(value, ptr); +} + +void IRBuilder::create_store(llvm::Value* ptr, llvm::Value* value, bool masked) { + // Check if the generated IR is vectorized and masked. + if (masked) { + builder.CreateMaskedStore(value, ptr, llvm::Align(), mask); + return; + } + builder.CreateStore(value, ptr); +} + +void IRBuilder::create_store_to_array(const std::string& name, + llvm::Value* index, + llvm::Value* value) { + llvm::Value* element_ptr = create_inbounds_gep(name, index); + create_store(element_ptr, value); +} + +void IRBuilder::create_return(llvm::Value* return_value) { + if (return_value) + builder.CreateRet(return_value); + else + builder.CreateRetVoid(); +} + +void IRBuilder::create_scalar_or_vector_alloca(const std::string& name, + llvm::Type* element_or_scalar_type) { + // Even if generating vectorised code, some variables still need to be scalar. 
Particularly, the + // induction variable "id" and remainder loop variables (that start with "epilogue" prefix). + llvm::Type* type; + if (platform.is_cpu_with_simd() && vectorize && name != kernel_id && + name.rfind("epilogue", 0)) { + int vector_width = platform.get_instruction_width(); + type = llvm::FixedVectorType::get(element_or_scalar_type, vector_width); + } else { + type = element_or_scalar_type; + } + create_alloca(name, type); +} + +void IRBuilder::create_unary_op(llvm::Value* value, ast::UnaryOp op) { + if (op == ast::UOP_NEGATION) { + value_stack.push_back(builder.CreateFNeg(value)); + } else if (op == ast::UOP_NOT) { + value_stack.push_back(builder.CreateNot(value)); + } else { + throw std::runtime_error("Error: unsupported unary operator\n"); + } +} + +llvm::Value* IRBuilder::get_struct_member_ptr(llvm::Value* struct_variable, int member_index) { + ValueVector indices; + indices.push_back(llvm::ConstantInt::get(get_i32_type(), 0)); + indices.push_back(llvm::ConstantInt::get(get_i32_type(), member_index)); + + llvm::Type* type = struct_variable->getType()->getPointerElementType(); + return builder.CreateInBoundsGEP(type, struct_variable, indices); +} + +void IRBuilder::invert_mask() { + if (!mask) + throw std::runtime_error("Error: mask is not set\n"); + + // Create the vector with all `true` values. + create_boolean_constant(1); + llvm::Value* one = pop_last_value(); + + mask = builder.CreateXor(mask, one); +} + +llvm::Value* IRBuilder::load_to_or_store_from_array(const std::string& id_name, + llvm::Value* id_value, + llvm::Value* array, + llvm::Value* maybe_value_to_store) { + // First, calculate the address of the element in the array. + llvm::Value* element_ptr = create_inbounds_gep(array, id_value); + + // Find out if the vector code is generated. + bool generating_vector_ir = platform.is_cpu_with_simd() && vectorize; + + // If the vector code is generated, we need to distinguish between two cases. If the array is + // indexed indirectly (i.e. 
not by an induction variable `kernel_id`), create gather/scatter + // instructions. + if (id_name != kernel_id && generating_vector_ir) { + if (maybe_value_to_store) { + return builder.CreateMaskedScatter(maybe_value_to_store, + element_ptr, + llvm::Align(), + mask); + } else { + // Construct the loaded vector type. + auto* ptrs = llvm::cast(element_ptr->getType()); + llvm::ElementCount element_count = ptrs->getElementCount(); + llvm::Type* element_type = ptrs->getElementType()->getPointerElementType(); + llvm::Type* loaded_type = llvm::VectorType::get(element_type, element_count); + + return builder.CreateMaskedGather(loaded_type, element_ptr, llvm::Align(), mask); + } + } + + llvm::Value* ptr; + if (generating_vector_ir) { + // If direct indexing is used during the vectorization, we simply bitcast the scalar pointer + // to a vector pointer + llvm::Type* vector_type = llvm::PointerType::get( + llvm::FixedVectorType::get(element_ptr->getType()->getPointerElementType(), + platform.get_instruction_width()), + /*AddressSpace=*/0); + ptr = builder.CreateBitCast(element_ptr, vector_type); + } else { + // Otherwise, scalar code is generated and hence return the element pointer. + ptr = element_ptr; + } + + if (maybe_value_to_store) { + create_store(ptr, maybe_value_to_store, /*masked=*/mask && generating_vector_ir); + return nullptr; + } else { + return create_load(ptr, /*masked=*/mask && generating_vector_ir); + } +} + +void IRBuilder::maybe_replicate_value(llvm::Value* value) { + // If the value should not be vectorised, or it is already a vector, add it to the stack. + if (!vectorize || !platform.is_cpu_with_simd() || value->getType()->isVectorTy()) { + value_stack.push_back(value); + } else { + // Otherwise, we generate vectorized code inside the loop, so replicate the value to form a + // vector. 
+ int vector_width = platform.get_instruction_width(); + llvm::Value* vector_value = builder.CreateVectorSplat(vector_width, value); + value_stack.push_back(vector_value); + } +} + +void IRBuilder::create_grid_stride() { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + auto create_call = [&](llvm::Intrinsic::ID id) { + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); + }; + + llvm::Value* block_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + llvm::Value* grid_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_nctaid_x); + llvm::Value* stride = builder.CreateMul(block_dim, grid_dim); + + value_stack.push_back(stride); +} + +void IRBuilder::create_thread_id() { + llvm::Module* m = builder.GetInsertBlock()->getParent()->getParent(); + auto create_call = [&](llvm::Intrinsic::ID id) { + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration(m, id); + return builder.CreateCall(intrinsic, {}); + }; + + // For now, this function only supports NVPTX backend, however it can be easily + // adjusted to generate thread id variable for any other platform. 
+ llvm::Value* block_id = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x); + llvm::Value* block_dim = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + llvm::Value* tmp = builder.CreateMul(block_id, block_dim); + + llvm::Value* tid = create_call(llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); + llvm::Value* id = builder.CreateAdd(tmp, tid); + + value_stack.push_back(id); +} + + +/****************************************************************************************/ +/* LLVM block utilities */ +/****************************************************************************************/ + +llvm::BasicBlock* IRBuilder::create_block_and_set_insertion_point(llvm::Function* function, + llvm::BasicBlock* insert_before, + std::string name) { + llvm::BasicBlock* block = + llvm::BasicBlock::Create(builder.getContext(), name, function, insert_before); + builder.SetInsertPoint(block); + return block; +} + +void IRBuilder::create_br(llvm::BasicBlock* block) { + builder.CreateBr(block); +} + +void IRBuilder::create_br_and_set_insertion_point(llvm::BasicBlock* block) { + builder.CreateBr(block); + builder.SetInsertPoint(block); +} + +llvm::BranchInst* IRBuilder::create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block) { + return builder.CreateCondBr(condition, true_block, false_block); +} + +llvm::BasicBlock* IRBuilder::get_current_block() { + return builder.GetInsertBlock(); +} + +void IRBuilder::set_insertion_point(llvm::BasicBlock* block) { + builder.SetInsertPoint(block); +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_ir_builder.hpp b/src/codegen/llvm/llvm_ir_builder.hpp new file mode 100644 index 0000000000..62cfea5145 --- /dev/null +++ b/src/codegen/llvm/llvm_ir_builder.hpp @@ -0,0 +1,342 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms 
of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "codegen/llvm/target_platform.hpp" +#include "symtab/symbol_table.hpp" + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" + +namespace nmodl { +namespace codegen { + +/// Floating point bit widths. +static constexpr const unsigned single_precision = 32; +static constexpr const unsigned double_precision = 64; + +/// Some typedefs. +using ConstantVector = std::vector; +using TypeVector = std::vector; +using ValueVector = std::vector; + +/** + * \class IRBuilder + * \brief A helper class to generate LLVM IR for NMODL AST. + */ +class IRBuilder { + private: + /// Underlying LLVM IR builder. + llvm::IRBuilder<> builder; + + /// Stack to hold visited and processed values. + ValueVector value_stack; + + /// Pointer to the current function for which the code is generated. + llvm::Function* current_function; + + /// Symbol table of the NMODL AST. + symtab::SymbolTable* symbol_table; + + /// Insertion point for `alloca` instructions. + llvm::Instruction* alloca_ip; + + /// Flag to indicate that the generated IR should be vectorized. + bool vectorize; + + /// Target platform for which IR is built. + Platform platform; + + /// Masked value used to predicate vector instructions. + llvm::Value* mask; + + /// The name of induction variable used in kernel loops. + std::string kernel_id; + + /// Fast math flags for floating-point IR instructions. 
+ std::vector fast_math_flags; + + public: + IRBuilder(llvm::LLVMContext& context, + Platform& platform, + std::vector fast_math_flags = {}) + : builder(context) + , platform(platform) + , symbol_table(nullptr) + , current_function(nullptr) + , vectorize(false) + , alloca_ip(nullptr) + , mask(nullptr) + , kernel_id("") + , fast_math_flags(fast_math_flags) {} + + /// Transforms the fast math flags provided to the builder into LLVM's representation. + llvm::FastMathFlags transform_to_fmf(std::vector& flags) { + static const std::map set_flag = { + {"nnan", &llvm::FastMathFlags::setNoNaNs}, + {"ninf", &llvm::FastMathFlags::setNoInfs}, + {"nsz", &llvm::FastMathFlags::setNoSignedZeros}, + {"contract", &llvm::FastMathFlags::setAllowContract}, + {"afn", &llvm::FastMathFlags::setApproxFunc}, + {"reassoc", &llvm::FastMathFlags::setAllowReassoc}, + {"fast", &llvm::FastMathFlags::setFast}}; + llvm::FastMathFlags fmf; + for (const auto& flag: flags) { + (fmf.*(set_flag.at(flag)))(true); + } + return fmf; + } + + /// Initializes the builder with the symbol table and the kernel induction variable id. + void initialize(symtab::SymbolTable& symbol_table, std::string& kernel_id) { + if (!fast_math_flags.empty()) + builder.setFastMathFlags(transform_to_fmf(fast_math_flags)); + this->symbol_table = &symbol_table; + this->kernel_id = kernel_id; + } + + /// Explicitly sets the builder to produce scalar IR. + void generate_scalar_ir() { + vectorize = false; + } + + /// Indicates whether the builder generates vectorized IR. + bool vectorizing() { + return vectorize; + } + + /// Explicitly sets the builder to produce vectorized IR. + void generate_vector_ir() { + vectorize = true; + } + + /// Sets the current function for which LLVM IR is generated. + void set_function(llvm::Function* function) { + current_function = function; + } + + /// Clears the stack of the values and unsets the current function. 
+ void clear_function() { + value_stack.clear(); + current_function = nullptr; + alloca_ip = nullptr; + } + + /// Sets the value to be the mask for vector code generation. + void set_mask(llvm::Value* value) { + mask = value; + } + + /// Clears the mask for vector code generation. + void clear_mask() { + mask = nullptr; + } + + /// Indicates whether the vectorized IR is predicated. + bool generates_predicated_ir() { + return vectorize && mask; + } + + /// Extracts binary operator (+ or -) from atomic update (+= or =-). + ast::BinaryOp extract_atomic_op(ast::BinaryOp op); + + /// Generates LLVM IR to allocate the arguments of the function on the stack. + void allocate_function_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments); + + /// Generates LLVM IR to allocate the arguments of the NMODL compute kernel + /// on the stack, bitcasting void* pointer to mechanism struct pointers. + void allocate_and_wrap_kernel_arguments(llvm::Function* function, + const ast::CodegenVarWithTypeVector& nmodl_arguments, + llvm::Type* struct_type); + + llvm::Value* create_alloca(const std::string& name, llvm::Type* type); + + /// Generates IR for allocating an array. + void create_array_alloca(const std::string& name, llvm::Type* element_type, int num_elements); + + /// Generates LLVM IR for the given binary operator. + void create_binary_op(llvm::Value* lhs, llvm::Value* rhs, ast::BinaryOp op); + + /// Generates LLVM IR for the given atomic operator. + void create_atomic_op(llvm::Value* ptr, llvm::Value* update, ast::BinaryOp op); + + /// Generates LLVM IR for the bitcast instruction. + llvm::Value* create_bitcast(llvm::Value* value, llvm::Type* dst_type); + + /// Create a basic block and set the builder's insertion point to it. + llvm::BasicBlock* create_block_and_set_insertion_point( + llvm::Function* function, + llvm::BasicBlock* insert_before = nullptr, + std::string name = ""); + + /// Generates LLVM IR for unconditional branch. 
+ void create_br(llvm::BasicBlock* block); + + /// Generates LLVM IR for unconditional branch and sets the insertion point to this block. + void create_br_and_set_insertion_point(llvm::BasicBlock* block); + + /// Generates LLVM IR for conditional branch. + llvm::BranchInst* create_cond_br(llvm::Value* condition, + llvm::BasicBlock* true_block, + llvm::BasicBlock* false_block); + + /// Generates LLVM IR for the boolean constant. + void create_boolean_constant(int value); + + /// Generates LLVM IR for the floating-point constant. + void create_fp_constant(const std::string& value); + + /// Generates LLVM IR for a call to the function. + void create_function_call(llvm::Function* callee, + ValueVector& arguments, + bool use_result = true); + + /// Generates LLVM IR for the string value. + llvm::Value* create_global_string(const ast::String& node); + + /// Generates LLVM IR to transform the value into an index by possibly sign-extending it. + llvm::Value* create_index(llvm::Value* value); + + /// Generates an intrinsic that corresponds to the given name. + void create_intrinsic(const std::string& name, + ValueVector& argument_values, + TypeVector& argument_types); + + /// Generates LLVM IR for the integer constant. + void create_i32_constant(int value); + + /// Generates LLVM IR to load the value specified by its name and returns it. + llvm::Value* create_load(const std::string& name, bool masked = false); + + /// Generates LLVM IR to load the value from the pointer and returns it. + llvm::Value* create_load(llvm::Value* ptr, bool masked = false); + + /// Generates LLVM IR to load the element at the specified index from the given array name and + /// returns it. + llvm::Value* create_load_from_array(const std::string& name, llvm::Value* index); + + /// Generates LLVM IR to store the value to the location specified by the name. 
+ void create_store(const std::string& name, llvm::Value* value, bool masked = false); + + /// Generates LLVM IR to store the value to the location specified by the pointer. + void create_store(llvm::Value* ptr, llvm::Value* value, bool masked = false); + + /// Generates LLVM IR to store the value to the array element, where array is specified by the + /// name. + void create_store_to_array(const std::string& name, llvm::Value* index, llvm::Value* value); + + /// Generates LLVM IR return instructions. + void create_return(llvm::Value* return_value = nullptr); + + /// Generates IR for allocating a scalar or vector variable. + void create_scalar_or_vector_alloca(const std::string& name, + llvm::Type* element_or_scalar_type); + + /// Creates an expression of the form: blockDim.x * gridDim.x + void create_grid_stride(); + + /// Creates an expression of the form: blockIdx.x * blockDim.x + threadIdx.x + void create_thread_id(); + + /// Generates LLVM IR for the given unary operator. + void create_unary_op(llvm::Value* value, ast::UnaryOp op); + + /// Creates a boolean (1-bit integer) type. + llvm::Type* get_boolean_type(); + + /// Returns current basic block. + llvm::BasicBlock* get_current_block(); + + /// Returns the name of the function for which LLVM IR is generated. + std::string get_current_function_name(); + + /// Creates a pointer to 8-bit integer type. + llvm::Type* get_i8_ptr_type(); + + /// Creates a 32-bit integer type. + llvm::Type* get_i32_type(); + + /// Creates a pointer to 32-bit integer type. + llvm::Type* get_i32_ptr_type(); + + /// Creates a 64-bit integer type. + llvm::Type* get_i64_type(); + + /// Creates a floating-point type. + llvm::Type* get_fp_type(); + + /// Creates a pointer to floating-point type. + llvm::Type* get_fp_ptr_type(); + + /// Creates a void type. + llvm::Type* get_void_type(); + + /// Generates LLVM IR to get the address of the struct's member at given index. Returns the + /// calculated value. 
+    llvm::Value* get_struct_member_ptr(llvm::Value* struct_variable, int member_index);
+
+    /// Creates a pointer to struct type with the given name and given members.
+    llvm::Type* get_struct_ptr_type(const std::string& struct_type_name, TypeVector& member_types);
+
+    /// Inverts the mask for vector code generation by xoring it.
+    void invert_mask();
+
+    /// Generates IR that loads the elements of the array even during vectorization. If the value is
+    /// specified, then it is stored to the array at the given index.
+    llvm::Value* load_to_or_store_from_array(const std::string& id_name,
+                                             llvm::Value* id_value,
+                                             llvm::Value* array,
+                                             llvm::Value* maybe_value_to_store = nullptr);
+
+    /// Looks up the value by its name in the current function's symbol table.
+    llvm::Value* lookup_value(const std::string& value_name);
+
+    /// Generates IR to replicate the value if vectorizing the code.
+    void maybe_replicate_value(llvm::Value* value);
+
+    /// Sets builder's insertion point to the given block.
+    void set_insertion_point(llvm::BasicBlock* block);
+
+    /// Pops the last visited value from the value stack.
+    llvm::Value* pop_last_value();
+
+    /// Generates an inbounds GEP instruction for the given value and returns calculated address.
+    llvm::Value* create_inbounds_gep(llvm::Value* variable, llvm::Value* index);
+
+    /// Creates a vector splat of starting addresses of the given member.
+    llvm::Value* create_member_addresses(llvm::Value* member_ptr);
+
+    /// Creates IR for calculating offset to member values. For more context, see
+    /// `visit_codegen_atomic_statement` in LLVM visitor.
+    llvm::Value* create_member_offsets(llvm::Value* start, llvm::Value* indices);
+
+    /// Creates IR to perform scalar updates to instance member based on `ptrs_arr` for every
+    /// element in a vector by
+    ///     member[*ptrs_arr[i]] = member[*ptrs_arr[i]] op rhs.
+    /// Returns condition (i1 value) to break out of atomic update loop.
+ llvm::Value* create_atomic_loop(llvm::Value* ptrs_arr, llvm::Value* rhs, ast::BinaryOp op); + + private: + /// Generates an inbounds GEP instruction for the given name and returns calculated address. + llvm::Value* create_inbounds_gep(const std::string& variable_name, llvm::Value* index); + + /// Returns a scalar constant of the provided type. + template + llvm::Value* get_scalar_constant(llvm::Type* type, V value); + + /// Returns a vector constant of the provided type. + template + llvm::Value* get_vector_constant(llvm::Type* type, V value); +}; + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_utils.cpp b/src/codegen/llvm/llvm_utils.cpp new file mode 100644 index 0000000000..075af794b0 --- /dev/null +++ b/src/codegen/llvm/llvm_utils.cpp @@ -0,0 +1,226 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/llvm_utils.hpp" +#include "codegen/llvm/annotation.hpp" +#include "codegen/llvm/replace_with_lib_functions.hpp" + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +namespace nmodl { +namespace utils { + +/// Populates pass managers with passes for the given optimisation levels. 
+static void populate_pms(llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm, + int opt_level, + int size_level, + llvm::TargetMachine* tm) { + // First, set the pass manager builder with some basic optimisation information. + llvm::PassManagerBuilder pm_builder; + pm_builder.OptLevel = opt_level; + pm_builder.SizeLevel = size_level; + pm_builder.DisableUnrollLoops = opt_level == 0; + + // If target machine is defined, then initialise the TargetTransformInfo for the target. + if (tm) { + module_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + func_pm.add(createTargetTransformInfoWrapperPass(tm->getTargetIRAnalysis())); + } + + // Populate pass managers. + pm_builder.populateModulePassManager(module_pm); + pm_builder.populateFunctionPassManager(func_pm); +} + +/// Runs the function and module passes on the provided module. +static void run_optimisation_passes(llvm::Module& module, + llvm::legacy::FunctionPassManager& func_pm, + llvm::legacy::PassManager& module_pm) { + func_pm.doInitialization(); + auto& functions = module.getFunctionList(); + for (auto& function: functions) { + llvm::verifyFunction(function); + func_pm.run(function); + } + func_pm.doFinalization(); + module_pm.run(module); +} + +/****************************************************************************************/ +/* Optimisation utils */ +/****************************************************************************************/ + +void initialise_nvptx_passes() { + // Register targets. + LLVMInitializeNVPTXTarget(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializeNVPTXAsmPrinter(); + + // Initialize passes. + initialise_optimisation_passes(); +} + +std::unique_ptr create_CUDA_target_machine(const codegen::Platform& platform, + llvm::Module& module) { + // CUDA target machine we generating code for. + std::string platform_name = platform.get_name(); + + // Target and layout information. 
+ static const std::map triple_str = {{"nvptx", "nvptx-nvidia-cuda"}, + {"nvptx64", + "nvptx64-nvidia-cuda"}}; + static const std::map data_layout_str = { + {"nvptx", + "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}, + {"nvptx64", + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32" + "-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32" + "-v64:64:64-v128:128:128-n16:32:64"}}; + + // Set data layout and target triple information for the module. + auto triple = triple_str.at(platform_name); + module.setDataLayout(data_layout_str.at(platform_name)); + module.setTargetTriple(triple); + + std::string subtarget = platform.get_subtarget_name(); + std::string features = "+ptx70"; + + // Find the specified target in registry. + std::string error_msg; + auto* target = llvm::TargetRegistry::lookupTarget(triple, error_msg); + if (!target) + throw std::runtime_error("Error: " + error_msg + "\n"); + + std::unique_ptr tm; + tm.reset(target->createTargetMachine(triple, subtarget, features, {}, {})); + if (!tm) + throw std::runtime_error("Error: creating target machine failed! Aborting."); + return tm; +} + +std::string get_module_ptx(llvm::TargetMachine& tm, llvm::Module& module) { + std::string target_asm; + llvm::raw_string_ostream stream(target_asm); + llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegen_pm; + + tm.addPassesToEmitFile(codegen_pm, pstream, nullptr, llvm::CGFT_AssemblyFile); + codegen_pm.run(module); + return target_asm; +} + +void optimise_module_for_nvptx(const codegen::Platform& platform, + llvm::Module& module, + int opt_level, + std::string& target_asm) { + // Create target machine for CUDA GPU + auto tm = create_CUDA_target_machine(platform, module); + + // Create pass managers. 
+    llvm::legacy::FunctionPassManager func_pm(&module);
+    llvm::legacy::PassManager module_pm;
+    llvm::PassManagerBuilder pm_builder;
+    pm_builder.OptLevel = opt_level;
+    pm_builder.SizeLevel = 0;
+    pm_builder.Inliner = llvm::createFunctionInliningPass();
+
+    // Do not vectorize!
+    pm_builder.LoopVectorize = false;
+
+    // Adjusting pass manager adds target-specific IR transformations, e.g.
+    // inferring address spaces.
+    tm->adjustPassManager(pm_builder);
+    pm_builder.populateFunctionPassManager(func_pm);
+    pm_builder.populateModulePassManager(module_pm);
+
+    // This runs target-independent optimizations.
+    run_optimisation_passes(module, func_pm, module_pm);
+
+    // Now, we want to run target-specific (e.g. NVPTX) passes. In LLVM, this
+    // is done via `addPassesToEmitFile`.
+    target_asm = get_module_ptx(*tm, module);
+}
+
+void initialise_optimisation_passes() {
+    auto& registry = *llvm::PassRegistry::getPassRegistry();
+    llvm::initializeCore(registry);
+    llvm::initializeTransformUtils(registry);
+    llvm::initializeScalarOpts(registry);
+    llvm::initializeIPO(registry);
+    llvm::initializeInstCombine(registry);
+    llvm::initializeAggressiveInstCombine(registry);
+    llvm::initializeAnalysis(registry);
+}
+
+void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm) {
+    llvm::legacy::FunctionPassManager func_pm(&module);
+    llvm::legacy::PassManager module_pm;
+    populate_pms(func_pm, module_pm, opt_level, /*size_level=*/0, tm);
+    run_optimisation_passes(module, func_pm, module_pm);
+}
+
+void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module) {
+    llvm::legacy::PassManager pm;
+
+    Replacer* replacer = nullptr;
+    if (platform.is_CUDA_gpu()) {
+        replacer = new custom::CUDAReplacer();
+    } else {
+        replacer = new custom::DefaultCPUReplacer(platform.get_math_library());
+    }
+    pm.add(new llvm::ReplacePass(replacer));
+    pm.run(module);
+
+    delete replacer;
+}
+
+void annotate(codegen::Platform& platform, llvm::Module& module) {
+ llvm::legacy::PassManager pm; + + Annotator* annotator = nullptr; + if (platform.is_CUDA_gpu()) { + annotator = new custom::CUDAAnnotator(); + } else { + annotator = new custom::DefaultCPUAnnotator(); + } + pm.add(new llvm::AnnotationPass(annotator)); + pm.run(module); + + delete annotator; +} + +/****************************************************************************************/ +/* File utils */ +/****************************************************************************************/ + +void save_ir_to_ll_file(llvm::Module& module, const std::string& filename) { + std::error_code error_code; + std::unique_ptr out = std::make_unique( + filename + ".ll", error_code, llvm::sys::fs::OF_Text); + if (error_code) + throw std::runtime_error("Error: " + error_code.message()); + + std::unique_ptr annotator; + module.print(out->os(), annotator.get()); + out->keep(); +} +} // namespace utils +} // namespace nmodl diff --git a/src/codegen/llvm/llvm_utils.hpp b/src/codegen/llvm/llvm_utils.hpp new file mode 100644 index 0000000000..f4bce67fb4 --- /dev/null +++ b/src/codegen/llvm/llvm_utils.hpp @@ -0,0 +1,53 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include "codegen/llvm/target_platform.hpp" + +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" + +namespace nmodl { +namespace utils { + +/// Initialises some LLVM optimisation passes. +void initialise_optimisation_passes(); + +/// Initialises NVPTX-specific optimisation passes. 
+void initialise_nvptx_passes(); + +/// Initializes a CUDA target machine +std::unique_ptr create_CUDA_target_machine(const codegen::Platform& platform, + llvm::Module& module); + +/// Generate PTX code given a CUDA target machine and the module +std::string get_module_ptx(llvm::TargetMachine& tm, llvm::Module& module); + +/// Replaces calls to LLVM intrinsics with appropriate library calls. +void replace_with_lib_functions(codegen::Platform& platform, llvm::Module& module); + +/// Annotates LLVM module with appropriate metadata. +/// TODO: this function and replace_with_lib_functions will be changed +/// once Platform evolves into PlatformConfig which would be responsible +/// for platform-dependent pass initialisation. +void annotate(codegen::Platform& platform, llvm::Module& module); + +/// Optimises the given LLVM IR module for NVPTX targets. +void optimise_module_for_nvptx(const codegen::Platform& platform, + llvm::Module& module, + int opt_level, + std::string& target_asm); + +/// Optimises the given LLVM IR module. +void optimise_module(llvm::Module& module, int opt_level, llvm::TargetMachine* tm = nullptr); + +/// Saves generated IR module to .ll file. +void save_ir_to_ll_file(llvm::Module& module, const std::string& filename); + +} // namespace utils +} // namespace nmodl diff --git a/src/codegen/llvm/main.cpp b/src/codegen/llvm/main.cpp new file mode 100644 index 0000000000..cd2ec2cb12 --- /dev/null +++ b/src/codegen/llvm/main.cpp @@ -0,0 +1,81 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details.
+ *************************************************************************/ + +#include + +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "parser/nmodl_driver.hpp" +#include "test/benchmark/jit_driver.hpp" +#include "utils/logger.hpp" +#include "visitors/symtab_visitor.hpp" + +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +using namespace nmodl; +using namespace runner; + +int main(int argc, const char* argv[]) { + CLI::App app{ + "NMODL LLVM Runner : Executes functions from a MOD file via LLVM IR code generation"}; + + // Currently, only a single MOD file is supported, as well as an entry point with a double + // return type. While returning a double value is a general case in NMODL, it will be nice to + // have a more generic functionality. \todo: Add support for different return types (int, void). + + std::string filename; + std::string entry_point_name = "main"; + + app.add_option("-f,--file,file", filename, "A single MOD file source") + ->required() + ->check(CLI::ExistingFile); + app.add_option("-e,--entry-point,entry-point", + entry_point_name, + "An entry point function from the MOD file"); + + CLI11_PARSE(app, argc, argv); + + logger->info("Parsing MOD file to AST"); + parser::NmodlDriver driver; + const auto& ast = driver.parse_file(filename); + + logger->info("Running Symtab Visitor"); + visitor::SymtabVisitor().visit_program(*ast); + + // Use default platform for this toy example. + codegen::Platform platform; + + logger->info("Running LLVM Visitor"); + codegen::CodegenLLVMVisitor llvm_visitor(filename, + /*output_dir=*/".", + platform, + /*opt_level_ir=*/0); + llvm_visitor.visit_program(*ast); + std::unique_ptr module = llvm_visitor.get_module(); + + // Check if the entry-point is valid for JIT driver to execute. 
+ auto func = module->getFunction(entry_point_name); + if (!func) + throw std::runtime_error("Error: entry-point is not found\n"); + + if (func->getNumOperands() != 0) + throw std::runtime_error("Error: entry-point functions with arguments are not supported\n"); + + if (!func->getReturnType()->isDoubleTy()) + throw std::runtime_error( + "Error: entry-point functions with non-double return type are not supported\n"); + + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + // Since only double type is supported, provide explicit double type to the running function. + auto r = runner.run_without_arguments(entry_point_name); + fprintf(stderr, "Result: %f\n", r); + + return 0; +} diff --git a/src/codegen/llvm/replace_with_lib_functions.cpp b/src/codegen/llvm/replace_with_lib_functions.cpp new file mode 100644 index 0000000000..984e83d912 --- /dev/null +++ b/src/codegen/llvm/replace_with_lib_functions.cpp @@ -0,0 +1,219 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "codegen/llvm/replace_with_lib_functions.hpp" + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/LegacyPassManager.h" + +namespace nmodl { +namespace custom { + +Patterns DefaultCPUReplacer::patterns() const { + throw std::runtime_error( + "Error: DefaultCPUReplacer has no patterns and uses built-in LLVM passes instead.\n"); +} + +std::string DefaultCPUReplacer::get_library_name() { + return this->library_name; +} + +Patterns CUDAReplacer::patterns() const { + return {{"llvm.exp.f32", "__nv_expf"}, + {"llvm.exp.f64", "__nv_exp"}, + {"llvm.pow.f32", "__nv_powf"}, + {"llvm.pow.f64", "__nv_pow"}, + {"llvm.log.f32", "__nv_logf"}, + {"llvm.log.f64", "__nv_log"}, + {"llvm.fabs.f32", "__nv_fabsf"}, + {"llvm.fabs.f64", "__nv_fabs"}}; +} +} // namespace custom +} // namespace nmodl + +using nmodl::custom::DefaultCPUReplacer; +namespace llvm { + +char ReplacePass::ID = 0; + +bool ReplacePass::runOnModule(Module& module) { + bool modified = false; + + // If the platform supports SIMD, replace math intrinsics with library + // functions. + if (dynamic_cast(replacer)) { + legacy::FunctionPassManager fpm(&module); + + // First, get the target library information and add vectorizable functions for the + // specified vector library. + Triple triple(sys::getDefaultTargetTriple()); + TargetLibraryInfoImpl tli = TargetLibraryInfoImpl(triple); + add_vectorizable_functions_from_vec_lib(tli, triple); + + // Add passes that replace math intrinsics with calls. 
+ fpm.add(new TargetLibraryInfoWrapperPass(tli)); + fpm.add(new ReplaceWithVeclibLegacy); + + // Run passes. + fpm.doInitialization(); + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration()) + modified |= fpm.run(function); + } + fpm.doFinalization(); + } else { + // Otherwise, the replacer is not default and we need to apply patterns + // from it to each function! + for (auto& function: module.getFunctionList()) { + if (!function.isDeclaration()) { + // Try to replace a call instruction. + std::vector replaced_calls; + for (auto& instruction: instructions(function)) { + if (auto* call_inst = dyn_cast(&instruction)) { + if (replace_call(*call_inst)) { + replaced_calls.push_back(call_inst); + modified = true; + } + } + } + + // Remove calls to replaced functions. + for (auto* call_inst: replaced_calls) { + call_inst->eraseFromParent(); + } + } + } + } + + return modified; +} + +void ReplacePass::getAnalysisUsage(AnalysisUsage& au) const { + au.setPreservesCFG(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); +} + +void ReplacePass::add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, + Triple& triple) { + // Since LLVM does not support SLEEF as a vector library yet, process it separately. + if (((DefaultCPUReplacer*) replacer)->get_library_name() == "SLEEF") { +// clang-format off +#define FIXED(w) ElementCount::getFixed(w) +// clang-format on +#define DISPATCH(func, vec_func, width) {func, vec_func, width}, + + // Populate function definitions of only exp and pow (for now). 
+ const VecDesc aarch64_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2)) + DISPATCH("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2)) + DISPATCH("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4)) + DISPATCH("llvm.log.f64", "_ZGVnN2v_log", FIXED(2)) + // clang-format on + }; + const VecDesc x86_functions[] = { + // clang-format off + DISPATCH("llvm.exp.f64", "_ZGVbN2v_exp", FIXED(2)) + DISPATCH("llvm.exp.f64", "_ZGVdN4v_exp", FIXED(4)) + DISPATCH("llvm.exp.f64", "_ZGVeN8v_exp", FIXED(8)) + DISPATCH("llvm.pow.f64", "_ZGVbN2vv_pow", FIXED(2)) + DISPATCH("llvm.pow.f64", "_ZGVdN4vv_pow", FIXED(4)) + DISPATCH("llvm.pow.f64", "_ZGVeN8vv_pow", FIXED(8)) + DISPATCH("llvm.log.f64", "_ZGVbN2v_log", FIXED(2)) + DISPATCH("llvm.log.f64", "_ZGVdN4v_log", FIXED(4)) + DISPATCH("llvm.log.f64", "_ZGVeN8v_log", FIXED(8)) + // clang-format on + }; +#undef DISPATCH +#undef FIXED + + if (triple.isAArch64()) { + tli.addVectorizableFunctions(aarch64_functions); + } + if (triple.isX86() && triple.isArch64Bit()) { + tli.addVectorizableFunctions(x86_functions); + } + + } else { + // A map to query vector library by its string value. + using VecLib = TargetLibraryInfoImpl::VectorLibrary; + static const std::map llvm_supported_vector_libraries = { + {"Accelerate", VecLib::Accelerate}, + {"libmvec", VecLib::LIBMVEC_X86}, + {"libsystem_m", VecLib ::DarwinLibSystemM}, + {"MASSV", VecLib::MASSV}, + {"none", VecLib::NoLibrary}, + {"SVML", VecLib::SVML}}; + + const auto& library = llvm_supported_vector_libraries.find( + ((DefaultCPUReplacer*) replacer)->get_library_name()); + if (library == llvm_supported_vector_libraries.end()) + throw std::runtime_error("Error: unknown vector library - " + + ((DefaultCPUReplacer*) replacer)->get_library_name() + "\n"); + + // Add vectorizable functions to the target library info. 
+ if (library->second != VecLib::LIBMVEC_X86 || (triple.isX86() && triple.isArch64Bit())) { + tli.addVectorizableFunctionsFromVecLib(library->second); + } + } +} + +bool ReplacePass::replace_call(CallInst& call_inst) { + Module* m = call_inst.getModule(); + Function* function = call_inst.getCalledFunction(); + + // Get supported replacement patterns. + Patterns patterns = replacer->patterns(); + + // Check if replacement is not supported. + std::string old_name = function->getName().str(); + auto it = patterns.find(old_name); + if (it == patterns.end()) + return false; + + // Get (or create) new function. + Function* new_func = m->getFunction(it->second); + if (!new_func) { + new_func = Function::Create(function->getFunctionType(), + Function::ExternalLinkage, + it->second, + *m); + new_func->copyAttributesFrom(function); + } + + // Create a call to libdevice function with the same operands. + IRBuilder<> builder(&call_inst); + std::vector args(call_inst.arg_operands().begin(), call_inst.arg_operands().end()); + SmallVector op_bundles; + call_inst.getOperandBundlesAsDefs(op_bundles); + CallInst* new_call = builder.CreateCall(new_func, args, op_bundles); + + // Replace all uses of old instruction with the new one. Also, copy + // fast math flags if necessary. + call_inst.replaceAllUsesWith(new_call); + if (isa(new_call)) { + new_call->copyFastMathFlags(&call_inst); + } + + return true; +} +} // namespace llvm diff --git a/src/codegen/llvm/replace_with_lib_functions.hpp b/src/codegen/llvm/replace_with_lib_functions.hpp new file mode 100644 index 0000000000..5385505686 --- /dev/null +++ b/src/codegen/llvm/replace_with_lib_functions.hpp @@ -0,0 +1,97 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +#include "codegen/llvm/target_platform.hpp" + +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Host.h" + +using Patterns = std::map; + +namespace nmodl { +namespace custom { + +/** + * \class Replacer + * \brief Base class that can be overriden to specify how LLVM math intrinsics + * are replaced. + */ +class Replacer { + public: + virtual Patterns patterns() const = 0; + virtual ~Replacer() = default; +}; + +/** + * \class DefaultCPUReplacer + * \brief Specifies how LLVM IR math functions are replaced on CPUs by default. + * Here we reuse LLVM's API so patterns() has no meaning and throws an error + * instead! `DefaultCPUReplacer` threfore cannot be overriden. + */ +class DefaultCPUReplacer: public Replacer { + private: + std::string library_name; + + public: + DefaultCPUReplacer(std::string library_name) + : Replacer() + , library_name(library_name) {} + + Patterns patterns() const final override; + + /// Returns the name of underlying library for which this default + /// replacer is used. + std::string get_library_name(); +}; + +/** + * \class CUDAReplacer + * \brief Specifies replacement patterns for CUDA platforms. + */ +class CUDAReplacer: public Replacer { + public: + Patterns patterns() const override; +}; +} // namespace custom +} // namespace nmodl + +using nmodl::custom::Replacer; +namespace llvm { + +/** + * \class ReplacePass + * \brief A module LLVM pass that replaces math intrinsics with + * library calls. + */ +class ReplacePass: public ModulePass { + private: + // Underlying replacer that provides replacement patterns. 
+ const Replacer* replacer; + + public: + static char ID; + + ReplacePass(Replacer* replacer) + : ModulePass(ID) + , replacer(replacer) {} + + bool runOnModule(Module& module) override; + + void getAnalysisUsage(AnalysisUsage& au) const override; + + private: + /// Populates `tli` with vectorizable function definitions (hook for default replacements). + void add_vectorizable_functions_from_vec_lib(TargetLibraryInfoImpl& tli, Triple& triple); + + /// Replaces call instruction with a new call from Replacer's patterns. + bool replace_call(CallInst& call_inst); +}; +} // namespace llvm diff --git a/src/codegen/llvm/target_platform.cpp b/src/codegen/llvm/target_platform.cpp new file mode 100644 index 0000000000..bcab739fb3 --- /dev/null +++ b/src/codegen/llvm/target_platform.cpp @@ -0,0 +1,66 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include "codegen/llvm/target_platform.hpp" + +#include + +namespace nmodl { +namespace codegen { + +const std::string Platform::DEFAULT_PLATFORM_NAME = "default"; +const std::string Platform::DEFAULT_MATH_LIBRARY = "none"; + +bool Platform::is_default_platform() const { + // Default platform is a CPU. 
+ return platform_id == PlatformID::CPU && name == Platform::DEFAULT_PLATFORM_NAME; +} + +bool Platform::is_cpu() const { + return platform_id == PlatformID::CPU; +} + +bool Platform::is_cpu_with_simd() const { + return platform_id == PlatformID::CPU && instruction_width > 1; +} + +bool Platform::is_gpu() const { + return platform_id == PlatformID::GPU; +} + +bool Platform::is_CUDA_gpu() const { + return platform_id == PlatformID::GPU && (name == "nvptx" || name == "nvptx64"); +} + +bool Platform::is_single_precision() { + return use_single_precision; +} + +std::string Platform::get_name() const { + return name; +} + +std::string Platform::get_subtarget_name() const { + if (platform_id != PlatformID::GPU) + throw std::runtime_error("Error: platform must be a GPU to query the subtarget!\n"); + return subtarget_name; +} + +std::string Platform::get_math_library() const { + return math_library; +} + +int Platform::get_instruction_width() const { + return instruction_width; +} + +int Platform::get_precision() const { + return use_single_precision ? 32 : 64; +} + +} // namespace codegen +} // namespace nmodl diff --git a/src/codegen/llvm/target_platform.hpp b/src/codegen/llvm/target_platform.hpp new file mode 100644 index 0000000000..d15f220d1c --- /dev/null +++ b/src/codegen/llvm/target_platform.hpp @@ -0,0 +1,112 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +namespace nmodl { +namespace codegen { + +enum PlatformID { CPU, GPU }; + +/** + * \class Platform + * \brief A class that represents the target platform. It is needed to + * reduce the amount of code passed to LLVM visitor and its helpers. 
+ */ +class Platform { + public: + /// Default name of the target and math library. + static const std::string DEFAULT_PLATFORM_NAME; + static const std::string DEFAULT_MATH_LIBRARY; + + private: + /// Name of the platform. + std::string name = Platform::DEFAULT_PLATFORM_NAME; + + /// Target chip for GPUs. + /// TODO: this should only be available to GPUs! If we refactor target + /// classes so that GPUPlatform <: Platform, it will be nicer! + std::string subtarget_name = "sm_70"; + + /// Target-specific id to compare platforms easily. + PlatformID platform_id; + + /// User-provided width that is used to construct LLVM instructions + /// and types. + int instruction_width = 1; + + /// Use single-precision floating-point types. + bool use_single_precision = false; + + /// A name of user-provided math library. + std::string math_library = Platform::DEFAULT_MATH_LIBRARY; + + public: + Platform(PlatformID platform_id, + const std::string& name, + const std::string& subtarget_name, + std::string& math_library, + bool use_single_precision = false, + int instruction_width = 1) + : platform_id(platform_id) + , name(name) + , subtarget_name(subtarget_name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform(PlatformID platform_id, + const std::string& name, + std::string& math_library, + bool use_single_precision = false, + int instruction_width = 1) + : platform_id(platform_id) + , name(name) + , math_library(math_library) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform(bool use_single_precision, int instruction_width) + : platform_id(PlatformID::CPU) + , use_single_precision(use_single_precision) + , instruction_width(instruction_width) {} + + Platform() + : platform_id(PlatformID::CPU) {} + + /// Checks if this platform is a default platform. + bool is_default_platform() const; + + /// Checks if this platform is a CPU. 
+ bool is_cpu() const; + + /// Checks if this platform is a CPU with SIMD support. + bool is_cpu_with_simd() const; + + /// Checks if this platform is a GPU. + bool is_gpu() const; + + /// Checks if this platform is CUDA platform. + bool is_CUDA_gpu() const; + + bool is_single_precision(); + + std::string get_name() const; + + std::string get_subtarget_name() const; + + std::string get_math_library() const; + + int get_instruction_width() const; + + int get_precision() const; +}; + +} // namespace codegen +} // namespace nmodl diff --git a/src/language/code_generator.cmake b/src/language/code_generator.cmake index 36d10117e4..c1aaf733e6 100644 --- a/src/language/code_generator.cmake +++ b/src/language/code_generator.cmake @@ -65,6 +65,18 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/block_comment.hpp ${PROJECT_BINARY_DIR}/src/ast/boolean.hpp ${PROJECT_BINARY_DIR}/src/ast/breakpoint_block.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_atomic_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_for_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_function.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_grid_stride.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_instance_var.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_return_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_struct.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_thread_id.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_list_statement.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_type.hpp + ${PROJECT_BINARY_DIR}/src/ast/codegen_var_with_type.hpp ${PROJECT_BINARY_DIR}/src/ast/compartment.hpp ${PROJECT_BINARY_DIR}/src/ast/conductance_hint.hpp ${PROJECT_BINARY_DIR}/src/ast/conserve.hpp @@ -108,6 +120,7 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/independent_definition.hpp ${PROJECT_BINARY_DIR}/src/ast/indexed_name.hpp ${PROJECT_BINARY_DIR}/src/ast/initial_block.hpp + ${PROJECT_BINARY_DIR}/src/ast/instance_struct.hpp 
${PROJECT_BINARY_DIR}/src/ast/integer.hpp ${PROJECT_BINARY_DIR}/src/ast/kinetic_block.hpp ${PROJECT_BINARY_DIR}/src/ast/lag_statement.hpp @@ -184,6 +197,7 @@ set(AST_GENERATED_SOURCES ${PROJECT_BINARY_DIR}/src/ast/valence.hpp ${PROJECT_BINARY_DIR}/src/ast/var_name.hpp ${PROJECT_BINARY_DIR}/src/ast/verbatim.hpp + ${PROJECT_BINARY_DIR}/src/ast/void.hpp ${PROJECT_BINARY_DIR}/src/ast/watch.hpp ${PROJECT_BINARY_DIR}/src/ast/watch_statement.hpp ${PROJECT_BINARY_DIR}/src/ast/while_statement.hpp diff --git a/src/language/codegen.yaml b/src/language/codegen.yaml index 5511d14478..d6f18e96d2 100644 --- a/src/language/codegen.yaml +++ b/src/language/codegen.yaml @@ -28,7 +28,51 @@ - Expression: children: - Number: + - Void: + nmodl: "VOID" + brief: "Represent void type in code generation" - Identifier: + children: + - CodegenVarType: + brief: "Represent type of the variable" + members: + - type: + brief: "Type of the ast node" + type: AstNodeType + - CodegenVar: + brief: "Represent variable used for code generation" + members: + - pointer: + brief: "If variable is pointer type" + type: int + - name: + brief: "Name of the variable" + type: Identifier + node_name: true + - CodegenVarWithType: + brief: "Represent variable used for code generation" + members: + - type: + brief: "Type of the variable" + type: CodegenVarType + suffix: {value: " "} + - is_pointer: + brief: "If variable is pointer type" + type: int + - name: + brief: "Name of the variable" + type: Identifier + node_name: true + - CodegenInstanceVar: + brief: "Represent instance variable" + members: + - instance_var: + brief: "Instance variable" + type: Name + suffix: {value: "->"} + - member_var: + brief: "Member variable within instance" + type: Identifier - Block: children: - NrnStateBlock: @@ -89,7 +133,44 @@ type: StatementBlock - finalize_block: brief: "Statement block to be executed after calling linear solver" - type: StatementBlock + type: StatementBlock + - CodegenFunction: + brief: "Function generated from 
FUNCTION or PROCEDURE block" + members: + - return_type: + brief: "Return type of the function" + type: CodegenVarType + suffix: {value: " "} + - name: + brief: "Name of the function" + type: Name + node_name: true + - arguments: + brief: "Vector of the parameters to the function" + type: CodegenVarWithType + vector: true + prefix: {value: "(", force: true} + suffix: {value: ")", force: true} + separator: ", " + - statement_block: + brief: "Body of the function" + type: StatementBlock + getter: {override: true} + - is_kernel: + brief: "If function is compute kernel" + type: int + - InstanceStruct: + nmodl: "INSTANCE_STRUCT " + members: + - codegen_vars: + brief: "Vector of CodegenVars" + type: CodegenVarWithType + vector: true + add: true + separator: "\\n " + prefix: {value: "{\\n ", force: true} + suffix: {value: "\\n}", force: true} + brief: "LLVM IR Struct that holds the mechanism instance's variables" - WrappedExpression: brief: "Wrap any other expression type" members: @@ -110,6 +191,38 @@ - node_to_solve: brief: "Block to be solved (callback node or solution node itself)" type: Expression + - CodegenStruct: + brief: "Represent a struct or class for code generation" + members: + - variable_statements: + brief: "member variables of the class/struct" + type: CodegenVarListStatement + vector: true + - functions: + brief: "member functions of the class/struct" + type: CodegenFunction + vector: true + - CodegenThreadId: + brief: "Represents thread id expression for GPU code generation" + description: | + For GPU code generation, we use a special AST node to enocde the initial + thread id calculation. In NMODL, this expression is usually of the form: + \code{.cpp} + tid = blockId.x * blockDim.x + threadId.x + \endcode + To be able to support multiple GPU backends, we choose to have a custom AST + node. Therefore, the code generation for this node is kept very simple, + mapping expression to target-specific GPU inrinsics. 
+ nmodl: "THREAD_ID" + - CodegenGridStride: + brief: "Represents grid stride for GPU code generation" + description: | + For GPU code generation, we use a special AST node to enocde the loop + increment expression. In NMODL, this expression is usually of the form: + \code{.cpp} + for (int i = tid; i < n; i += blockDim.x * gridDim.x) + \endcode + nmodl: "GRID_STRIDE" - Statement: brief: "Statement base class" children: @@ -120,4 +233,80 @@ brief: "Value of new timestep" type: Double prefix: {value: " = "} - brief: "Statement to indicate a change in timestep in a given block" \ No newline at end of file + brief: "Statement to indicate a change in timestep in a given block" + - CodegenForStatement: + brief: "Represent for loop used for code generation" + nmodl: "for(" + members: + - initialization: + brief: "initialization expression for the loop" + type: Expression + optional: true + - condition: + brief: "condition expression for the loop" + type: Expression + optional: true + prefix: {value: "; "} + suffix: {value: "; "} + - increment: + brief: "increment or decrement expression for the loop" + type: Expression + optional: true + suffix: {value: ") "} + - statement_block: + brief: "body of the loop" + type: StatementBlock + getter: {override: true} + - CodegenReturnStatement: + brief: "Represent return statement for code generation" + nmodl: "return " + members: + - statement: + brief: "return statement" + type: Expression + optional: true + - CodegenVarListStatement: + brief: "Represent list of variables used for code generation" + members: + - var_type: + brief: "Type of the variables" + type: CodegenVarType + suffix: {value: " "} + - variables: + brief: "List of the variables to define" + type: CodegenVar + vector: true + separator: ", " + add: true + - CodegenAtomicStatement: + brief: "Represent atomic operation" + description: | + During code generation certain operations like ion updates, vec_rhs or + vec_d updates (for synapse) needs to be atomic operations 
if executed by + multiple threads. In case of SIMD, there are conflicts for `vec_d` and + `vec_rhs` for synapse types. Here are some statements from C++ backend: + + \code{.cpp} + vec_d[node_id] += g + vec_rhs[node_id] -= rhs + ion_ina[indexes[some_index]] += ina[id] + ion_cai[indexes[some_index]] = cai[id] // cai here is state variable + \endcode + + These operations will be represented by atomic statement node type: + * `vec_d[node_id]` : lhs + * `+=` : atomic_op + * `g` : rhs + + members: + - lhs: + brief: "Variable to be updated atomically" + type: Identifier + - atomic_op: + brief: "Operator" + type: BinaryOperator + prefix: {value: " "} + suffix: {value: " "} + - rhs: + brief: "Expression for atomic operation" + type: Expression diff --git a/src/language/nmodl.yaml b/src/language/nmodl.yaml index 9f94ada917..4999dc87a1 100644 --- a/src/language/nmodl.yaml +++ b/src/language/nmodl.yaml @@ -1368,7 +1368,7 @@ type: Double - Statement: - brief: "TODO" + brief: "Base class to represent a statement in the NMODL" children: - UnitState: brief: "TODO" @@ -1573,11 +1573,28 @@ \sa nmodl::visitor::SympyConductanceVisitor - ExpressionStatement: - brief: "TODO" members: - expression: - brief: "TODO" + brief: "An expression representing a construct in the mod file" type: Expression + brief: "Represent statement encpasulated by underlying expression of ast node typeExpression" + description: | + Certain statements defined in the NMODL are complex than typical "single line" statements. + For example, often SOLVE block is written as: + + SOLVE states METHOD cnexp + + but language allow it to be more complex as: + + SOLVE states METHOD cnexp { + statement_1 + statement_2 + } + + So this type of construct is not really "single line" statement. There are other such cases + where they are categorised as "statement" in the bison specification. Also, there are cases + when a binary expression `a = b` is also a full statement.. 
+ In all such cases we wrap underlying expression as statement using ExpressionStatement node. - ProtectStatement: brief: "TODO" diff --git a/src/language/node_info.py b/src/language/node_info.py index 821d263287..4a8e5fcc53 100644 --- a/src/language/node_info.py +++ b/src/language/node_info.py @@ -28,6 +28,7 @@ "FirstLastType", "BAType", "UnitStateType", + "AstNodeType", } BASE_TYPES = {"std::string" } | INTEGRAL_TYPES @@ -164,6 +165,9 @@ STATEMENT_BLOCK_NODE = "StatementBlock" STRING_NODE = "String" UNIT_BLOCK = "UnitBlock" +AST_NODETYPE_NODE= "AstNodeType" +CODEGEN_VAR_TYPE_NODE = "CodegenVarType" +CODEGEN_VAR_WITH_TYPE_NODE = "CodegenVarWithType" # name of variable in prime node which represent order of derivative ORDER_VAR_NAME = "order" diff --git a/src/language/nodes.py b/src/language/nodes.py index 88ad1bb000..fd8a64f528 100644 --- a/src/language/nodes.py +++ b/src/language/nodes.py @@ -136,6 +136,18 @@ def is_boolean_node(self): def is_name_node(self): return self.class_name == node_info.NAME_NODE + @property + def is_ast_nodetype_node(self): + return self.class_name == node_info.AST_NODETYPE_NODE + + @property + def is_codegen_var_type_node(self): + return self.class_name == node_info.CODEGEN_VAR_TYPE_NODE + + @property + def is_codegen_var_with_type_node(self): + return self.class_name == node_info.CODEGEN_VAR_WITH_TYPE_NODE + @property def is_enum_node(self): data_type = node_info.DATA_TYPES[self.class_name] diff --git a/src/language/templates/ast/ast_decl.hpp b/src/language/templates/ast/ast_decl.hpp index a6bdae69a1..82e43048fc 100644 --- a/src/language/templates/ast/ast_decl.hpp +++ b/src/language/templates/ast/ast_decl.hpp @@ -12,7 +12,9 @@ #pragma once #include +#include #include +#include /// \file /// \brief Auto generated AST node types and aliases declaration @@ -50,6 +52,15 @@ enum class AstNodeType { /** @} */ // end of ast_type +static inline std::string to_string(AstNodeType type) { + {% for node in nodes %} + if(type == AstNodeType::{{ 
node.class_name|snake_case|upper }}) { + return "{{ node.class_name|snake_case|upper }}"; + } + {% endfor %} + throw std::runtime_error("Unhandled type in to_string(AstNodeType type)!"); +} + /** * @defgroup ast_vec_type AST Vector Type Aliases * @ingroup ast diff --git a/src/language/templates/visitors/json_visitor.cpp b/src/language/templates/visitors/json_visitor.cpp index 11bd9cf556..4633ce338e 100644 --- a/src/language/templates/visitors/json_visitor.cpp +++ b/src/language/templates/visitors/json_visitor.cpp @@ -22,33 +22,40 @@ using namespace ast; {% for node in nodes %} void JSONVisitor::visit_{{ node.class_name|snake_case }}(const {{ node.class_name }}& node) { {% if node.has_children() %} - printer->push_block(node.get_node_type_name()); - if (embed_nmodl) { - printer->add_block_property("nmodl", to_nmodl(node)); - } - node.visit_children(*this); - {% if node.is_data_type_node %} + printer->push_block(node.get_node_type_name()); + if (embed_nmodl) { + printer->add_block_property("nmodl", to_nmodl(node)); + } + node.visit_children(*this); + {% if node.is_data_type_node %} {% if node.is_integer_node %} - if(!node.get_macro()) { - std::stringstream ss; - ss << node.eval(); - printer->add_node(ss.str()); - } + if(!node.get_macro()) { + std::stringstream ss; + ss << node.eval(); + printer->add_node(ss.str()); + } {% else %} - std::stringstream ss; - ss << node.eval(); - printer->add_node(ss.str()); + std::stringstream ss; + ss << node.eval(); + printer->add_node(ss.str()); {% endif %} {% endif %} - printer->pop_block(); + + {% if node.is_codegen_var_type_node %} + printer->add_node(ast::to_string(node.get_type())); + {% endif %} + + printer->pop_block(); + {% if node.is_program_node %} - if (node.get_parent() == nullptr) { - flush(); - } + if (node.get_parent() == nullptr) { + flush(); + } {% endif %} + {% else %} - (void)node; - printer->add_node("{{ node.class_name }}"); + (void)node; + printer->add_node("{{ node.class_name }}"); {% endif %} } diff --git 
a/src/language/templates/visitors/nmodl_visitor.cpp b/src/language/templates/visitors/nmodl_visitor.cpp index 9c60bf8f87..c4d43ec755 100644 --- a/src/language/templates/visitors/nmodl_visitor.cpp +++ b/src/language/templates/visitors/nmodl_visitor.cpp @@ -115,7 +115,15 @@ void NmodlPrintVisitor::visit_{{ node.class_name|snake_case}}(const {{ node.clas {% endif %} {% for child in node.children %} {% call guard(child.force_prefix, child.force_suffix) -%} - {% if child.is_base_type_node %} + + {% if node.is_codegen_var_with_type_node and child.varname == "is_pointer" %} + if(node.get_{{ child.varname }}()) { + printer->add_element("*"); + } + {% elif child.is_base_type_node %} + {% if child.is_ast_nodetype_node %} + printer->add_element(ast::to_string(node.get_{{child.varname}}())); + {% endif %} {% else %} {% if child.optional or child.is_statement_block_node %} if(node.get_{{ child.varname }}()) { diff --git a/src/main.cpp b/src/main.cpp index 1e2a43871f..80e3fcb5fb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,44 +10,23 @@ #include -#include "ast/program.hpp" #include "codegen/codegen_acc_visitor.hpp" #include "codegen/codegen_c_visitor.hpp" -#include "codegen/codegen_compatibility_visitor.hpp" #include "codegen/codegen_cuda_visitor.hpp" #include "codegen/codegen_ispc_visitor.hpp" -#include "codegen/codegen_transform_visitor.hpp" +#ifdef NMODL_LLVM_BACKEND +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "test/benchmark/llvm_benchmark.hpp" +#endif + +#include "codegen/codegen_driver.hpp" #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pyembed.hpp" #include "utils/common_utils.hpp" #include "utils/logger.hpp" -#include "visitors/after_cvode_to_cnexp_visitor.hpp" -#include "visitors/ast_visitor.hpp" -#include "visitors/constant_folder_visitor.hpp" -#include "visitors/global_var_visitor.hpp" -#include "visitors/implicit_argument_visitor.hpp" -#include "visitors/indexedname_visitor.hpp" -#include 
"visitors/inline_visitor.hpp" -#include "visitors/ispc_rename_visitor.hpp" #include "visitors/json_visitor.hpp" -#include "visitors/kinetic_block_visitor.hpp" -#include "visitors/local_to_assigned_visitor.hpp" -#include "visitors/local_var_rename_visitor.hpp" -#include "visitors/localize_visitor.hpp" -#include "visitors/loop_unroll_visitor.hpp" -#include "visitors/neuron_solve_visitor.hpp" #include "visitors/nmodl_visitor.hpp" -#include "visitors/perf_visitor.hpp" -#include "visitors/semantic_analysis_visitor.hpp" -#include "visitors/solve_block_visitor.hpp" -#include "visitors/steadystate_visitor.hpp" -#include "visitors/sympy_conductance_visitor.hpp" -#include "visitors/sympy_solver_visitor.hpp" -#include "visitors/symtab_visitor.hpp" -#include "visitors/units_visitor.hpp" -#include "visitors/verbatim_var_rename_visitor.hpp" -#include "visitors/verbatim_visitor.hpp" /** * \dir @@ -70,93 +49,30 @@ int main(int argc, const char* argv[]) { /// true if debug logger statements should be shown std::string verbose("info"); - /// true if serial c code to be generated - bool c_backend(true); - - /// true if ispc code to be generated - bool ispc_backend(false); - - /// true if c code with openacc to be generated - bool oacc_backend(false); - - /// true if cuda code to be generated - bool cuda_backend(false); - - /// true if sympy should be used for solving ODEs analytically - bool sympy_analytic(false); - - /// true if Pade approximation to be used - bool sympy_pade(false); - - /// true if CSE (temp variables) to be used - bool sympy_cse(false); - - /// true if conductance keyword can be added to breakpoint - bool sympy_conductance(false); - - /// true if inlining at nmodl level to be done - bool nmodl_inline(false); - - /// true if unroll at nmodl level to be done - bool nmodl_unroll(false); - - /// true if perform constant folding at nmodl level to be done - bool nmodl_const_folding(false); - - /// true if range variables to be converted to local - bool 
nmodl_localize(false); - - /// true if global variables to be converted to range - bool nmodl_global_to_range(false); - - /// true if top level local variables to be converted to range - bool nmodl_local_to_range(false); - - /// true if localize variables even if verbatim block is used - bool localize_verbatim(false); - - /// true if local variables to be renamed - bool local_rename(true); - - /// true if inline even if verbatim block exist - bool verbatim_inline(false); - - /// true if verbatim blocks - bool verbatim_rename(true); - - /// true if code generation is forced to happen even if there - /// is any incompatibility - bool force_codegen(false); - - /// true if we want to only check compatibility without generating code - bool only_check_compatibility(false); + /// true if symbol table should be printed + bool show_symtab(false); - /// true if ion variable copies should be avoided - bool optimize_ionvar_copies_codegen(false); + /// floating point data type + std::string data_type("double"); - /// directory where code will be generated - std::string output_dir("."); +#ifdef NMODL_LLVM_BACKEND + /// run llvm benchmark + bool llvm_benchmark(false); - /// directory where intermediate file will be generated - std::string scratch_dir("tmp"); + /// the size of the instance struct for the benchmark + int instance_size = 10000; - /// directory where units lib file is located - std::string units_dir(NrnUnitsLib::get_path()); + /// the number of repeated experiments for the benchmarking + int num_experiments = 100; - /// true if ast should be converted to json - bool json_ast(false); + /// X dimension of grid in blocks for GPU execution + int llvm_cuda_grid_dim_x = 1; - /// true if ast should be converted to nmodl - bool nmodl_ast(false); + /// X dimension of block in threads for GPU execution + int llvm_cuda_block_dim_x = 1; +#endif - /// true if performance stats should be converted to json - bool json_perfstat(false); - - /// true if symbol table should be printed 
- bool show_symtab(false); - - /// floating point data type - std::string data_type("double"); + CodeGenConfig cfg; // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers,readability-magic-numbers) app.get_formatter()->column_width(40); @@ -172,121 +88,199 @@ int main(int argc, const char* argv[]) { ->required() ->check(CLI::ExistingFile); - app.add_option("-o,--output", output_dir, "Directory for backend code output") + app.add_option("-o,--output", cfg.output_dir, "Directory for backend code output") ->capture_default_str() ->ignore_case(); - app.add_option("--scratch", scratch_dir, "Directory for intermediate code output") + app.add_option("--scratch", cfg.scratch_dir, "Directory for intermediate code output") ->capture_default_str() ->ignore_case(); - app.add_option("--units", units_dir, "Directory of units lib file") + app.add_option("--units", cfg.units_dir, "Directory of units lib file") ->capture_default_str() ->ignore_case(); auto host_opt = app.add_subcommand("host", "HOST/CPU code backends")->ignore_case(); - host_opt->add_flag("--c", c_backend, fmt::format("C/C++ backend ({})", c_backend)) + host_opt->add_flag("--c", cfg.c_backend, fmt::format("C/C++ backend ({})", cfg.c_backend)) ->ignore_case(); host_opt ->add_flag("--ispc", - ispc_backend, - fmt::format("C/C++ backend with ISPC ({})", ispc_backend)) + cfg.ispc_backend, + fmt::format("C/C++ backend with ISPC ({})", cfg.ispc_backend)) ->ignore_case(); auto acc_opt = app.add_subcommand("acc", "Accelerator code backends")->ignore_case(); acc_opt ->add_flag("--oacc", - oacc_backend, - fmt::format("C/C++ backend with OpenACC ({})", oacc_backend)) + cfg.oacc_backend, + fmt::format("C/C++ backend with OpenACC ({})", cfg.oacc_backend)) ->ignore_case(); acc_opt ->add_flag("--cuda", - cuda_backend, - fmt::format("C/C++ backend with CUDA ({})", cuda_backend)) + cfg.cuda_backend, + fmt::format("C/C++ backend with CUDA ({})", cfg.cuda_backend)) ->ignore_case(); // clang-format off auto sympy_opt = 
app.add_subcommand("sympy", "SymPy based analysis and optimizations")->ignore_case(); sympy_opt->add_flag("--analytic", - sympy_analytic, - fmt::format("Solve ODEs using SymPy analytic integration ({})", sympy_analytic))->ignore_case(); + cfg.sympy_analytic, + fmt::format("Solve ODEs using SymPy analytic integration ({})", cfg.sympy_analytic))->ignore_case(); sympy_opt->add_flag("--pade", - sympy_pade, - fmt::format("Pade approximation in SymPy analytic integration ({})", sympy_pade))->ignore_case(); + cfg.sympy_pade, + fmt::format("Pade approximation in SymPy analytic integration ({})", cfg.sympy_pade))->ignore_case(); sympy_opt->add_flag("--cse", - sympy_cse, - fmt::format("CSE (Common Subexpression Elimination) in SymPy analytic integration ({})", sympy_cse))->ignore_case(); + cfg.sympy_cse, + fmt::format("CSE (Common Subexpression Elimination) in SymPy analytic integration ({})", cfg.sympy_cse))->ignore_case(); sympy_opt->add_flag("--conductance", - sympy_conductance, - fmt::format("Add CONDUCTANCE keyword in BREAKPOINT ({})", sympy_conductance))->ignore_case(); + cfg.sympy_conductance, + fmt::format("Add CONDUCTANCE keyword in BREAKPOINT ({})", cfg.sympy_conductance))->ignore_case(); auto passes_opt = app.add_subcommand("passes", "Analyse/Optimization passes")->ignore_case(); passes_opt->add_flag("--inline", - nmodl_inline, - fmt::format("Perform inlining at NMODL level ({})", nmodl_inline))->ignore_case(); + cfg.nmodl_inline, + fmt::format("Perform inlining at NMODL level ({})", cfg.nmodl_inline))->ignore_case(); passes_opt->add_flag("--unroll", - nmodl_unroll, - fmt::format("Perform loop unroll at NMODL level ({})", nmodl_unroll))->ignore_case(); + cfg.nmodl_unroll, + fmt::format("Perform loop unroll at NMODL level ({})", cfg.nmodl_unroll))->ignore_case(); passes_opt->add_flag("--const-folding", - nmodl_const_folding, - fmt::format("Perform constant folding at NMODL level ({})", nmodl_const_folding))->ignore_case(); + cfg.nmodl_const_folding, + 
fmt::format("Perform constant folding at NMODL level ({})", cfg.nmodl_const_folding))->ignore_case(); passes_opt->add_flag("--localize", - nmodl_localize, - fmt::format("Convert RANGE variables to LOCAL ({})", nmodl_localize))->ignore_case(); + cfg.nmodl_localize, + fmt::format("Convert RANGE variables to LOCAL ({})", cfg.nmodl_localize))->ignore_case(); passes_opt->add_flag("--global-to-range", - nmodl_global_to_range, - fmt::format("Convert GLOBAL variables to RANGE ({})", nmodl_global_to_range))->ignore_case(); + cfg.nmodl_global_to_range, + fmt::format("Convert GLOBAL variables to RANGE ({})", cfg.nmodl_global_to_range))->ignore_case(); passes_opt->add_flag("--local-to-range", - nmodl_local_to_range, - fmt::format("Convert top level LOCAL variables to RANGE ({})", nmodl_local_to_range))->ignore_case(); + cfg.nmodl_local_to_range, + fmt::format("Convert top level LOCAL variables to RANGE ({})", cfg.nmodl_local_to_range))->ignore_case(); passes_opt->add_flag("--localize-verbatim", - localize_verbatim, - fmt::format("Convert RANGE variables to LOCAL even if verbatim block exist ({})", localize_verbatim))->ignore_case(); + cfg.localize_verbatim, + fmt::format("Convert RANGE variables to LOCAL even if verbatim block exist ({})", cfg.localize_verbatim))->ignore_case(); passes_opt->add_flag("--local-rename", - local_rename, - fmt::format("Rename LOCAL variable if variable of same name exist in global scope ({})", local_rename))->ignore_case(); + cfg.local_rename, + fmt::format("Rename LOCAL variable if variable of same name exist in global scope ({})", cfg.local_rename))->ignore_case(); passes_opt->add_flag("--verbatim-inline", - verbatim_inline, - fmt::format("Inline even if verbatim block exist ({})", verbatim_inline))->ignore_case(); + cfg.verbatim_inline, + fmt::format("Inline even if verbatim block exist ({})", cfg.verbatim_inline))->ignore_case(); passes_opt->add_flag("--verbatim-rename", - verbatim_rename, - fmt::format("Rename variables in verbatim block 
({})", verbatim_rename))->ignore_case(); + cfg.verbatim_rename, + fmt::format("Rename variables in verbatim block ({})", cfg.verbatim_rename))->ignore_case(); passes_opt->add_flag("--json-ast", - json_ast, - fmt::format("Write AST to JSON file ({})", json_ast))->ignore_case(); + cfg.json_ast, + fmt::format("Write AST to JSON file ({})", cfg.json_ast))->ignore_case(); passes_opt->add_flag("--nmodl-ast", - nmodl_ast, - fmt::format("Write AST to NMODL file ({})", nmodl_ast))->ignore_case(); + cfg.nmodl_ast, + fmt::format("Write AST to NMODL file ({})", cfg.nmodl_ast))->ignore_case(); passes_opt->add_flag("--json-perf", - json_perfstat, - fmt::format("Write performance statistics to JSON file ({})", json_perfstat))->ignore_case(); + cfg.json_perfstat, + fmt::format("Write performance statistics to JSON file ({})", cfg.json_perfstat))->ignore_case(); passes_opt->add_flag("--show-symtab", show_symtab, fmt::format("Write symbol table to stdout ({})", show_symtab))->ignore_case(); auto codegen_opt = app.add_subcommand("codegen", "Code generation options")->ignore_case(); codegen_opt->add_option("--datatype", - data_type, + cfg.data_type, "Data type for floating point variables")->capture_default_str()->ignore_case()->check(CLI::IsMember({"float", "double"})); codegen_opt->add_flag("--force", - force_codegen, + cfg.force_codegen, "Force code generation even if there is any incompatibility"); codegen_opt->add_flag("--only-check-compatibility", - only_check_compatibility, + cfg.only_check_compatibility, "Check compatibility and return without generating code"); codegen_opt->add_flag("--opt-ionvar-copy", - optimize_ionvar_copies_codegen, - fmt::format("Optimize copies of ion variables ({})", optimize_ionvar_copies_codegen))->ignore_case(); - + cfg.optimize_ionvar_copies_codegen, + fmt::format("Optimize copies of ion variables ({})", cfg.optimize_ionvar_copies_codegen))->ignore_case(); + +#ifdef NMODL_LLVM_BACKEND + + // LLVM IR code generation options. 
+ auto llvm_opt = app.add_subcommand("llvm", "LLVM code generation option")->ignore_case(); + auto llvm_ir_opt = llvm_opt->add_flag("--ir", + cfg.llvm_ir, + fmt::format("Generate LLVM IR ({})", cfg.llvm_ir))->ignore_case(); + llvm_ir_opt->required(true); + llvm_opt->add_flag("--no-debug", + cfg.llvm_no_debug, + fmt::format("Disable debug information ({})", cfg.llvm_no_debug))->ignore_case(); + llvm_opt->add_option("--opt-level-ir", + cfg.llvm_opt_level_ir, + fmt::format("LLVM IR optimisation level (O{})", cfg.llvm_opt_level_ir))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + llvm_opt->add_flag("--single-precision", + cfg.llvm_float_type, + fmt::format("Use single precision floating-point types ({})", cfg.llvm_float_type))->ignore_case(); + llvm_opt->add_option("--fmf", + cfg.llvm_fast_math_flags, + "Fast math flags for floating-point optimizations (none)")->check(CLI::IsMember({"afn", "arcp", "contract", "ninf", "nnan", "nsz", "reassoc", "fast"})); + + // Platform options for LLVM code generation. 
+ auto cpu_opt = app.add_subcommand("cpu", "LLVM CPU option")->ignore_case(); + cpu_opt->needs(llvm_opt); + cpu_opt->add_option("--name", + cfg.llvm_cpu_name, + "Name of CPU platform to use")->ignore_case(); + auto simd_math_library_opt = cpu_opt->add_option("--math-library", + cfg.llvm_math_library, + fmt::format("Math library for SIMD code generation ({})", cfg.llvm_math_library)); + simd_math_library_opt->check(CLI::IsMember({"Accelerate", "libmvec", "libsystem_m", "MASSV", "SLEEF", "SVML", "none"})); + cpu_opt->add_option("--vector-width", + cfg.llvm_vector_width, + fmt::format("Explicit vectorization width for IR generation ({})", cfg.llvm_vector_width))->ignore_case(); + + auto gpu_opt = app.add_subcommand("gpu", "LLVM GPU option")->ignore_case(); + gpu_opt->needs(llvm_opt); + auto gpu_target_name = gpu_opt->add_option("--name", + cfg.llvm_gpu_name, + "Name of GPU platform to use")->ignore_case(); + gpu_target_name->check(CLI::IsMember({"nvptx", "nvptx64"})); + gpu_opt->add_option("--target-arch", + cfg.llvm_gpu_target_architecture, + "Name of target architecture to use")->ignore_case(); + auto gpu_math_library_opt = gpu_opt->add_option("--math-library", + cfg.llvm_math_library, + fmt::format("Math library for GPU code generation ({})", cfg.llvm_math_library)); + gpu_math_library_opt->check(CLI::IsMember({"libdevice"})); + + // Allow only one platform at a time. + cpu_opt->excludes(gpu_opt); + gpu_opt->excludes(cpu_opt); + + // LLVM IR benchmark options. 
+ auto benchmark_opt = app.add_subcommand("benchmark", "LLVM benchmark option")->ignore_case(); + benchmark_opt->needs(llvm_opt); + benchmark_opt->add_flag("--run", + llvm_benchmark, + fmt::format("Run LLVM benchmark ({})", llvm_benchmark))->ignore_case(); + benchmark_opt->add_option("--opt-level-codegen", + cfg.llvm_opt_level_codegen, + fmt::format("Machine code optimisation level (O{})", cfg.llvm_opt_level_codegen))->ignore_case()->check(CLI::IsMember({"0", "1", "2", "3"})); + benchmark_opt->add_option("--libs", cfg.shared_lib_paths, "Shared libraries to link IR against") + ->ignore_case() + ->check(CLI::ExistingFile); + benchmark_opt->add_option("--instance-size", + instance_size, + fmt::format("Instance struct size ({})", instance_size))->ignore_case(); + benchmark_opt->add_option("--repeat", + num_experiments, + fmt::format("Number of experiments for benchmarking ({})", num_experiments))->ignore_case(); + benchmark_opt->add_option("--grid-dim-x", + llvm_cuda_grid_dim_x, + fmt::format("Grid dimension X ({})", llvm_cuda_grid_dim_x))->ignore_case(); + benchmark_opt->add_option("--block-dim-x", + llvm_cuda_block_dim_x, + fmt::format("Block dimension X ({})", llvm_cuda_block_dim_x))->ignore_case(); +#endif // clang-format on CLI11_PARSE(app, argc, argv); // if any of the other backends is used we force the C backend to be off. 
- if (ispc_backend) { - c_backend = false; + if (cfg.ispc_backend) { + cfg.c_backend = false; } - utils::make_path(output_dir); - utils::make_path(scratch_dir); + utils::make_path(cfg.output_dir); + utils::make_path(cfg.scratch_dir); if (sympy_opt) { nmodl::pybind_wrappers::EmbeddedPythonLoader::get_instance() @@ -296,13 +290,6 @@ int main(int argc, const char* argv[]) { logger->set_level(spdlog::level::from_str(verbose)); - /// write ast to nmodl - const auto ast_to_nmodl = [nmodl_ast](ast::Program& ast, const std::string& filepath) { - if (nmodl_ast) { - NmodlPrintVisitor(filepath).visit_program(ast); - logger->info("AST to NMODL transformation written to {}", filepath); - } - }; for (const auto& file: mod_files) { logger->info("Processing {}", file); @@ -310,92 +297,20 @@ int main(int argc, const char* argv[]) { const auto modfile = utils::remove_extension(utils::base_name(file)); /// create file path for nmodl file - auto filepath = [scratch_dir, modfile](const std::string& suffix) { + auto filepath = [cfg, modfile](const std::string& suffix, const std::string& ext) { static int count = 0; return fmt::format( - "{}/{}.{}.{}.mod", scratch_dir, modfile, std::to_string(count++), suffix); + "{}/{}.{}.{}.{}", cfg.scratch_dir, modfile, std::to_string(count++), suffix, ext); }; - /// driver object creates lexer and parser, just call parser method - NmodlDriver driver; + /// nmodl_driver object creates lexer and parser, just call parser method + NmodlDriver nmodl_driver; /// parse mod file and construct ast - const auto& ast = driver.parse_file(file); - - /// whether to update existing symbol table or create new - /// one whenever we run symtab visitor. 
- bool update_symtab = false; + const auto& ast = nmodl_driver.parse_file(file); - /// just visit the ast - AstVisitor().visit_program(*ast); - - /// construct symbol table - { - logger->info("Running symtab visitor"); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - /// Check some rules that ast should follow - { - logger->info("Running semantic analysis visitor"); - if (SemanticAnalysisVisitor().check(*ast)) { - return 1; - } - } - - /// use cnexp instead of after_cvode solve method - { - logger->info("Running CVode to cnexp visitor"); - AfterCVodeToCnexpVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("after_cvode_to_cnexp")); - } - - /// Rename variables that match ISPC compiler double constants - if (ispc_backend) { - logger->info("Running ISPC variables rename visitor"); - IspcRenameVisitor(ast).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("ispc_double_rename")); - } - - /// GLOBAL to RANGE rename visitor - if (nmodl_global_to_range) { - // make sure to run perf visitor because code generator - // looks for read/write counts const/non-const declaration - PerfVisitor().visit_program(*ast); - // make sure to run the GlobalToRange visitor after all the - // reinitializations of Symtab - logger->info("Running GlobalToRange visitor"); - GlobalToRangeVisitor(*ast).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("global_to_range")); - } - - /// LOCAL to ASSIGNED visitor - if (nmodl_local_to_range) { - logger->info("Running LOCAL to ASSIGNED visitor"); - PerfVisitor().visit_program(*ast); - LocalToAssignedVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_to_assigned")); - } - - { - // Compatibility Checking - logger->info("Running code compatibility checker"); - // run perfvisitor to update read/write counts - PerfVisitor().visit_program(*ast); - - // If we want to 
just check compatibility we return the result - if (only_check_compatibility) { - return CodegenCompatibilityVisitor().find_unhandled_ast_nodes(*ast); - } - - // If there is an incompatible construct and code generation is not forced exit NMODL - if (CodegenCompatibilityVisitor().find_unhandled_ast_nodes(*ast) && !force_codegen) { - return 1; - } - } + auto cg_driver = CodegenDriver(cfg); + auto success = cg_driver.prepare_mod(ast, modfile); if (show_symtab) { logger->info("Printing symbol table"); @@ -403,187 +318,126 @@ int main(int argc, const char* argv[]) { symtab->print(std::cout); } - ast_to_nmodl(*ast, filepath("ast")); - - if (json_ast) { - std::string file{scratch_dir}; - file += "/"; - file += modfile; - file += ".ast.json"; - logger->info("Writing AST into {}", file); - JSONVisitor(file).write(*ast); - } - - if (verbatim_rename) { - logger->info("Running verbatim rename visitor"); - VerbatimVarRenameVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("verbatim_rename")); - } - - if (nmodl_const_folding) { - logger->info("Running nmodl constant folding visitor"); - ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("constfold")); - } - - if (nmodl_unroll) { - logger->info("Running nmodl loop unroll visitor"); - LoopUnrollVisitor().visit_program(*ast); - ConstantFolderVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("unroll")); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - /// note that we can not symtab visitor in update mode as we - /// replace kinetic block with derivative block of same name - /// in global scope - { - logger->info("Running KINETIC block visitor"); - auto kineticBlockVisitor = KineticBlockVisitor(); - kineticBlockVisitor.visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - const auto filename = filepath("kinetic"); - ast_to_nmodl(*ast, filename); - if (nmodl_ast && kineticBlockVisitor.get_conserve_statement_count()) { - logger->warn( - fmt::format("{} 
presents non-standard CONSERVE statements in DERIVATIVE " - "blocks. Use it only for debugging/developing", - filename)); - } - } - - { - logger->info("Running STEADYSTATE visitor"); - SteadystateVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("steadystate")); - } - - /// Parsing units fron "nrnunits.lib" and mod files - { - logger->info("Parsing Units"); - UnitsVisitor(units_dir).visit_program(*ast); - } - - /// once we start modifying (especially removing) older constructs - /// from ast then we should run symtab visitor in update mode so - /// that old symbols (e.g. prime variables) are not lost - update_symtab = true; - - if (nmodl_inline) { - logger->info("Running nmodl inline visitor"); - InlineVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("inline")); - } - - if (local_rename) { - logger->info("Running local variable rename visitor"); - LocalVarRenameVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("local_rename")); - } - - if (nmodl_localize) { - // localize pass must follow rename pass to avoid conflict - logger->info("Running localize visitor"); - LocalizeVisitor(localize_verbatim).visit_program(*ast); - LocalVarRenameVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("localize")); + if (cfg.only_check_compatibility) { + return !success; } - - if (sympy_conductance) { - logger->info("Running sympy conductance visitor"); - SympyConductanceVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_conductance")); - } - - if (sympy_analytic || sparse_solver_exists(*ast)) { - if (!sympy_analytic) { - logger->info( - "Automatically enable sympy_analytic because it exists solver of type sparse"); - } - logger->info("Running sympy solve visitor"); - SympySolverVisitor(sympy_pade, 
sympy_cse).visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("sympy_solve")); + if (!success && !cfg.force_codegen) { + return 1; } { - logger->info("Running cnexp visitor"); - NeuronSolveVisitor().visit_program(*ast); - ast_to_nmodl(*ast, filepath("cnexp")); - } - - { - SolveBlockVisitor().visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - ast_to_nmodl(*ast, filepath("solveblock")); - } - - if (json_perfstat) { - std::string file{scratch_dir}; - file.append("/"); - file.append(modfile); - file.append(".perf.json"); - logger->info("Writing performance statistics to {}", file); - PerfVisitor(file).visit_program(*ast); - } - - // Add implicit arguments (like celsius, nt) to NEURON functions (like - // nrn_ghk, at_time) whose signatures we have to massage. - ImplicitArgumentVisitor{}.visit_program(*ast); - SymtabVisitor(update_symtab).visit_program(*ast); - - { - // make sure to run perf visitor because code generator - // looks for read/write counts const/non-const declaration - PerfVisitor().visit_program(*ast); - } - - { - CodegenTransformVisitor{}.visit_program(*ast); - ast_to_nmodl(*ast, filepath("TransformVisitor")); - SymtabVisitor(update_symtab).visit_program(*ast); - } - - { - if (ispc_backend) { + if (cfg.ispc_backend) { logger->info("Running ISPC backend code generator"); CodegenIspcVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - else if (oacc_backend) { + else if (cfg.oacc_backend) { logger->info("Running OpenACC backend code generator"); CodegenAccVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - else if (c_backend) { + else if (cfg.c_backend) { logger->info("Running C backend code generator"); CodegenCVisitor visitor(modfile, - 
output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); visitor.visit_program(*ast); } - if (cuda_backend) { + if (cfg.cuda_backend) { logger->info("Running CUDA backend code generator"); CodegenCudaVisitor visitor(modfile, - output_dir, + cfg.output_dir, data_type, - optimize_ionvar_copies_codegen); + cfg.optimize_ionvar_copies_codegen); + visitor.visit_program(*ast); + } + +#ifdef NMODL_LLVM_BACKEND + if (cfg.llvm_ir || llvm_benchmark) { + // If benchmarking, we want to optimize the IR with target + // information and not in LLVM visitor. + int llvm_opt_level = llvm_benchmark ? 0 : cfg.llvm_opt_level_ir; + + // If benchmarking, kernel functions should be wrapped taking void*. + bool wrap_kernel_functions = llvm_benchmark; + + // Create platform abstraction. + PlatformID pid = cfg.llvm_gpu_name == "default" ? PlatformID::CPU : PlatformID::GPU; + const std::string name = cfg.llvm_gpu_name == "default" ? cfg.llvm_cpu_name + : cfg.llvm_gpu_name; + Platform platform(pid, + name, + cfg.llvm_gpu_target_architecture, + cfg.llvm_math_library, + cfg.llvm_float_type, + cfg.llvm_vector_width); + + // GPU code generation doesn't support debug information at the moment so disable it + // in case it's enabled + if (!cfg.llvm_no_debug && platform.is_gpu()) { + logger->warn("Disabling addition of debug symbols in GPU code."); + cfg.llvm_no_debug = true; + } + + logger->info("Running LLVM backend code generator"); + CodegenLLVMVisitor visitor(modfile, + cfg.output_dir, + platform, + llvm_opt_level, + !cfg.llvm_no_debug, + cfg.llvm_fast_math_flags, + wrap_kernel_functions); visitor.visit_program(*ast); + if (cfg.nmodl_ast) { + NmodlPrintVisitor(filepath("llvm", "mod")).visit_program(*ast); + logger->info("AST to NMODL transformation written to {}", + filepath("llvm", "mod")); + } + if (cfg.json_ast) { + JSONVisitor(filepath("llvm", "json")).write(*ast); + logger->info("AST to JSON transformation written to {}", + 
filepath("llvm", "json")); + } + + if (llvm_benchmark) { + logger->info("Running LLVM benchmark"); + if (platform.is_gpu() && !platform.is_CUDA_gpu()) { + throw std::runtime_error( + "Benchmarking is only supported on CUDA GPUs at the moment"); + } +#ifndef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + throw std::runtime_error( + "GPU benchmarking is not supported if NMODL is not built with CUDA " + "backend enabled."); + } +#endif + const GPUExecutionParameters gpu_execution_parameters{llvm_cuda_grid_dim_x, + llvm_cuda_block_dim_x}; + benchmark::LLVMBenchmark benchmark(visitor, + modfile, + cfg.output_dir, + cfg.shared_lib_paths, + num_experiments, + instance_size, + platform, + cfg.llvm_opt_level_ir, + cfg.llvm_opt_level_codegen, + gpu_execution_parameters); + benchmark.run(); + } } +#endif } } diff --git a/src/pybind/CMakeLists.txt b/src/pybind/CMakeLists.txt index a78174cefd..eb4d57ea94 100644 --- a/src/pybind/CMakeLists.txt +++ b/src/pybind/CMakeLists.txt @@ -63,6 +63,13 @@ if(NMODL_ENABLE_PYTHON_BINDINGS) add_dependencies(_nmodl lexer pyastgen util) target_link_libraries(_nmodl PRIVATE printer symtab visitor pyembed) + # Additional options are needed when LLVM JIT functionality is built + if(NMODL_ENABLE_LLVM) + set_property(TARGET codegen llvm_codegen llvm_benchmark benchmark_data + PROPERTY POSITION_INDEPENDENT_CODE ON) + target_link_libraries(_nmodl PRIVATE codegen llvm_codegen llvm_benchmark benchmark_data + ${LLVM_LIBS_TO_LINK}) + endif() # in case of wheel, python module shouldn't link to wrapper library if(LINK_AGAINST_PYTHON) target_link_libraries(_nmodl PRIVATE pywrapper) diff --git a/src/pybind/pynmodl.cpp b/src/pybind/pynmodl.cpp index 259df6bb8b..38e9cb4b53 100644 --- a/src/pybind/pynmodl.cpp +++ b/src/pybind/pynmodl.cpp @@ -5,6 +5,7 @@ * Lesser General Public License. See top-level LICENSE file for details. 
*************************************************************************/ #include "ast/program.hpp" +#include "codegen/codegen_driver.hpp" #include "config/config.h" #include "parser/nmodl_driver.hpp" #include "pybind/pybind_utils.hpp" @@ -17,6 +18,11 @@ #include #include +#ifdef NMODL_LLVM_BACKEND +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "test/benchmark/llvm_benchmark.hpp" +#endif + /** * \dir * \brief Python Interface Implementation @@ -106,6 +112,12 @@ static const char* const to_json = R"( '{"Program":[{"NeuronBlock":[{"StatementBlock":[]}]}]}' )"; +#ifdef NMODL_LLVM_BACKEND +static const char* jit = R"( + This is the Jit class documentation +)"; +#endif + } // namespace docstring /** @@ -128,6 +140,88 @@ class PyNmodlDriver: public nmodl::parser::NmodlDriver { } }; +#ifdef NMODL_LLVM_BACKEND +class JitDriver { + private: + nmodl::codegen::Platform platform; + + nmodl::codegen::CodeGenConfig cfg; + nmodl::codegen::CodegenDriver cg_driver; + + void init_platform() { + // Create platform abstraction. + nmodl::codegen::PlatformID pid = cfg.llvm_gpu_name == "default" + ? nmodl::codegen::PlatformID::CPU + : nmodl::codegen::PlatformID::GPU; + const std::string name = cfg.llvm_gpu_name == "default" ? 
cfg.llvm_cpu_name + : cfg.llvm_gpu_name; + platform = nmodl::codegen::Platform(pid, + name, + cfg.llvm_gpu_target_architecture, + cfg.llvm_math_library, + cfg.llvm_float_type, + cfg.llvm_vector_width); + if (platform.is_gpu() && !platform.is_CUDA_gpu()) { + throw std::runtime_error("Benchmarking is only supported on CUDA GPUs at the moment"); + } +#ifndef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + throw std::runtime_error( + "GPU benchmarking is not supported if NMODL is not built with CUDA " + "backend enabled."); + } +#endif + } + + public: + JitDriver() + : cg_driver(cfg) { + init_platform(); + } + + explicit JitDriver(const nmodl::codegen::CodeGenConfig& cfg) + : cfg(cfg) + , cg_driver(cfg) { + init_platform(); + } + + + benchmark::BenchmarkResults run(std::shared_ptr node, + std::string& modname, + int num_experiments, + int instance_size, + int cuda_grid_dim_x, + int cuda_block_dim_x) { + // New directory is needed to be created otherwise the directory cannot be created + // automatically through python + if (cfg.nmodl_ast || cfg.json_ast || cfg.json_perfstat) { + utils::make_path(cfg.scratch_dir); + } + cg_driver.prepare_mod(node, modname); + nmodl::codegen::CodegenLLVMVisitor visitor(modname, + cfg.output_dir, + platform, + 0, + !cfg.llvm_no_debug, + cfg.llvm_fast_math_flags, + true); + visitor.visit_program(*node); + const GPUExecutionParameters gpu_execution_parameters{cuda_grid_dim_x, cuda_block_dim_x}; + nmodl::benchmark::LLVMBenchmark benchmark(visitor, + modname, + cfg.output_dir, + cfg.shared_lib_paths, + num_experiments, + instance_size, + platform, + cfg.llvm_opt_level_ir, + cfg.llvm_opt_level_codegen, + gpu_execution_parameters); + return benchmark.run(); + } +}; +#endif + } // namespace nmodl // forward declaration of submodule init functions @@ -160,6 +254,72 @@ PYBIND11_MODULE(_nmodl, m_nmodl) { nmodl::docstring::driver_parse_stream) .def("get_ast", &nmodl::PyNmodlDriver::get_ast, nmodl::docstring::driver_ast); + py::class_ 
cfg(m_nmodl, "CodeGenConfig"); + cfg.def(py::init([]() { + auto cfg = std::make_unique(); +#ifdef NMODL_LLVM_BACKEND + // set to more sensible defaults for python binding + cfg->llvm_ir = true; +#endif + return cfg; + })) + .def_readwrite("sympy_analytic", &nmodl::codegen::CodeGenConfig::sympy_analytic) + .def_readwrite("sympy_pade", &nmodl::codegen::CodeGenConfig::sympy_pade) + .def_readwrite("sympy_cse", &nmodl::codegen::CodeGenConfig::sympy_cse) + .def_readwrite("sympy_conductance", &nmodl::codegen::CodeGenConfig::sympy_conductance) + .def_readwrite("nmodl_inline", &nmodl::codegen::CodeGenConfig::nmodl_inline) + .def_readwrite("nmodl_unroll", &nmodl::codegen::CodeGenConfig::nmodl_unroll) + .def_readwrite("nmodl_const_folding", &nmodl::codegen::CodeGenConfig::nmodl_const_folding) + .def_readwrite("nmodl_localize", &nmodl::codegen::CodeGenConfig::nmodl_localize) + .def_readwrite("nmodl_global_to_range", + &nmodl::codegen::CodeGenConfig::nmodl_global_to_range) + .def_readwrite("nmodl_local_to_range", &nmodl::codegen::CodeGenConfig::nmodl_local_to_range) + .def_readwrite("localize_verbatim", &nmodl::codegen::CodeGenConfig::localize_verbatim) + .def_readwrite("local_rename", &nmodl::codegen::CodeGenConfig::local_rename) + .def_readwrite("verbatim_inline", &nmodl::codegen::CodeGenConfig::verbatim_inline) + .def_readwrite("verbatim_rename", &nmodl::codegen::CodeGenConfig::verbatim_rename) + .def_readwrite("force_codegen", &nmodl::codegen::CodeGenConfig::force_codegen) + .def_readwrite("only_check_compatibility", + &nmodl::codegen::CodeGenConfig::only_check_compatibility) + .def_readwrite("optimize_ionvar_copies_codegen", + &nmodl::codegen::CodeGenConfig::optimize_ionvar_copies_codegen) + .def_readwrite("output_dir", &nmodl::codegen::CodeGenConfig::output_dir) + .def_readwrite("scratch_dir", &nmodl::codegen::CodeGenConfig::scratch_dir) + .def_readwrite("data_type", &nmodl::codegen::CodeGenConfig::data_type) + .def_readwrite("nmodl_ast", 
&nmodl::codegen::CodeGenConfig::nmodl_ast) + .def_readwrite("json_ast", &nmodl::codegen::CodeGenConfig::json_ast) + .def_readwrite("json_perfstat", &nmodl::codegen::CodeGenConfig::json_perfstat) +#ifdef NMODL_LLVM_BACKEND + .def_readwrite("llvm_ir", &nmodl::codegen::CodeGenConfig::llvm_ir) + .def_readwrite("llvm_float_type", &nmodl::codegen::CodeGenConfig::llvm_float_type) + .def_readwrite("llvm_opt_level_ir", &nmodl::codegen::CodeGenConfig::llvm_opt_level_ir) + .def_readwrite("llvm_math_library", &nmodl::codegen::CodeGenConfig::llvm_math_library) + .def_readwrite("llvm_no_debug", &nmodl::codegen::CodeGenConfig::llvm_no_debug) + .def_readwrite("llvm_fast_math_flags", &nmodl::codegen::CodeGenConfig::llvm_fast_math_flags) + .def_readwrite("llvm_cpu_name", &nmodl::codegen::CodeGenConfig::llvm_cpu_name) + .def_readwrite("llvm_gpu_name", &nmodl::codegen::CodeGenConfig::llvm_gpu_name) + .def_readwrite("llvm_gpu_target_architecture", + &nmodl::codegen::CodeGenConfig::llvm_gpu_target_architecture) + .def_readwrite("llvm_vector_width", &nmodl::codegen::CodeGenConfig::llvm_vector_width) + .def_readwrite("llvm_opt_level_codegen", + &nmodl::codegen::CodeGenConfig::llvm_opt_level_codegen) + .def_readwrite("shared_lib_paths", &nmodl::codegen::CodeGenConfig::shared_lib_paths); + + py::class_ jit_driver(m_nmodl, "Jit", nmodl::docstring::jit); + jit_driver.def(py::init<>()) + .def(py::init()) + .def("run", + &nmodl::JitDriver::run, + "node"_a, + "modname"_a, + "num_experiments"_a, + "instance_size"_a, + "cuda_grid_dim_x"_a = 1, + "cuda_block_dim_x"_a = 1); +#else + ; +#endif + m_nmodl.def("to_nmodl", static_cast&)>( diff --git a/src/visitors/inline_visitor.cpp b/src/visitors/inline_visitor.cpp index b72c9cbdb9..3f0dd8e6c6 100644 --- a/src/visitors/inline_visitor.cpp +++ b/src/visitors/inline_visitor.cpp @@ -305,6 +305,8 @@ void InlineVisitor::visit_statement_block(StatementBlock& node) { /** Visit all wrapped expressions which can contain function calls. 
* If a function call is replaced then the wrapped expression is * also replaced with new variable node from the inlining result. + * Note that we use `VarName` so that LHS of assignment expression + * is `VarName`, similar to parser. */ void InlineVisitor::visit_wrapped_expression(WrappedExpression& node) { node.visit_children(*this); @@ -313,7 +315,9 @@ void InlineVisitor::visit_wrapped_expression(WrappedExpression& node) { auto expression = dynamic_cast(e.get()); if (replaced_fun_calls.find(expression) != replaced_fun_calls.end()) { auto var = replaced_fun_calls[expression]; - node.set_expression(std::make_shared(new String(var))); + node.set_expression(std::make_shared(new Name(new String(var)), + /*at=*/nullptr, + /*index=*/nullptr)); } } } diff --git a/test/benchmark/CMakeLists.txt b/test/benchmark/CMakeLists.txt new file mode 100644 index 0000000000..da72c43514 --- /dev/null +++ b/test/benchmark/CMakeLists.txt @@ -0,0 +1,57 @@ +# ============================================================================= +# llvm benchmark sources +# ============================================================================= +set(LLVM_BENCHMARK_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/jit_driver.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.cpp ${CMAKE_CURRENT_SOURCE_DIR}/llvm_benchmark.hpp) + +if(NMODL_ENABLE_LLVM_CUDA) + list(APPEND LLVM_BENCHMARK_SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/cuda_driver.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cuda_driver.hpp) +endif() + +# ============================================================================= +# LLVM benchmark library +# ============================================================================= +include_directories(${LLVM_INCLUDE_DIRS}) +add_library(llvm_benchmark STATIC ${LLVM_BENCHMARK_SOURCE_FILES}) +add_dependencies(llvm_benchmark lexer util visitor) +target_link_libraries(llvm_benchmark PRIVATE util visitor) +if(NMODL_ENABLE_LLVM_CUDA) + 
target_link_libraries(llvm_benchmark PRIVATE CUDA::cudart CUDA::nvrtc) +endif() + +if(NMODL_ENABLE_JIT_EVENT_LISTENERS) + target_compile_definitions(llvm_benchmark PUBLIC NMODL_HAVE_JIT_EVENT_LISTENERS) +endif() + +# ============================================================================= +# LLVM pyjit +# ============================================================================= + +if(NMODL_ENABLE_PYTHON_BINDINGS) + file(GLOB modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/kernels/*.mod") + list(APPEND modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/integration/mod/test_math.mod") + foreach(modfile ${modfiles}) + get_filename_component(modfile_name "${modfile}" NAME) + add_test(NAME "PyJIT/${modfile_name}" + COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py + --file ${modfile} ${extra_args}) + set_tests_properties( + "PyJIT/${modfile_name}" PROPERTIES ENVIRONMENT + PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH}) + + if(NMODL_ENABLE_LLVM_CUDA) + add_test(NAME "PyJIT/${modfile_name}_gpu" + COMMAND ${PYTHON_EXECUTABLE} ${NMODL_PROJECT_SOURCE_DIR}/test/benchmark/benchmark.py + --file ${modfile} --gpu ${extra_args}) + message(STATUS "CUDA_HOME is ${CUDAToolkit_TARGET_DIR}") + set_tests_properties( + "PyJIT/${modfile_name}_gpu" + PROPERTIES + ENVIRONMENT + "PYTHONPATH=${PROJECT_BINARY_DIR}/lib:$ENV{PYTHONPATH};CUDA_HOME=${CUDAToolkit_TARGET_DIR}" + ) + endif() + endforeach() +endif() diff --git a/test/benchmark/benchmark.py b/test/benchmark/benchmark.py new file mode 100644 index 0000000000..55c86bba46 --- /dev/null +++ b/test/benchmark/benchmark.py @@ -0,0 +1,52 @@ +import argparse +import sys +import os + +import nmodl.dsl as nmodl +from nmodl import ast, visitor + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Benchmark test script for NMODL.') + parser.add_argument('--gpu', action='store_true', default=False, + help='Enable GPU JIT execution') + parser.add_argument('--vec', type=int, 
default=1, + help='Vector width for CPU execution') + parser.add_argument('--file', type=str, + help='NMODL file to benchmark') + args, _ = parser.parse_known_args() + return args + +def main(): + args = parse_arguments() + + driver = nmodl.NmodlDriver() + lookup_visitor = visitor.AstLookupVisitor() + + cfg = nmodl.CodeGenConfig() + cfg.llvm_vector_width = args.vec + cfg.llvm_opt_level_ir = 2 + cfg.llvm_fast_math_flags = ["nnan", "contract", "afn"] + cfg.llvm_no_debug = False + cfg.nmodl_ast = True + fname = args.file + if args.gpu: # GPU enabled + cfg.llvm_math_library = "libdevice" + cfg.llvm_gpu_name = "nvptx64" + cfg.llvm_gpu_target_architecture = "sm_70" + # Disable debug symbols generation for GPU code since the PTX generated is not valid + cfg.llvm_no_debug = True + if not os.environ.get("CUDA_HOME"): + raise RuntimeError("CUDA_HOME environment variable not set") + cfg.shared_lib_paths = [os.getenv("CUDA_HOME") + "/nvvm/libdevice/libdevice.10.bc"] + with open(fname) as f: + hh = f.read() + modast = driver.parse_string(hh) + modname = lookup_visitor.lookup(modast, ast.AstNodeType.SUFFIX)[0].get_node_name() + jit = nmodl.Jit(cfg) + + res = jit.run(modast, modname, 1000, 1000) + print(res) + + +if __name__ == "__main__": + main() diff --git a/test/benchmark/benchmark_info.hpp b/test/benchmark/benchmark_info.hpp new file mode 100644 index 0000000000..d02d33ce2e --- /dev/null +++ b/test/benchmark/benchmark_info.hpp @@ -0,0 +1,29 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include +#include + +/// A struct to hold the information for benchmarking. +struct BenchmarkInfo { + /// Object or PTX filename to dump. 
+ std::string filename; + + /// Object file output directory. + std::string output_dir; + + /// Shared libraries' paths to link against. + std::vector shared_lib_paths; + + /// Optimisation level for IR. + int opt_level_ir; + + /// Optimisation level for machine code generation. + int opt_level_codegen; +}; diff --git a/test/benchmark/cuda_driver.cpp b/test/benchmark/cuda_driver.cpp new file mode 100644 index 0000000000..b65caeff0d --- /dev/null +++ b/test/benchmark/cuda_driver.cpp @@ -0,0 +1,200 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_utils.hpp" +#include "cuda_driver.hpp" +#include "fmt/format.h" +#include "utils/common_utils.hpp" + +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" + +namespace nmodl { +namespace runner { + +void CUDADriver::checkCudaErrors(CUresult err) { + if (err != CUDA_SUCCESS) { + const char* ret = NULL; + cuGetErrorName(err, &ret); + throw std::runtime_error("CUDA error: " + std::string(ret)); + } +} + +void CUDADriver::link_libraries(llvm::Module& module, BenchmarkInfo* benchmark_info) { + llvm::Linker linker(module); + for (const auto& lib_path: benchmark_info->shared_lib_paths) { + const auto lib_name = lib_path.substr(lib_path.find_last_of("/\\") + 1); + std::regex libdevice_bitcode_name{"libdevice.*.bc"}; + if (!std::regex_match(lib_name, libdevice_bitcode_name)) { + throw std::runtime_error("Only libdevice is supported for now"); + } + // Load libdevice module to the LLVM Module + auto 
libdevice_file_memory_buffer = llvm::MemoryBuffer::getFile(lib_path); + llvm::Expected> libdevice_expected_module = + parseBitcodeFile(libdevice_file_memory_buffer->get()->getMemBufferRef(), + module.getContext()); + if (std::error_code error = errorToErrorCode(libdevice_expected_module.takeError())) { + throw std::runtime_error(fmt::format("Error reading bitcode: {}", error.message())); + } + linker.linkInModule(std::move(libdevice_expected_module.get()), + llvm::Linker::LinkOnlyNeeded); + } +} + +void print_string_to_file(const std::string& ptx_compiled_module, const std::string& filename) { + std::ofstream ptx_file(filename); + ptx_file << ptx_compiled_module; + ptx_file.close(); +} + +// Converts the CUDA compute version to the CUjit_target enum used by the CUJIT +CUjit_target get_CUjit_target(const int compute_version_major, const int compute_version_minor) { + auto compute_architecture = compute_version_major * 10 + compute_version_minor; + switch (compute_architecture) { + case 20: + return CU_TARGET_COMPUTE_20; + case 21: + return CU_TARGET_COMPUTE_21; + case 30: + return CU_TARGET_COMPUTE_30; + case 32: + return CU_TARGET_COMPUTE_32; + case 35: + return CU_TARGET_COMPUTE_35; + case 37: + return CU_TARGET_COMPUTE_37; + case 50: + return CU_TARGET_COMPUTE_50; + case 52: + return CU_TARGET_COMPUTE_52; + case 53: + return CU_TARGET_COMPUTE_53; + case 60: + return CU_TARGET_COMPUTE_60; + case 61: + return CU_TARGET_COMPUTE_61; + case 62: + return CU_TARGET_COMPUTE_62; + case 70: + return CU_TARGET_COMPUTE_70; + case 72: + return CU_TARGET_COMPUTE_72; + case 75: + return CU_TARGET_COMPUTE_75; + case 80: + return CU_TARGET_COMPUTE_80; + case 86: + return CU_TARGET_COMPUTE_86; + default: + throw std::runtime_error("Unsupported compute architecture"); + } +} + +void CUDADriver::init(const codegen::Platform& platform, BenchmarkInfo* benchmark_info) { + // CUDA initialization + checkCudaErrors(cuInit(0)); + checkCudaErrors(cuDeviceGetCount(&device_info.count)); + 
checkCudaErrors(cuDeviceGet(&device, 0)); + + char name[128]; + checkCudaErrors(cuDeviceGetName(name, 128, device)); + device_info.name = name; + logger->info(fmt::format("Using CUDA Device [0]: {}", device_info.name)); + + // Get the compute capability of the device that is actually going to be used to run the kernel + checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_major, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + device)); + checkCudaErrors(cuDeviceGetAttribute(&device_info.compute_version_minor, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + device)); + logger->info(fmt::format("Device Compute Capability: {}.{}", + device_info.compute_version_major, + device_info.compute_version_minor)); + if (device_info.compute_version_major < 2) { + throw std::runtime_error("ERROR: Device 0 is not SM 2.0 or greater"); + } + + // Load the external libraries modules to the NVVM program + // Currently only libdevice is supported + link_libraries(*module, benchmark_info); + + // Compile the program + logger->info("Compiling the LLVM IR to PTX"); + + // Optimize code for nvptx including the wrapper functions and generate PTX + const auto opt_level_codegen = benchmark_info ? 
benchmark_info->opt_level_codegen : 0; + utils::optimise_module_for_nvptx(platform, *module, opt_level_codegen, ptx_compiled_module); + utils::save_ir_to_ll_file(*module, + benchmark_info->output_dir + "/" + benchmark_info->filename + + "_benchmark"); + if (benchmark_info) { + print_string_to_file(ptx_compiled_module, + benchmark_info->output_dir + "/" + benchmark_info->filename + ".ptx"); + } + + // Create driver context + checkCudaErrors(cuCtxCreate(&context, 0, device)); + + // Create module for object + logger->info("Loading PTX to CUDA module"); + const unsigned int jitNumOptions = 5; + CUjit_option* jitOptions = new CUjit_option[jitNumOptions]; + void** jitOptVals = new void*[jitNumOptions]; + + // set up size of compilation log buffer + jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + size_t jitLogBufferSize = 1024 * 1024; + jitOptVals[0] = (void*) jitLogBufferSize; + + // set up pointer to the compilation log buffer + jitOptions[1] = CU_JIT_INFO_LOG_BUFFER; + char* jitLogBuffer = new char[jitLogBufferSize]; + jitOptVals[1] = jitLogBuffer; + + // set up size of compilation error log buffer + jitOptions[2] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + size_t jitErrorLogBufferSize = 1024 * 1024; + jitOptVals[2] = (void*) jitErrorLogBufferSize; + + // set up pointer to the compilation error log buffer + jitOptions[3] = CU_JIT_ERROR_LOG_BUFFER; + char* jitErrorLogBuffer = new char[jitErrorLogBufferSize]; + jitOptVals[3] = jitErrorLogBuffer; + + // set the exact CUDA compute target architecture based on the GPU it's going to be actually + // used + jitOptions[4] = CU_JIT_TARGET; + auto target_architecture = get_CUjit_target(device_info.compute_version_major, + device_info.compute_version_minor); + jitOptVals[4] = (void*) target_architecture; + + // load the LLVM module to the CUDA module (CUDA JIT compilation) + auto cuda_jit_ret = cuModuleLoadDataEx( + &cudaModule, ptx_compiled_module.c_str(), jitNumOptions, jitOptions, jitOptVals); + if 
(!std::string(jitLogBuffer).empty()) { + logger->info(fmt::format("CUDA JIT INFO LOG: {}", std::string(jitLogBuffer))); + } + if (!std::string(jitErrorLogBuffer).empty()) { + logger->info(fmt::format("CUDA JIT ERROR LOG: {}", std::string(jitErrorLogBuffer))); + } + delete[] jitOptions; + delete[] jitOptVals; + delete[] jitLogBuffer; + delete[] jitErrorLogBuffer; + checkCudaErrors(cuda_jit_ret); +} + +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/cuda_driver.hpp b/test/benchmark/cuda_driver.hpp new file mode 100644 index 0000000000..3fd02fd55e --- /dev/null +++ b/test/benchmark/cuda_driver.hpp @@ -0,0 +1,187 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief Implementation of CUDA and NVVM-based execution engine to run functions from MOD files + * + * \file + * \brief \copybrief nmodl::runner::CUDADriver + */ + +#include +#include + +#include "llvm/IR/Module.h" + +#include "benchmark_info.hpp" + +#include "cuda.h" +#include "cuda_runtime.h" +#include "gpu_parameters.hpp" + +using nmodl::cuda_details::GPUExecutionParameters; + +namespace nmodl { +namespace runner { + +struct DeviceInfo { + int count; + std::string name; + int compute_version_major; + int compute_version_minor; +}; + +/** + * @brief Throw meaningful error in case CUDA API call fails + * + * Checks whether a call to the CUDA API was successful and if not it throws a runtime_error with + * the error message from CUDA. + * + * @param err Return value of the CUDA API call + */ +void checkCudaErrors(CUresult err); + +/** + * \class CUDADriver + * \brief Driver to execute a MOD file function via the CUDA JIT backend. 
+ */ +class CUDADriver { + /// LLVM IR module to execute. + std::unique_ptr module; + CUdevice device; + CUmodule cudaModule; + CUcontext context; + CUfunction function; + CUlinkState linker; + DeviceInfo device_info; + std::string ptx_compiled_module; + + void checkCudaErrors(CUresult err); + void link_libraries(llvm::Module& module, BenchmarkInfo* benchmark_info); + + public: + explicit CUDADriver(std::unique_ptr m) + : module(std::move(m)) {} + + /// Initializes the CUDA GPU JIT driver. + void init(const codegen::Platform& platform, BenchmarkInfo* benchmark_info = nullptr); + + void launch_cuda_kernel(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters, + void* kernel_parameters) { + // Get kernel function + checkCudaErrors(cuModuleGetFunction(&function, cudaModule, entry_point.c_str())); + + // Kernel launch + checkCudaErrors(cuLaunchKernel(function, + gpu_execution_parameters.gridDimX, + 1, + 1, + gpu_execution_parameters.blockDimX, + 1, + 1, + 0, + nullptr, + &kernel_parameters, + nullptr)); + auto asyncErr = cudaDeviceSynchronize(); + if (asyncErr != cudaSuccess) { + throw std::runtime_error( + fmt::format("CUDA Execution Error: {}\n", cudaGetErrorString(asyncErr))); + } + } + + /// Lookups the entry-point without arguments in the CUDA module and executes it. + void execute_without_arguments(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters) { + launch_cuda_kernel(entry_point, gpu_execution_parameters, {}); + } + + /// Lookups the entry-point with arguments in the CUDA module and executes it. + template + void execute_with_arguments(const std::string& entry_point, + ArgType arg, + const GPUExecutionParameters& gpu_execution_parameters) { + launch_cuda_kernel(entry_point, gpu_execution_parameters, {&arg}); + } +}; + +/** + * \class BaseGPURunner + * \brief A base runner class that provides functionality to execute an + * entry point in the CUDA module. 
+ */ +class BaseGPURunner { + protected: + std::unique_ptr driver; + + explicit BaseGPURunner(std::unique_ptr m) + : driver(std::make_unique(std::move(m))) {} + + public: + /// Sets up the CUDA driver. + virtual void initialize_driver(const codegen::Platform& platform) = 0; + + /// Runs the entry-point function without arguments. + void run_without_arguments(const std::string& entry_point, + const GPUExecutionParameters& gpu_execution_parameters) { + return driver->execute_without_arguments(entry_point, gpu_execution_parameters); + } + + /// Runs the entry-point function with a pointer to the data as an argument. + template + void run_with_argument(const std::string& entry_point, + ArgType arg, + const GPUExecutionParameters& gpu_execution_parameters) { + return driver->template execute_with_arguments(entry_point, arg, gpu_execution_parameters); + } +}; + +/** + * \class TestGPURunner + * \brief A simple runner for testing purposes. + */ +class TestGPURunner: public BaseGPURunner { + public: + explicit TestGPURunner(std::unique_ptr m) + : BaseGPURunner(std::move(m)) {} + + virtual void initialize_driver(const codegen::Platform& platform) { + driver->init(platform); + } +}; + +/** + * \class BenchmarkGPURunner + * \brief A runner with benchmarking functionality. It takes user-specified GPU + * features into account, as well as it can link against shared libraries. + */ +class BenchmarkGPURunner: public BaseGPURunner { + private: + /// Benchmarking information passed to JIT driver. 
+ BenchmarkInfo benchmark_info; + + public: + BenchmarkGPURunner(std::unique_ptr m, + std::string filename, + std::string output_dir, + std::vector lib_paths = {}, + int opt_level_ir = 0, + int opt_level_codegen = 0) + : BaseGPURunner(std::move(m)) + , benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {} + + virtual void initialize_driver(const codegen::Platform& platform) { + driver->init(platform, &benchmark_info); + } +}; + + +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/gpu_parameters.hpp b/test/benchmark/gpu_parameters.hpp new file mode 100644 index 0000000000..5e72edb147 --- /dev/null +++ b/test/benchmark/gpu_parameters.hpp @@ -0,0 +1,27 @@ +/************************************************************************* + * Copyright (C) 2018-2022 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief GPU execution parameters struct + * + * \file + * \brief \copybrief nmodl::cuda_details::GPUExecutionParameters + */ + +namespace nmodl { +namespace cuda_details { + +struct GPUExecutionParameters { + int gridDimX; + int blockDimX; +}; + +} // namespace cuda_details +} // namespace nmodl diff --git a/test/benchmark/jit_driver.cpp b/test/benchmark/jit_driver.cpp new file mode 100644 index 0000000000..f91b41cda0 --- /dev/null +++ b/test/benchmark/jit_driver.cpp @@ -0,0 +1,205 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include "jit_driver.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen/llvm/llvm_utils.hpp" +#include "utils/common_utils.hpp" + +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/ObjectCache.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +namespace nmodl { +namespace runner { + +/****************************************************************************************/ +/* Utilities for JIT driver */ +/****************************************************************************************/ + +/// Get the host CPU features in the format: +/// +feature,+feature,-feature,+feature,... +/// where `+` indicates that the feature is enabled. +std::string get_cpu_features(const std::string& cpu) { + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f: host_features) + features.AddFeature(f.first(), f.second); + } + return llvm::join(features.getFeatures().begin(), features.getFeatures().end(), ","); +} + +/// Sets the target triple and the data layout of the module. +static void set_triple_and_data_layout(llvm::Module& module, const std::string& cpu) { + // Get the default target triple for the host. 
+ auto target_triple = llvm::sys::getDefaultTargetTriple(); + std::string error_msg; + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); + if (!target) + throw std::runtime_error("Error " + error_msg + "\n"); + + // Set a target machine to create the data layout. + std::string features = get_cpu_features(cpu); + std::unique_ptr tm( + target->createTargetMachine(target_triple, cpu, features, {}, {})); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); + + // Set data layout and the target triple to the module. + module.setDataLayout(tm->createDataLayout()); + module.setTargetTriple(target_triple); +} + +/// Creates llvm::TargetMachine for a specified CPU. +static std::unique_ptr create_target( + llvm::orc::JITTargetMachineBuilder* tm_builder, + const std::string& cpu, + int opt_level) { + // First, look up the target. + std::string error_msg; + auto target_triple = tm_builder->getTargetTriple().getTriple(); + auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg); + if (!target) + throw std::runtime_error("Error " + error_msg + "\n"); + + // Create default target machine with provided features. 
+ auto tm = target->createTargetMachine(target_triple, + cpu, + get_cpu_features(cpu), + tm_builder->getOptions(), + tm_builder->getRelocationModel(), + tm_builder->getCodeModel(), + static_cast(opt_level), + /*JIT=*/true); + if (!tm) + throw std::runtime_error("Error: could not create the target machine\n"); + + return std::unique_ptr(tm); +} + +/****************************************************************************************/ +/* JIT driver */ +/****************************************************************************************/ + +void JITDriver::init(const std::string& cpu, BenchmarkInfo* benchmark_info) { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + utils::initialise_optimisation_passes(); + + // Set the target triple and the data layout for the module. + set_triple_and_data_layout(*module, cpu); + auto data_layout = module->getDataLayout(); + + // If benchmarking, enable listeners to use GDB, perf or VTune. Note that LLVM should be built + // with listeners on (e.g. -DLLVM_USE_PERF=ON). + if (benchmark_info) { + gdb_event_listener = llvm::JITEventListener::createGDBRegistrationListener(); +#if defined(NMODL_HAVE_JIT_EVENT_LISTENERS) + perf_event_listener = llvm::JITEventListener::createPerfJITEventListener(); + intel_event_listener = llvm::JITEventListener::createIntelJITEventListener(); +#endif + } + + // Create object linking function callback. + auto object_linking_layer_creator = [&](llvm::orc::ExecutionSession& session, + const llvm::Triple& triple) { + // Create linking layer. + auto layer = std::make_unique(session, []() { + return std::make_unique(); + }); + + // Register event listeners if they exist. + if (gdb_event_listener) + layer->registerJITEventListener(*gdb_event_listener); + if (perf_event_listener) + layer->registerJITEventListener(*perf_event_listener); + if (intel_event_listener) + layer->registerJITEventListener(*intel_event_listener); + + // If benchmarking, resolve shared libraries. 
+ if (benchmark_info) { + for (const auto& lib_path: benchmark_info->shared_lib_paths) { + // For every library path, create a corresponding memory buffer. + auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path); + if (!memory_buffer) + throw std::runtime_error("Unable to create memory buffer for " + lib_path); + + // Create a new JIT library instance for this session and resolve symbols. + auto& jd = session.createBareJITDylib(std::string(lib_path)); + auto loaded = + llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(), + data_layout.getGlobalPrefix()); + + if (!loaded) + throw std::runtime_error("Unable to load " + lib_path); + jd.addGenerator(std::move(*loaded)); + cantFail(layer->add(jd, std::move(*memory_buffer))); + } + } + return layer; + }; + + // Create IR compile function callback. + auto compile_function_creator = [&](llvm::orc::JITTargetMachineBuilder tm_builder) + -> llvm::Expected> { + // Create target machine with some features possibly turned off. + int opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0; + auto tm = create_target(&tm_builder, cpu, opt_level_codegen); + + // Optimise the LLVM IR module and save it to .ll file if benchmarking. + if (benchmark_info) { + utils::optimise_module(*module, benchmark_info->opt_level_ir, tm.get()); + const std::string filename = benchmark_info->output_dir + "/" + + benchmark_info->filename + "_opt"; + utils::save_ir_to_ll_file(*module, filename); + } + + return std::make_unique(std::move(tm)); + }; + + // Set the JIT instance. + auto jit_instance = cantFail(llvm::orc::LLJITBuilder() + .setCompileFunctionCreator(compile_function_creator) + .setObjectLinkingLayerCreator(object_linking_layer_creator) + .create()); + + // Add a ThreadSafeModule to the driver. + llvm::orc::ThreadSafeModule tsm(std::move(module), std::make_unique()); + cantFail(jit_instance->addIRModule(std::move(tsm))); + jit = std::move(jit_instance); + + // Resolve symbols. 
+ llvm::orc::JITDylib& sym_tab = jit->getMainJITDylib(); + sym_tab.addGenerator(cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( + data_layout.getGlobalPrefix()))); + + // Optionally, dump the binary to the object file. + if (benchmark_info) { + std::string object_file = benchmark_info->filename + ".o"; + if (utils::file_exists(object_file)) { + int status = remove(object_file.c_str()); + if (status) { + throw std::runtime_error("Can not remove object file " + object_file); + } + } + jit->getObjTransformLayer().setTransform( + llvm::orc::DumpObjects(benchmark_info->output_dir, benchmark_info->filename)); + } +} +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/jit_driver.hpp b/test/benchmark/jit_driver.hpp new file mode 100644 index 0000000000..96b46a447c --- /dev/null +++ b/test/benchmark/jit_driver.hpp @@ -0,0 +1,155 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +/** + * \dir + * \brief Implementation of LLVM's JIT-based execution engine to run functions from MOD files + * + * \file + * \brief \copybrief nmodl::runner::JITDriver + */ + +#include "benchmark_info.hpp" + +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/Support/Host.h" + +namespace nmodl { +namespace runner { + +/** + * \class JITDriver + * \brief Driver to execute a MOD file function via LLVM IR backend. + */ +class JITDriver { + private: + std::unique_ptr context = std::make_unique(); + + std::unique_ptr jit; + + /// LLVM IR module to execute. + std::unique_ptr module; + + /// GDB event listener. 
+ llvm::JITEventListener* gdb_event_listener = nullptr; + + /// perf event listener. + llvm::JITEventListener* perf_event_listener = nullptr; + + /// Intel event listener. + llvm::JITEventListener* intel_event_listener = nullptr; + + public: + explicit JITDriver(std::unique_ptr m) + : module(std::move(m)) {} + + /// Initializes the JIT driver. + void init(const std::string& cpu, BenchmarkInfo* benchmark_info = nullptr); + + /// Lookups the entry-point without arguments in the JIT and executes it, returning the result. + template + ReturnType execute_without_arguments(const std::string& entry_point) { + auto expected_symbol = jit->lookup(entry_point); + if (!expected_symbol) + throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); + + auto (*res)() = (ReturnType(*)())(intptr_t) expected_symbol->getAddress(); + ReturnType result = res(); + return result; + } + + /// Lookups the entry-point with an argument in the JIT and executes it, returning the result. + template + ReturnType execute_with_arguments(const std::string& entry_point, ArgType arg) { + auto expected_symbol = jit->lookup(entry_point); + if (!expected_symbol) + throw std::runtime_error("Error: entry-point symbol not found in JIT\n"); + + auto (*res)(ArgType) = (ReturnType(*)(ArgType))(intptr_t) expected_symbol->getAddress(); + ReturnType result = res(arg); + return result; + } +}; + +/** + * \class BaseRunner + * \brief A base runner class that provides functionality to execute an + * entry point in the LLVM IR module. + */ +class BaseRunner { + protected: + std::unique_ptr driver; + + explicit BaseRunner(std::unique_ptr m) + : driver(std::make_unique(std::move(m))) {} + + public: + /// Sets up the JIT driver. + virtual void initialize_driver() = 0; + + /// Runs the entry-point function without arguments. 
+ template + ReturnType run_without_arguments(const std::string& entry_point) { + return driver->template execute_without_arguments(entry_point); + } + + /// Runs the entry-point function with a pointer to the data as an argument. + template + ReturnType run_with_argument(const std::string& entry_point, ArgType arg) { + return driver->template execute_with_arguments(entry_point, arg); + } +}; + +/** + * \class TestRunner + * \brief A simple runner for testing purposes. + */ +class TestRunner: public BaseRunner { + public: + explicit TestRunner(std::unique_ptr m) + : BaseRunner(std::move(m)) {} + + virtual void initialize_driver() { + driver->init(llvm::sys::getHostCPUName().str()); + } +}; + +/** + * \class BenchmarkRunner + * \brief A runner with benchmarking functionality. It takes user-specified CPU + * features into account, as well as it can link against shared libraries. + */ +class BenchmarkRunner: public BaseRunner { + private: + /// Benchmarking information passed to JIT driver. + BenchmarkInfo benchmark_info; + + /// CPU to target. 
+ std::string cpu; + + public: + BenchmarkRunner(std::unique_ptr m, + std::string filename, + std::string output_dir, + std::string cpu, + std::vector lib_paths = {}, + int opt_level_ir = 0, + int opt_level_codegen = 0) + : BaseRunner(std::move(m)) + , cpu(cpu) + , benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {} + + virtual void initialize_driver() { + driver->init(cpu, &benchmark_info); + } +}; + +} // namespace runner +} // namespace nmodl diff --git a/test/benchmark/kernels/expsyn.mod b/test/benchmark/kernels/expsyn.mod new file mode 100644 index 0000000000..56ddde3b19 --- /dev/null +++ b/test/benchmark/kernels/expsyn.mod @@ -0,0 +1,42 @@ +NEURON { + POINT_PROCESS ExpSyn + RANGE tau, e, i + NONSPECIFIC_CURRENT i +} + +UNITS { + (nA) = (nanoamp) + (mV) = (millivolt) + (uS) = (microsiemens) +} + +PARAMETER { + tau = 0.1 (ms) <1e-9,1e9> + e = 0 (mV) +} + +ASSIGNED { + v (mV) + i (nA) +} + +STATE { + g (uS) +} + +INITIAL { + g=0 +} + +BREAKPOINT { + SOLVE state METHOD cnexp + i = g*(v - e) +} + +DERIVATIVE state { + g' = -g/tau +} + +NET_RECEIVE(weight (uS)) { + g = g + weight +} diff --git a/test/benchmark/kernels/hh.mod b/test/benchmark/kernels/hh.mod new file mode 100644 index 0000000000..053a15f43f --- /dev/null +++ b/test/benchmark/kernels/hh.mod @@ -0,0 +1,125 @@ +TITLE hh.mod squid sodium, potassium, and leak channels + +COMMENT + This is the original Hodgkin-Huxley treatment for the set of sodium, + potassium, and leakage channels found in the squid giant axon membrane. + ("A quantitative description of membrane current and its application + conduction and excitation in nerve" J.Physiol. (Lond.) 117:500-544 (1952).) + Membrane voltage is in absolute mV and has been reversed in polarity + from the original HH convention and shifted to reflect a resting potential + of -65 mV. + Remember to set celsius=6.3 (or whatever) in your HOC file. + See squid.hoc for an example of a simulation using this model. 
+ SW Jaslove 6 March, 1992 +ENDCOMMENT + +UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) +} + +? interface +NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + :GLOBAL minf, hinf, ninf, mtau, htau, ntau + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE : assigned GLOBALs will be per thread +} + +PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) +} + +STATE { + m h n +} + +ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + ek (mV) + + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf hinf ninf + mtau (ms) htau (ms) ntau (ms) +} + +? currents +BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v - ena) + gk = gkbar*n*n*n*n + ik = gk*(v - ek) + il = gl*(v - el) +} + + +INITIAL { + rates(v) + m = minf + h = hinf + n = ninf +} + +? states +DERIVATIVE states { + rates(v) + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau +} + +:LOCAL q10 + + +? rates +PROCEDURE rates(v(mV)) { :Computes rate and other constants at current v. + :Call once from HOC to initialize inf at resting v. + LOCAL alpha, beta, sum, q10 +: TABLE minf, mtau, hinf, htau, ninf, ntau DEPEND celsius FROM -100 TO 100 WITH 200 + +UNITSOFF + q10 = 3^((celsius - 6.3)/10) + :"m" sodium activation system + alpha = .1 * vtrap(-(v+40),10) + beta = 4 * exp(-(v+65)/18) + sum = alpha + beta + mtau = 1/(q10*sum) + minf = alpha/sum + :"h" sodium inactivation system + alpha = .07 * exp(-(v+65)/20) + beta = 1 / (exp(-(v+35)/10) + 1) + sum = alpha + beta + htau = 1/(q10*sum) + hinf = alpha/sum + :"n" potassium activation system + alpha = .01*vtrap(-(v+55),10) + beta = .125*exp(-(v+65)/80) + sum = alpha + beta + ntau = 1/(q10*sum) + ninf = alpha/sum +} + +FUNCTION vtrap(x,y) { :Traps for 0 in denominator of rate eqns. 
+ if (fabs(x/y) < 1e-6) { + vtrap = y*(1 - x/y/2) + }else{ + vtrap = x/(exp(x/y) - 1) + } +} + +UNITSON diff --git a/test/benchmark/llvm_benchmark.cpp b/test/benchmark/llvm_benchmark.cpp new file mode 100644 index 0000000000..3ccf84e5c0 --- /dev/null +++ b/test/benchmark/llvm_benchmark.cpp @@ -0,0 +1,130 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include +#include + +#include "llvm_benchmark.hpp" +#include "test/benchmark/jit_driver.hpp" +#include "utils/logger.hpp" +#include "llvm/Support/Host.h" + +#include "test/unit/codegen/codegen_data_helper.hpp" + +#ifdef NMODL_LLVM_CUDA_BACKEND +#include "test/benchmark/cuda_driver.hpp" +#endif + +namespace nmodl { +namespace benchmark { + +BenchmarkResults LLVMBenchmark::run() { + // Set the codegen data helper and find the kernels. + auto codegen_data = codegen::CodegenDataHelper(llvm_visitor.get_instance_struct_ptr()); + std::vector kernel_names; + llvm_visitor.find_kernel_names(kernel_names); + + // Get feature's string and turn them off depending on the cpu. + std::string backend_name; +#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + backend_name = platform.get_name(); + } else { +#endif + backend_name = platform.get_name() == "default" ? llvm::sys::getHostCPUName().str() + : platform.get_name(); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif + logger->info("Backend: {}", backend_name); + + std::unique_ptr m = llvm_visitor.get_module(); + + // Create the benchmark runner and initialize it. 
+#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + std::string filename = "cuda_" + mod_filename; + cuda_runner = std::make_unique( + std::move(m), filename, output_dir, shared_libs, opt_level_ir, opt_level_codegen); + cuda_runner->initialize_driver(platform); + } else { +#endif + std::string filename = "v" + std::to_string(llvm_visitor.get_vector_width()) + "_" + + mod_filename; + cpu_runner = std::make_unique(std::move(m), + filename, + output_dir, + backend_name, + shared_libs, + opt_level_ir, + opt_level_codegen); + cpu_runner->initialize_driver(); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif + + BenchmarkResults results{}; + // Benchmark every kernel. + for (const auto& kernel_name: kernel_names) { + // For every kernel run the benchmark `num_experiments` times and collect runtimes. + auto times = std::vector(num_experiments, 0.0); + for (int i = 0; i < num_experiments; ++i) { + // Initialise the data. + auto instance_data = codegen_data.create_data(instance_size, /*seed=*/1); + + // Log instance size once. + if (i == 0) { + double size_mbs = instance_data.num_bytes / (1024.0 * 1024.0); + logger->info("Benchmarking kernel '{}' with {} MBs dataset", kernel_name, size_mbs); + } + + // Record the execution time of the kernel. + auto start = std::chrono::steady_clock::now(); +#ifdef NMODL_LLVM_CUDA_BACKEND + if (platform.is_CUDA_gpu()) { + cuda_runner->run_with_argument(kernel_name, + instance_data.base_ptr, + gpu_execution_parameters); + } else { +#endif + cpu_runner->run_with_argument(kernel_name, instance_data.base_ptr); +#ifdef NMODL_LLVM_CUDA_BACKEND + } +#endif + auto end = std::chrono::steady_clock::now(); + std::chrono::duration diff = end - start; + + // Log the time taken for each run. 
+ logger->debug("Experiment {} compute time = {:.6f} sec", i, diff.count()); + + times[i] = diff.count(); + } + // Calculate statistics + double time_mean = std::accumulate(times.begin(), times.end(), 0.0) / num_experiments; + double time_var = std::accumulate(times.begin(), + times.end(), + 0.0, + [time_mean](const double& pres, const double& e) { + return (e - time_mean) * (e - time_mean); + }) / + num_experiments; + double time_stdev = std::sqrt(time_var); + double time_min = *std::min_element(times.begin(), times.end()); + double time_max = *std::max_element(times.begin(), times.end()); + // Log the average time taken for the kernel. + logger->info("Average compute time = {:.6f}", time_mean); + logger->info("Compute time standard deviation = {:8f}", time_stdev); + logger->info("Minimum compute time = {:.6f}", time_min); + logger->info("Maximum compute time = {:.6f}\n", time_max); + results[kernel_name] = {time_mean, time_stdev, time_min, time_max}; + } + return results; +} + +} // namespace benchmark +} // namespace nmodl diff --git a/test/benchmark/llvm_benchmark.hpp b/test/benchmark/llvm_benchmark.hpp new file mode 100644 index 0000000000..0d63bf78ba --- /dev/null +++ b/test/benchmark/llvm_benchmark.hpp @@ -0,0 +1,133 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#pragma once + +#include +#include +#include +#include + +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "gpu_parameters.hpp" +#include "test/benchmark/jit_driver.hpp" +#include "utils/logger.hpp" + +#ifdef NMODL_LLVM_CUDA_BACKEND +#include "test/benchmark/cuda_driver.hpp" +#endif + +using nmodl::codegen::Platform; +using nmodl::cuda_details::GPUExecutionParameters; + +namespace nmodl { +namespace benchmark { + +/** + * map of {name: [avg, stdev, min, max]} + */ +using BenchmarkResults = std::map>; + +/** + * \class LLVMBenchmark + * \brief A wrapper to execute MOD file kernels via LLVM IR backend, and + * benchmark compile-time and runtime. + */ +class LLVMBenchmark { + private: + /// LLVM visitor. + codegen::CodegenLLVMVisitor& llvm_visitor; + + /// Source MOD file name. + std::string mod_filename; + + /// The output directory for logs and other files. + std::string output_dir; + + /// Paths to shared libraries. + std::vector shared_libs; + + /// The number of experiments to repeat. + int num_experiments; + + /// The size of the instance struct for benchmarking. + int instance_size; + + /// Target platform for the code generation. + Platform platform; + + /// The GPU execution parameters needed to configure the kernels' execution. + GPUExecutionParameters gpu_execution_parameters; + + /// Optimisation level for IR generation. + int opt_level_ir; + + /// Optimisation level for machine code generation. + int opt_level_codegen; + + /// Filestream for dumping logs to the file. 
+ std::ofstream ofs; + + /// CPU benchmark runner + std::unique_ptr cpu_runner; + +#ifdef NMODL_LLVM_CUDA_BACKEND + /// CUDA benchmark runner + std::unique_ptr cuda_runner; +#endif + + public: + LLVMBenchmark(codegen::CodegenLLVMVisitor& llvm_visitor, + const std::string& mod_filename, + const std::string& output_dir, + std::vector shared_libs, + int num_experiments, + int instance_size, + const Platform& platform, + int opt_level_ir, + int opt_level_codegen) + : llvm_visitor(llvm_visitor) + , mod_filename(mod_filename) + , output_dir(output_dir) + , shared_libs(shared_libs) + , num_experiments(num_experiments) + , instance_size(instance_size) + , platform(platform) + , opt_level_ir(opt_level_ir) + , opt_level_codegen(opt_level_codegen) {} + LLVMBenchmark(codegen::CodegenLLVMVisitor& llvm_visitor, + const std::string& mod_filename, + const std::string& output_dir, + std::vector shared_libs, + int num_experiments, + int instance_size, + const Platform& platform, + int opt_level_ir, + int opt_level_codegen, + const GPUExecutionParameters& gpu_exec_params) + : llvm_visitor(llvm_visitor) + , mod_filename(mod_filename) + , output_dir(output_dir) + , shared_libs(shared_libs) + , num_experiments(num_experiments) + , instance_size(instance_size) + , platform(platform) + , opt_level_ir(opt_level_ir) + , opt_level_codegen(opt_level_codegen) + , gpu_execution_parameters(gpu_exec_params) {} + + /// Runs the main body of the benchmark, executing the compute kernels. + BenchmarkResults run(); + + private: + /// Sets the log output stream (file or console). 
+ void set_log_output(); +}; + + +} // namespace benchmark +} // namespace nmodl diff --git a/test/integration/CMakeLists.txt b/test/integration/CMakeLists.txt index 82c72a6a40..a760ff96f8 100644 --- a/test/integration/CMakeLists.txt +++ b/test/integration/CMakeLists.txt @@ -11,5 +11,10 @@ file(GLOB modfiles "${NMODL_PROJECT_SOURCE_DIR}/test/integration/mod/*.mod") foreach(modfile ${modfiles}) get_filename_component(modfile_name "${modfile}" NAME) add_test(NAME ${modfile_name} COMMAND ${CMAKE_BINARY_DIR}/bin/nmodl ${modfile}) + add_test(NAME ${modfile_name}_oacc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --c + acc --oacc) + add_test(NAME ${modfile_name}_ispc COMMAND ${PROJECT_BINARY_DIR}/bin/nmodl ${modfile} host --ispc) cpp_cc_configure_sanitizers(TEST ${modfile_name}) + cpp_cc_configure_sanitizers(TEST ${modfile_name}_oacc) + cpp_cc_configure_sanitizers(TEST ${modfile_name}_ispc) endforeach() diff --git a/test/integration/mod/procedure.mod b/test/integration/mod/procedure.mod new file mode 100644 index 0000000000..daa4ad33ad --- /dev/null +++ b/test/integration/mod/procedure.mod @@ -0,0 +1,37 @@ +NEURON { + SUFFIX procedure_test + THREADSAFE +} + +PROCEDURE hello_world() { + printf("Hello World") +} + +PROCEDURE simple_sum(x, y) { + LOCAL z + z = x + y +} + +PROCEDURE complex_sum(v) { + LOCAL alpha, beta, sum + { + alpha = .1 * exp(-(v+40)) + beta = 4 * exp(-(v+65)/18) + sum = alpha + beta + } +} + +PROCEDURE loop_proc(v, t) { + LOCAL i + i = 0 + WHILE(i < 10) { + printf("Hello World") + i = i + 1 + } +} + +FUNCTION square(x) { + LOCAL res + res = x * x + square = res +} diff --git a/test/integration/mod/test_math.mod b/test/integration/mod/test_math.mod new file mode 100644 index 0000000000..6e3174a846 --- /dev/null +++ b/test/integration/mod/test_math.mod @@ -0,0 +1,16 @@ +NEURON { + SUFFIX test + RANGE x, y +} + +ASSIGNED { x y } + +STATE { m } + +BREAKPOINT { + SOLVE states METHOD cnexp +} + +DERIVATIVE states { + m = exp(y) + x ^ 107 + log(x) +} 
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a2340b0414..6242da5f14 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -71,6 +71,11 @@ add_executable( target_link_libraries(testmodtoken lexer util) target_link_libraries(testlexer lexer util) +target_link_libraries(testprinter printer util) +target_link_libraries(testsymtab symtab lexer util) +target_link_libraries(testunitlexer lexer util) +target_link_libraries(testunitparser lexer test_util config) + target_link_libraries( testparser visitor @@ -80,6 +85,7 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) + target_link_libraries( testvisitor visitor @@ -89,6 +95,7 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) + target_link_libraries( testcodegen codegen @@ -99,10 +106,53 @@ target_link_libraries( test_util printer ${NMODL_WRAPPER_LIBS}) -target_link_libraries(testprinter printer util) -target_link_libraries(testsymtab symtab lexer util) -target_link_libraries(testunitlexer lexer util) -target_link_libraries(testunitparser lexer test_util config) + +if(NMODL_ENABLE_LLVM) + include_directories(${LLVM_INCLUDE_DIRS} codegen) + + add_library(benchmark_data STATIC codegen/codegen_data_helper.cpp) + add_dependencies(benchmark_data lexer) + target_link_libraries(benchmark_data PRIVATE util) + + add_executable( + testllvm visitor/main.cpp codegen/codegen_llvm_ir.cpp codegen/codegen_data_helper.cpp + codegen/codegen_llvm_instance_struct.cpp codegen/codegen_llvm_visitor.cpp) + add_executable(test_llvm_runner visitor/main.cpp codegen/codegen_data_helper.cpp + codegen/codegen_llvm_execution.cpp) + if(NMODL_ENABLE_LLVM_CUDA) + target_link_libraries(benchmark_data PRIVATE CUDA::cudart) + target_link_libraries(testllvm CUDA::cudart) + target_link_libraries(test_llvm_runner CUDA::cudart) + endif() + target_link_libraries( + testllvm + Catch2::Catch2 + llvm_codegen + codegen + visitor + symtab + lexer + util + test_util + printer + 
${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) + target_link_libraries( + test_llvm_runner + Catch2::Catch2 + llvm_codegen + llvm_benchmark + codegen + visitor + symtab + lexer + util + test_util + printer + ${NMODL_WRAPPER_LIBS} + ${LLVM_LIBS_TO_LINK}) + set(CODEGEN_TEST "testllvm;test_llvm_runner") +endif() # ============================================================================= # Use catch_discover instead of add_test for granular test result reporting. @@ -127,7 +177,8 @@ foreach( testnewton testfast_math testunitlexer - testunitparser) + testunitparser + ${CODEGEN_TEST}) target_link_libraries(${test_name} Catch2::Catch2) cpp_cc_configure_sanitizers(TARGET ${test_name}) diff --git a/test/unit/codegen/codegen_data_helper.cpp b/test/unit/codegen/codegen_data_helper.cpp new file mode 100644 index 0000000000..d2b17277bc --- /dev/null +++ b/test/unit/codegen/codegen_data_helper.cpp @@ -0,0 +1,216 @@ +#include + +#ifdef NMODL_LLVM_CUDA_BACKEND +#include +#endif + +#include "ast/codegen_var_type.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" + +#include "codegen_data_helper.hpp" + +namespace nmodl { +namespace codegen { + +// scalar variables with default values +const double default_nthread_dt_value = 0.025; +const double default_nthread_t_value = 100.0; +const double default_celsius_value = 34.0; +const int default_second_order_value = 0; + +// cleanup all members and struct base pointer +CodegenInstanceData::~CodegenInstanceData() { + // first free num_ptr_members members which are pointers + for (size_t i = 0; i < num_ptr_members; i++) { +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaFree(members[i]); +#else + free(members[i]); +#endif + } +// and then pointer to container struct +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaFree(base_ptr); +#else + free(base_ptr); +#endif +} + +/** + * \todo : various things can be improved here + * - if variable is voltage then initialization range could be -65 to +65 + * - if variable is double or float then those could be 
initialize with + * "some" floating point value between range like 1.0 to 100.0. Note + * it would be nice to have unique values to avoid errors like division + * by zero. We have simple implementation that is taking care of this. + * - if variable is integer then initialization range must be between + * 0 and num_elements. In practice, num_elements is number of instances + * of a particular mechanism. This would be <= number of compartments + * in the cell. For now, just initialize integer variables from 0 to + * num_elements - 1. + */ +void initialize_variable(const std::shared_ptr& var, + void* ptr, + size_t initial_value, + size_t num_elements) { + ast::AstNodeType type = var->get_type()->get_type(); + const std::string& name = var->get_name()->get_node_name(); + + if (type == ast::AstNodeType::DOUBLE) { + const auto& generated_double_data = generate_dummy_data(initial_value, + num_elements); + double* data = (double*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_double_data[i]; + } + } else if (type == ast::AstNodeType::FLOAT) { + const auto& generated_float_data = generate_dummy_data(initial_value, num_elements); + float* data = (float*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_float_data[i]; + } + } else if (type == ast::AstNodeType::INTEGER) { + const auto& generated_int_data = generate_dummy_data(initial_value, num_elements); + int* data = (int*) ptr; + for (size_t i = 0; i < num_elements; i++) { + data[i] = generated_int_data[i]; + } + } else { + throw std::runtime_error("Unhandled data type during initialize_variable"); + }; +} + +CodegenInstanceData CodegenDataHelper::create_data(size_t num_elements, size_t seed) { + // alignment with 64-byte to generate aligned loads/stores + const unsigned NBYTE_ALIGNMENT = 64; + + // get variable information + const auto& variables = instance->get_codegen_vars(); + + // start building data + CodegenInstanceData data; + data.num_elements = num_elements; + + 
// base pointer to instance object + void* base = nullptr; + + // max size of each member : pointer / double has maximum size + size_t member_size = std::max(sizeof(double), sizeof(double*)); + +// allocate instance object with memory alignment +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaMallocManaged(&base, member_size * variables.size()); +#else + posix_memalign(&base, NBYTE_ALIGNMENT, member_size * variables.size()); +#endif + + data.base_ptr = base; + data.num_bytes += member_size * variables.size(); + + size_t offset = 0; + void* ptr = base; + size_t variable_index = 0; + + // allocate each variable and allocate memory at particular offset in base pointer + for (auto& var: variables) { + // only process until first non-pointer variable + if (!var->get_is_pointer()) { + break; + } + + // check type of variable and it's size + size_t member_size = 0; + ast::AstNodeType type = var->get_type()->get_type(); + if (type == ast::AstNodeType::DOUBLE) { + member_size = sizeof(double); + } else if (type == ast::AstNodeType::FLOAT) { + member_size = sizeof(float); + } else if (type == ast::AstNodeType::INTEGER) { + member_size = sizeof(int); + } + + // allocate memory and setup a pointer + void* member; +#ifdef NMODL_LLVM_CUDA_BACKEND + cudaMallocManaged(&member, member_size * num_elements); +#else + posix_memalign(&member, NBYTE_ALIGNMENT, member_size * num_elements); +#endif + + // integer values are often offsets so they must start from + // 0 to num_elements-1 to avoid out of bound accesses. 
+ int initial_value = variable_index; + if (type == ast::AstNodeType::INTEGER) { + initial_value = 0; + } + initialize_variable(var, member, initial_value, num_elements); + data.num_bytes += member_size * num_elements; + + // copy address at specific location in the struct + memcpy(ptr, &member, sizeof(double*)); + + data.offsets.push_back(offset); + data.members.push_back(member); + data.num_ptr_members++; + + // all pointer types are of same size, so just use double* + offset += sizeof(double*); + ptr = (char*) base + offset; + + variable_index++; + } + + // we are now switching from pointer type to next member type (e.g. double) + // ideally we should use padding but switching from double* to double should + // already meet alignment requirements + for (auto& var: variables) { + // process only scalar elements + if (var->get_is_pointer()) { + continue; + } + ast::AstNodeType type = var->get_type()->get_type(); + const std::string& name = var->get_name()->get_node_name(); + + // some default values for standard parameters + double value = 0; + if (name == naming::NTHREAD_DT_VARIABLE) { + value = default_nthread_dt_value; + } else if (name == naming::NTHREAD_T_VARIABLE) { + value = default_nthread_t_value; + } else if (name == naming::CELSIUS_VARIABLE) { + value = default_celsius_value; + } else if (name == CodegenLLVMHelperVisitor::NODECOUNT_VAR) { + value = num_elements; + } else if (name == naming::SECOND_ORDER_VARIABLE) { + value = default_second_order_value; + } + + if (type == ast::AstNodeType::DOUBLE) { + *((double*) ptr) = value; + data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(double); + ptr = (char*) base + offset; + } else if (type == ast::AstNodeType::FLOAT) { + *((float*) ptr) = float(value); + data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(float); + ptr = (char*) base + offset; + } else if (type == ast::AstNodeType::INTEGER) { + *((int*) ptr) = int(value); + 
data.offsets.push_back(offset); + data.members.push_back(ptr); + offset += sizeof(int); + ptr = (char*) base + offset; + } else { + throw std::runtime_error( + "Unhandled type while allocating data in CodegenDataHelper::create_data()"); + } + } + + return data; +} + +} // namespace codegen +} // namespace nmodl diff --git a/test/unit/codegen/codegen_data_helper.hpp b/test/unit/codegen/codegen_data_helper.hpp new file mode 100644 index 0000000000..c356a898ce --- /dev/null +++ b/test/unit/codegen/codegen_data_helper.hpp @@ -0,0 +1,110 @@ +/************************************************************************* + * Copyright (C) 2018-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#pragma once + +#include + +#include "ast/ast.hpp" + +/// \file +/// \brief Generate test data for testing and benchmarking compute kernels + +namespace nmodl { +namespace codegen { + +/// common scalar variables +extern const double default_nthread_dt_value; +extern const double default_nthread_t_value; +extern const double default_celsius_value; +extern const int default_second_order_value; + +/** + * \class CodegenInstanceData + * \brief Wrapper class to pack data allocate for instance + */ +struct CodegenInstanceData { + /// base pointer which can be type casted + /// to instance struct at run time + void* base_ptr = nullptr; + + /// length of each member of pointer type + size_t num_elements = 0; + + /// number of pointer members + size_t num_ptr_members = 0; + + /// offset relative to base_ptr to locate + /// each member variable in instance struct + std::vector offsets; + + /// pointer to array allocated for each member variable + /// i.e. 
*(base_ptr + offsets[0]) will be members[0] + std::vector members; + + /// size in bytes + size_t num_bytes = 0; + + // cleanup all memory allocated for type and member variables + ~CodegenInstanceData(); +}; + + +/** + * Generate vector of dummy data according to the template type specified + * + * For double or float type: generate vector starting from `initial_value` + * with an increment of 1e-5. The increment can be any other + * value but 1e-5 is chosen because when we benchmark with + * a million elements then the values are in the range of + * . + * For int type: generate vector starting from initial_value with an + * increments of 1 + * + * \param inital_value Base value for initializing the data + * \param num_elements Number of element of the generated vector + * \return std::vector of dummy data for testing purposes + */ +template +std::vector generate_dummy_data(size_t initial_value, size_t num_elements) { + std::vector data(num_elements); + T increment; + if (std::is_same::value) { + increment = 1; + } else { + increment = 1e-5; + } + for (size_t i = 0; i < num_elements; i++) { + data[i] = initial_value + increment * i; + } + return data; +} + +/** + * \class CodegenDataHelper + * \brief Helper to allocate and initialize data for benchmarking + * + * The `ast::InstanceStruct` is has different number of member + * variables for different MOD files and hence we can't instantiate + * it at compile time. This class helps to inspect the variables + * information gathered from AST and allocate memory block that + * can be type cast to the `ast::InstanceStruct` corresponding + * to the MOD file. 
+ */ +class CodegenDataHelper { + std::shared_ptr instance; + + public: + CodegenDataHelper() = delete; + CodegenDataHelper(const std::shared_ptr& instance) + : instance(instance) {} + + CodegenInstanceData create_data(size_t num_elements, size_t seed); +}; + +} // namespace codegen +} // namespace nmodl diff --git a/test/unit/codegen/codegen_llvm_execution.cpp b/test/unit/codegen/codegen_llvm_execution.cpp new file mode 100644 index 0000000000..2c9262902b --- /dev/null +++ b/test/unit/codegen/codegen_llvm_execution.cpp @@ -0,0 +1,934 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include + +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen_data_helper.hpp" +#include "parser/nmodl_driver.hpp" +#include "test/benchmark/jit_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" + +using namespace nmodl; +using namespace runner; +using namespace visitor; +using nmodl::parser::NmodlDriver; + +static double EPSILON = 1e-15; + +//============================================================================= +// Utilities for testing. 
+//============================================================================= + +struct InstanceTestInfo { + codegen::CodegenInstanceData* instance; + codegen::InstanceVarHelper helper; + int num_elements; +}; + +template +bool check_instance_variable(InstanceTestInfo& instance_info, + std::vector& expected, + const std::string& variable_name) { + std::vector actual; + int variable_index = instance_info.helper.get_variable_index(variable_name); + actual.assign(static_cast(instance_info.instance->members[variable_index]), + static_cast(instance_info.instance->members[variable_index]) + + instance_info.num_elements); + + // While we are comparing double types as well, for simplicity the test cases are hand-crafted + // so that no floating-point arithmetic is really involved. + return actual == expected; +} + +template +void initialise_instance_variable(InstanceTestInfo& instance_info, + std::vector& data, + const std::string& variable_name) { + int variable_index = instance_info.helper.get_variable_index(variable_name); + T* data_start = static_cast(instance_info.instance->members[variable_index]); + for (int i = 0; i < instance_info.num_elements; ++i) + *(data_start + i) = data[i]; +} + +//============================================================================= +// Simple functions: no optimisations +//============================================================================= + +SCENARIO("Arithmetic expression", "[llvm][runner]") { + GIVEN("Functions with some arithmetic expressions") { + std::string nmodl_text = R"( + FUNCTION exponential() { + LOCAL i + i = 1 + exponential = exp(i) + } + + FUNCTION constant() { + constant = 10 + } + + FUNCTION arithmetic() { + LOCAL x, y + x = 3 + y = 7 + arithmetic = x * y / (x + y) + } + + FUNCTION bar() { + LOCAL i, j + i = 2 + j = i + 2 + bar = 2 * 3 + j + } + + FUNCTION function_call() { + foo() + function_call = bar() / constant() + } + + PROCEDURE foo() {} + + FUNCTION with_argument(x) { + with_argument = x + } + 
+ FUNCTION loop() { + LOCAL i, j, sum, result + result = 0 + j = 0 + WHILE (j < 2) { + i = 0 + sum = 0 + WHILE (i < 10) { + sum = sum + i + i = i + 1 + } + j = j + 1 + result = result + sum + } + loop = result + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + SymtabVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + cpu_platform, + /*opt_level_ir=*/0); + llvm_visitor.visit_program(*ast); + + std::unique_ptr m = llvm_visitor.get_module(); + TestRunner runner(std::move(m)); + runner.initialize_driver(); + + THEN("functions are evaluated correctly") { + auto exp_result = runner.run_without_arguments("exponential"); + REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); + + auto constant_result = runner.run_without_arguments("constant"); + REQUIRE(fabs(constant_result - 10.0) < EPSILON); + + auto arithmetic_result = runner.run_without_arguments("arithmetic"); + REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); + + auto function_call_result = runner.run_without_arguments("function_call"); + REQUIRE(fabs(function_call_result - 1.0) < EPSILON); + + double data = 10.0; + auto with_argument_result = runner.run_with_argument("with_argument", + data); + REQUIRE(fabs(with_argument_result - 10.0) < EPSILON); + + auto loop_result = runner.run_without_arguments("loop"); + REQUIRE(fabs(loop_result - 90.0) < EPSILON); + } + } +} + +//============================================================================= +// Simple functions: with optimisations +//============================================================================= + +SCENARIO("Optimised arithmetic expression", "[llvm][runner]") { + GIVEN("Functions with some arithmetic expressions") { + std::string nmodl_text = R"( + FUNCTION exponential() { + LOCAL i + i = 1 + exponential = exp(i) + } + + FUNCTION 
constant() { + constant = 10 * 2 - 100 / 50 * 5 + } + + FUNCTION arithmetic() { + LOCAL x, y + x = 3 + y = 7 + arithmetic = x * y / (x + y) + } + + FUNCTION conditionals() { + LOCAL x, y, z + x = 100 + y = -100 + z = 0 + if (x == 200) { + conditionals = 1 + } else if (x == 400) { + conditionals = 2 + } else if (x == 100) { + if (y == -100 && z != 0) { + conditionals = 3 + } else { + if (y < -99 && z == 0) { + conditionals = 4 + } else { + conditionals = 5 + } + } + } else { + conditionals = 6 + } + } + + FUNCTION bar() { + LOCAL i, j + i = 2 + j = i + 2 + bar = 2 * 3 + j + } + + FUNCTION function_call() { + foo() + function_call = bar() / constant() + } + + PROCEDURE foo() {} + + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + SymtabVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + cpu_platform, + /*opt_level_ir=*/3); + llvm_visitor.visit_program(*ast); + + std::unique_ptr m = llvm_visitor.get_module(); + TestRunner runner(std::move(m)); + runner.initialize_driver(); + + THEN("optimizations preserve function results") { + // Check exponential is turned into a constant. + auto exp_result = runner.run_without_arguments("exponential"); + REQUIRE(fabs(exp_result - 2.718281828459045) < EPSILON); + + // Check constant folding. + auto constant_result = runner.run_without_arguments("constant"); + REQUIRE(fabs(constant_result - 10.0) < EPSILON); + + // Check nested conditionals + auto conditionals_result = runner.run_without_arguments("conditionals"); + REQUIRE(fabs(conditionals_result - 4.0) < EPSILON); + + // Check constant folding. 
+ auto arithmetic_result = runner.run_without_arguments("arithmetic"); + REQUIRE(fabs(arithmetic_result - 2.1) < EPSILON); + + auto function_call_result = runner.run_without_arguments("function_call"); + REQUIRE(fabs(function_call_result - 1.0) < EPSILON); + } + } +} + +//============================================================================= +// State scalar kernel. +//============================================================================= + +SCENARIO("Simple scalar kernel", "[llvm][runner]") { + GIVEN("Simple MOD file with a state update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + NONSPECIFIC_CURRENT i + RANGE x0, x1 + } + + STATE { + x + } + + ASSIGNED { + v + x0 + x1 + i (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 0 + } + + DERIVATIVE states { + x = (x0 - x) / x1 + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + cpu_platform, + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. + int num_elements = 4; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. 
+ std::vector x = {1.0, 2.0, 3.0, 4.0}; + std::vector x0 = {5.0, 5.0, 5.0, 5.0}; + std::vector x1 = {1.0, 1.0, 1.0, 1.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, x0, "x0"); + initialise_instance_variable(instance_info, x1, "x1"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Values in struct have changed according to the formula") { + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); + std::vector x_expected = {4.0, 3.0, 2.0, 1.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + } + } +} + +//============================================================================= +// State vectorised kernel with optimisations on. +//============================================================================= + +SCENARIO("Simple vectorised kernel", "[llvm][runner]") { + GIVEN("Simple MOD file with a state update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + NONSPECIFIC_CURRENT i + RANGE x0, x1 + } + + STATE { + x y + } + + ASSIGNED { + v + x0 + x1 + i (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 0 + } + + DERIVATIVE states { + x = (x0 - x) / x1 + y = v + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. 
+ SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/true, + /*instruction_width=*/4); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/3, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. + int num_elements = 10; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values for unit testing. + std::vector x = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + std::vector x0 = {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0}; + std::vector x1 = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + + std::vector voltage = {3.0, 4.0, 7.0, 1.0, 2.0, 5.0, 8.0, 6.0, 10.0, 9.0}; + std::vector node_index = {3, 4, 0, 1, 5, 7, 2, 6, 9, 8}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, x0, "x0"); + initialise_instance_variable(instance_info, x1, "x1"); + + initialise_instance_variable(instance_info, voltage, "voltage"); + initialise_instance_variable(instance_info, node_index, "node_index"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Values in struct have changed according to the formula") { + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); + // Check that the main and remainder loops correctly change the data stored in x. + std::vector x_expected = {10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + + // Check that the gather load produces correct results in y: + // y[id] = voltage[node_index[id]] + std::vector y_expected = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + REQUIRE(check_instance_variable(instance_info, y_expected, "y")); + } + } +} + +//============================================================================= +// Vectorised kernel with ion writes. +//============================================================================= + +SCENARIO("Vectorised kernel with scatter instruction", "[llvm][runner]") { + GIVEN("Simple MOD file with ion writes") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION ca WRITE cai + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + : increment cai to test scatter + cai = cai + 1 + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. 
+ int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. + std::vector cai = {1.0, 2.0, 3.0, 4.0, 5.0}; + std::vector ion_cai = {1.0, 2.0, 3.0, 4.0, 5.0}; + std::vector ion_cai_index = {4, 2, 3, 0, 1}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, cai, "cai"); + initialise_instance_variable(instance_info, ion_cai, "ion_cai"); + initialise_instance_variable(instance_info, ion_cai_index, "ion_cai_index"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Ion values in struct have been updated correctly") { + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); + // cai[id] = ion_cai[ion_cai_index[id]] + // cai[id] += 1 + std::vector cai_expected = {6.0, 4.0, 5.0, 2.0, 3.0}; + REQUIRE(check_instance_variable(instance_info, cai_expected, "cai")); + + // ion_cai[ion_cai_index[id]] = cai[id] + std::vector ion_cai_expected = {2.0, 3.0, 4.0, 5.0, 6.0}; + REQUIRE(check_instance_variable(instance_info, ion_cai_expected, "ion_cai")); + } + } +} + +//============================================================================= +// Vectorised kernel with control flow. 
+//============================================================================= + +SCENARIO("Vectorised kernel with simple control flow", "[llvm][runner]") { + GIVEN("Simple MOD file with if statement") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + } + + STATE { + w x y z + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + IF (v > 0) { + w = v * w + } + + IF (x < 0) { + x = 7 + } + + IF (0 <= y && y < 10 || z == 0) { + y = 2 * y + } ELSE { + z = z - y + } + + } + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. + int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // Fill the instance struct data with some values. 
+ std::vector x = {-1.0, 2.0, -3.0, 4.0, -5.0}; + std::vector y = {11.0, 2.0, -3.0, 4.0, 100.0}; + std::vector z = {0.0, 1.0, 20.0, 0.0, 40.0}; + + std::vector w = {10.0, 20.0, 30.0, 40.0, 50.0}; + std::vector voltage = {-1.0, 2.0, -1.0, 2.0, -1.0}; + std::vector node_index = {1, 2, 3, 4, 0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + initialise_instance_variable(instance_info, w, "w"); + initialise_instance_variable(instance_info, voltage, "voltage"); + initialise_instance_variable(instance_info, node_index, "node_index"); + + initialise_instance_variable(instance_info, x, "x"); + initialise_instance_variable(instance_info, y, "y"); + initialise_instance_variable(instance_info, z, "z"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Masked instructions are generated") { + runner.run_with_argument("nrn_state_test", instance_data.base_ptr); + std::vector w_expected = {20.0, 20.0, 60.0, 40.0, 50.0}; + REQUIRE(check_instance_variable(instance_info, w_expected, "w")); + + std::vector x_expected = {7.0, 2.0, 7.0, 4.0, 7.0}; + REQUIRE(check_instance_variable(instance_info, x_expected, "x")); + + std::vector y_expected = {22.0, 4.0, -3.0, 8.0, 100.0}; + std::vector z_expected = {0.0, 1.0, 23.0, 0.0, -60.0}; + REQUIRE(check_instance_variable(instance_info, y_expected, "y")); + REQUIRE(check_instance_variable(instance_info, z_expected, "z")); + } + } +} + +//============================================================================= +// Kernel with atomic updates. 
+//============================================================================= + +SCENARIO("Kernel with atomic updates", "[llvm][runner]") { + GIVEN("An atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is: + : + : ion_ina_id = mech->ion_ina_index[id] + : ion_ika_id = mech->ion_ika_index[id] + : mech->ion_ina[ion_ina_id] += mech->ina[id] + : mech->ion_ika[ion_ika_id] += mech->ika[id] + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/4); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/3, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. + int num_elements = 5; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices ion_ina[1] = 1 + 2 + 3 + 4 + 5 = 15. + std::vector ion_ina_index = {1, 1, 1, 1, 1}; + std::vector ion_ina = {0.0, 0.0, 0.0, 0.0, 0.0}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0}; + + // With these indices: + // ion_ika[1] = 3 + 4 = 7. + // ion_ika[2] = 1 + 20 = 21. + // ion_ika[3] = -5 + 5 = 0. 
+ std::vector ion_ika_index = {2, 2, 1, 1, 3}; + std::vector ion_ika = {0.0, 0.0, 0.0, -5.0, 0.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, ion_ina_index, "ion_ina_index"); + initialise_instance_variable(instance_info, ion_ina, "ion_ina"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ion_ika_index, "ion_ika_index"); + initialise_instance_variable(instance_info, ion_ika, "ion_ika"); + initialise_instance_variable(instance_info, ika, "ika"); + + // Set up the JIT runner. + std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("updates are commputed correctly with vector instructions and optimizations on") { + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); + // Recall: + // ion_ina_id = mech->ion_ina_index[id] + // ion_ika_id = mech->ion_ika_index[id] + // mech->ion_ina[ion_ina_id] += mech->ina[id] + // mech->ion_ika[ion_ika_id] += mech->ika[id] + std::vector ion_ina_expected = {0.0, 15.0, 0.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ina_expected, "ion_ina")); + + std::vector ion_ika_expected = {0.0, 7.0, 21.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ika_expected, "ion_ika")); + } + } + + GIVEN("Another atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is again: + : + : ion_ina_id = mech->ion_ina_index[id] + : ion_ika_id = mech->ion_ika_index[id] + : mech->ion_ina[ion_ina_id] += mech->ina[id] + : mech->ion_ika[ion_ika_id] += mech->ika[id] + )"; + + + NmodlDriver 
driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. + int num_elements = 6; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices ion_ina[1] = 1 + 3 + 5 = 9. + // With these indices ion_ina[4] = 2 + 4 + 6 = 12. + std::vector ion_ina_index = {1, 4, 1, 4, 1, 4}; + std::vector ion_ina = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + // With these indices: + // ion_ika[1] = 3 + 4 + 5 = 12. + // ion_ika[2] = 1 + 20 + 6 = 27. + std::vector ion_ika_index = {2, 2, 1, 1, 1, 2}; + std::vector ion_ika = {0.0, 0.0, 0.0, -5.0, 0.0, 0.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0, 6.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, ion_ina_index, "ion_ina_index"); + initialise_instance_variable(instance_info, ion_ina, "ion_ina"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ion_ika_index, "ion_ika_index"); + initialise_instance_variable(instance_info, ion_ika, "ion_ika"); + initialise_instance_variable(instance_info, ika, "ika"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Atomic updates are correct without optimizations") { + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); + // Recall: + // ion_ina_id = mech->ion_ina_index[id] + // ion_ika_id = mech->ion_ika_index[id] + // mech->ion_ina[ion_ina_id] += mech->ina[id] + // mech->ion_ika[ion_ika_id] += mech->ika[id] + std::vector ion_ina_expected = {0.0, 9.0, 0.0, 0.0, 12.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ina_expected, "ion_ina")); + + std::vector ion_ika_expected = {0.0, 12.0, 27.0, -5.0, 0.0, 0.0}; + REQUIRE(check_instance_variable(instance_info, ion_ika_expected, "ion_ika")); + } + } + + GIVEN("Atomic updates of rhs and d") { + std::string nmodl_text = R"( + NEURON { + POINT_PROCESS test + USEION na READ ena WRITE ina + USEION ka READ eka WRITE ika + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + + : The atomic update that we want to check is again: + : + : node_id = mech->node_index[id] + : mech->vec_rhs[node_id] -= rhs + : mech->vec_d[node_id] -= g + )"; + + + NmodlDriver driver; + const auto& ast = driver.parse_string(nmodl_text); + + // Run passes on the AST to generate LLVM. + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform simd_cpu_platform(/*use_single_precision=*/false, + /*instruction_width=*/2); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"unknown", + /*output_dir=*/".", + simd_cpu_platform, + /*opt_level_ir=*/0, + /*add_debug_information=*/false, + /*fast_math_flags=*/{}, + /*wrap_kernel_functions=*/true); + llvm_visitor.visit_program(*ast); + + // Create the instance struct data. 
+ int num_elements = 6; + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, /*seed=*/1); + + // With these indices vec_rhs[1] = -0.2-1.e2/1.5*2-1.e2/3.4*6-1.e2/5.2*10 = + // -502.3116138763197. + // With these indices vec_rhs[4] = + // -0.54-1.e2/2.3*22.0-1.e2/4.1*8.0-1.e2/6.0*12.0 = -1351.103690349947. + // vec_d remains the same because the contribution of g each time is 0.0. + std::vector node_index = {1, 4, 1, 4, 1, 4}; + std::vector ina = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + std::vector ika = {1.0, 20.0, 3.0, 4.0, 5.0, 6.0}; + std::vector vec_rhs = {0.64, -0.2, 1.1, 0.42, 0.54, -0.36}; + std::vector vec_d = {1.6, 2.5, 3.4, 4.3, 5.2, 6.1}; + std::vector node_area_index = {0, 1, 2, 3, 4, 5}; + std::vector node_area = {1.5, 2.3, 3.4, 4.1, 5.2, 6.0}; + + InstanceTestInfo instance_info{&instance_data, + llvm_visitor.get_instance_var_helper(), + num_elements}; + + initialise_instance_variable(instance_info, node_index, "node_index"); + initialise_instance_variable(instance_info, ina, "ina"); + initialise_instance_variable(instance_info, ika, "ika"); + initialise_instance_variable(instance_info, vec_rhs, "vec_rhs"); + initialise_instance_variable(instance_info, vec_d, "vec_d"); + initialise_instance_variable(instance_info, node_area_index, "node_area_index"); + initialise_instance_variable(instance_info, node_area, "node_area"); + + // Set up the JIT runner. 
+ std::unique_ptr module = llvm_visitor.get_module(); + TestRunner runner(std::move(module)); + runner.initialize_driver(); + + THEN("Atomic updates are correct") { + runner.run_with_argument("nrn_cur_test", instance_data.base_ptr); + // Recall: + // node_id = mech->node_index[id] + // mech->vec_rhs[node_id] -= rhs + // mech->vec_d[node_id] -= g + std::vector vec_rhs_expected = { + 0.64, -502.3116138763197, 1.1, 0.42, -1351.103690349947, -0.36}; + REQUIRE(check_instance_variable(instance_info, vec_rhs_expected, "vec_rhs")); + + std::vector vec_d_expected = {1.6, 2.5, 3.4, 4.3, 5.2, 6.1}; + REQUIRE(check_instance_variable(instance_info, vec_d_expected, "vec_d")); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_instance_struct.cpp b/test/unit/codegen/codegen_llvm_instance_struct.cpp new file mode 100644 index 0000000000..32fb9a1b6d --- /dev/null +++ b/test/unit/codegen/codegen_llvm_instance_struct.cpp @@ -0,0 +1,185 @@ +/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. 
+ *************************************************************************/ + +#include + +#include "ast/all.hpp" +#include "ast/program.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "codegen_data_helper.hpp" +#include "parser/nmodl_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" + +using namespace nmodl; +using namespace codegen; +using namespace visitor; +using nmodl::parser::NmodlDriver; + +//============================================================================= +// Utility to get initialized Struct Instance data +//============================================================================= + +codegen::CodegenInstanceData generate_instance_data(const std::string& text, + int opt_level = 0, + bool use_single_precision = false, + int vector_width = 1, + size_t num_elements = 100, + size_t seed = 1) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + // Generate full AST and solve the BREAKPOINT block to be able to generate the Instance Struct + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + + codegen::Platform cpu_platform(use_single_precision, vector_width); + codegen::CodegenLLVMVisitor llvm_visitor(/*mod_filename=*/"test", + /*output_dir=*/".", + cpu_platform, + opt_level); + llvm_visitor.visit_program(*ast); + llvm_visitor.dump_module(); + const auto& generated_instance_struct = llvm_visitor.get_instance_struct_ptr(); + auto codegen_data = codegen::CodegenDataHelper(generated_instance_struct); + auto instance_data = codegen_data.create_data(num_elements, seed); + return instance_data; +} + +template +bool compare(void* instance_struct_data_ptr, const std::vector& generated_data) { + std::vector instance_struct_vector; + std::cout << "Generated data size: " << generated_data.size() << std::endl; + 
instance_struct_vector.assign(static_cast(instance_struct_data_ptr), + static_cast(instance_struct_data_ptr) + + generated_data.size()); + for (auto value: instance_struct_vector) { + std::cout << value << std::endl; + } + return instance_struct_vector == generated_data; +} + +//============================================================================= +// Simple Instance Struct creation +//============================================================================= + +SCENARIO("Instance Struct creation", "[visitor][llvm][instance_struct]") { + GIVEN("Instantiate simple Instance Struct") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena + RANGE minf, mtau + } + + STATE { + m + } + + ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + minf + mtau + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m' = (minf-m)/mtau + } + )"; + + + THEN("instance struct elements are properly initialized") { + const size_t num_elements = 10; + constexpr static double seed = 42; + auto instance_data = generate_instance_data(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width*/ 1, + num_elements, + seed); + size_t minf_index = 0; + size_t mtau_index = 1; + size_t m_index = 2; + size_t Dm_index = 3; + size_t ena_index = 4; + size_t v_unused_index = 5; + size_t g_unused_index = 6; + size_t ion_ena_index = 7; + size_t ion_ena_index_index = 8; + size_t voltage_index = 9; + size_t node_index_index = 10; + size_t rhs_index = 11; + size_t d_index = 12; + size_t rhs_shadow_index = 13; + size_t d_shadow_index = 14; + size_t t_index = 15; + size_t dt_index = 16; + size_t celsius_index = 17; + size_t secondorder_index = 18; + size_t node_count_index = 19; + // Check if the various instance struct fields are properly initialized + REQUIRE(compare(instance_data.members[minf_index], + generate_dummy_data(minf_index, num_elements))); + REQUIRE(compare(instance_data.members[ena_index], + 
generate_dummy_data(ena_index, num_elements))); + REQUIRE(compare(instance_data.members[ion_ena_index], + generate_dummy_data(ion_ena_index, num_elements))); + // index variables are offsets, they start from 0 + REQUIRE(compare(instance_data.members[ion_ena_index_index], + generate_dummy_data(0, num_elements))); + REQUIRE(compare(instance_data.members[node_index_index], + generate_dummy_data(0, num_elements))); + + REQUIRE(*static_cast(instance_data.members[t_index]) == + default_nthread_t_value); + REQUIRE(*static_cast(instance_data.members[node_count_index]) == num_elements); + + // Hard code TestInstanceType struct + struct TestInstanceType { + double* minf; + double* mtau; + double* m; + double* Dm; + double* ena; + double* v_unused; + double* g_unused; + double* ion_ena; + int* ion_ena_index; + double* voltage; + int* node_index; + double* vec_rhs; + double* vec_d; + double* _shadow_rhs; + double* _shadow_d; + double t; + double dt; + double celsius; + int secondorder; + int node_count; + }; + // Test if TestInstanceType struct is properly initialized + // Cast void ptr instance_data.base_ptr to TestInstanceType* + TestInstanceType* instance = (TestInstanceType*) instance_data.base_ptr; + REQUIRE(compare(instance->minf, generate_dummy_data(minf_index, num_elements))); + REQUIRE(compare(instance->ena, generate_dummy_data(ena_index, num_elements))); + REQUIRE(compare(instance->ion_ena, + generate_dummy_data(ion_ena_index, num_elements))); + REQUIRE(compare(instance->node_index, generate_dummy_data(0, num_elements))); + REQUIRE(instance->t == default_nthread_t_value); + REQUIRE(instance->celsius == default_celsius_value); + REQUIRE(instance->secondorder == default_second_order_value); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_ir.cpp b/test/unit/codegen/codegen_llvm_ir.cpp new file mode 100644 index 0000000000..ff7601d05f --- /dev/null +++ b/test/unit/codegen/codegen_llvm_ir.cpp @@ -0,0 +1,1941 @@ 
+/************************************************************************* + * Copyright (C) 2018-2020 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include +#include + +#include "test/unit/utils/test_utils.hpp" + +#include "ast/program.hpp" +#include "ast/statement_block.hpp" +#include "codegen/llvm/codegen_llvm_helper_visitor.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "parser/nmodl_driver.hpp" +#include "visitors/checkparent_visitor.hpp" +#include "visitors/inline_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "visitors/visitor_utils.hpp" + +using namespace nmodl; +using namespace codegen; +using namespace visitor; + +using namespace test_utils; + +using nmodl::parser::NmodlDriver; + +//============================================================================= +// Utility to get LLVM module as a string +//============================================================================= + +std::string run_gpu_llvm_visitor(const std::string& text, + int opt_level = 0, + bool use_single_precision = false, + std::string math_library = "none", + bool nmodl_inline = false) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + if (nmodl_inline) { + InlineVisitor().visit_program(*ast); + } + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform gpu_platform( + codegen::PlatformID::GPU, /*name=*/"nvptx64", math_library, use_single_precision, 1); + codegen::CodegenLLVMVisitor llvm_visitor( + /*mod_filename=*/"unknown", + /*output_dir=*/".", + gpu_platform, + opt_level, + /*add_debug_information=*/false); + + 
llvm_visitor.visit_program(*ast); + return llvm_visitor.dump_module(); +} + +std::string run_llvm_visitor(const std::string& text, + int opt_level = 0, + bool use_single_precision = false, + int vector_width = 1, + std::string vec_lib = "none", + std::vector fast_math_flags = {}, + bool nmodl_inline = false) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + if (nmodl_inline) { + InlineVisitor().visit_program(*ast); + } + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + codegen::Platform cpu_platform( + codegen::PlatformID::CPU, /*name=*/"default", vec_lib, use_single_precision, vector_width); + codegen::CodegenLLVMVisitor llvm_visitor( + /*mod_filename=*/"unknown", + /*output_dir=*/".", + cpu_platform, + opt_level, + /*add_debug_information=*/false, + fast_math_flags); + + llvm_visitor.visit_program(*ast); + return llvm_visitor.dump_module(); +} + +//============================================================================= +// Utility to get specific NMODL AST nodes +//============================================================================= + +std::vector> run_llvm_visitor_helper( + const std::string& text, + codegen::Platform& platform, + const std::vector& nodes_to_collect) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + CodegenLLVMHelperVisitor(platform).visit_program(*ast); + + const auto& nodes = collect_nodes(*ast, nodes_to_collect); + + return nodes; +} + +//============================================================================= +// BinaryExpression and Double +//============================================================================= + +SCENARIO("Binary expression", "[visitor][llvm]") { + GIVEN("Procedure with addition of its arguments") { + std::string nmodl_text = R"( + PROCEDURE add(a, b) { + LOCAL i + i = a + b + } + )"; + + 
THEN("variables are loaded and add instruction is created") { + std::string module_string = + run_llvm_visitor(nmodl_text, /*opt_level=*/0, /*use_single_precision=*/true); + std::smatch m; + + std::regex rhs(R"(%1 = load float, float\* %b)"); + std::regex lhs(R"(%2 = load float, float\* %a)"); + std::regex res(R"(%3 = fadd float %2, %1)"); + + // Check the float values are loaded correctly and added. + REQUIRE(std::regex_search(module_string, m, rhs)); + REQUIRE(std::regex_search(module_string, m, lhs)); + REQUIRE(std::regex_search(module_string, m, res)); + } + } + + GIVEN("Procedure with multiple binary operators") { + std::string nmodl_text = R"( + PROCEDURE multiple(a, b) { + LOCAL i + i = (a - b) / (a + b) + } + )"; + + THEN("variables are processed from rhs first") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check rhs. + std::regex rr(R"(%1 = load double, double\* %b)"); + std::regex rl(R"(%2 = load double, double\* %a)"); + std::regex x(R"(%3 = fadd double %2, %1)"); + REQUIRE(std::regex_search(module_string, m, rr)); + REQUIRE(std::regex_search(module_string, m, rl)); + REQUIRE(std::regex_search(module_string, m, x)); + + // Check lhs. + std::regex lr(R"(%4 = load double, double\* %b)"); + std::regex ll(R"(%5 = load double, double\* %a)"); + std::regex y(R"(%6 = fsub double %5, %4)"); + REQUIRE(std::regex_search(module_string, m, lr)); + REQUIRE(std::regex_search(module_string, m, ll)); + REQUIRE(std::regex_search(module_string, m, y)); + + // Check result. + std::regex res(R"(%7 = fdiv double %6, %3)"); + REQUIRE(std::regex_search(module_string, m, res)); + } + } + + GIVEN("Procedure with assignment") { + std::string nmodl_text = R"( + PROCEDURE assignment() { + LOCAL i + i = 2 + } + )"; + + THEN("double constant is stored into i") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check store immediate is created. 
+ std::regex allocation(R"(%i = alloca double)"); + std::regex assignment(R"(store double 2.0*e\+00, double\* %i)"); + REQUIRE(std::regex_search(module_string, m, allocation)); + REQUIRE(std::regex_search(module_string, m, assignment)); + } + } + + GIVEN("Function with power operator") { + std::string nmodl_text = R"( + FUNCTION power() { + LOCAL i, j + i = 2 + j = 4 + power = i ^ j + } + )"; + + THEN("'pow' intrinsic is created") { + std::string module_string = + run_llvm_visitor(nmodl_text, /*opt_level=*/0, /*use_single_precision=*/true); + std::smatch m; + + // Check 'pow' intrinsic. + std::regex declaration(R"(declare float @llvm\.pow\.f32\(float, float\))"); + std::regex pow(R"(call float @llvm\.pow\.f32\(float %.*, float %.*\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + REQUIRE(std::regex_search(module_string, m, pow)); + } + } +} + +//============================================================================= +// Define +//============================================================================= + +SCENARIO("Define", "[visitor][llvm]") { + GIVEN("Procedure with array variable of length specified by DEFINE") { + std::string nmodl_text = R"( + DEFINE N 100 + + PROCEDURE foo() { + LOCAL x[N] + } + )"; + + THEN("macro is expanded and array is allocated") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check stack allocations for i and j + std::regex array(R"(%x = alloca \[100 x double\])"); + REQUIRE(std::regex_search(module_string, m, array)); + } + } +} + +//============================================================================= +// If/Else statements and comparison operators +//============================================================================= + +SCENARIO("Comparison", "[visitor][llvm]") { + GIVEN("Procedure with comparison operators") { + std::string nmodl_text = R"( + PROCEDURE foo(x) { + if (x < 10) { + + } else if (x >= 10 && x <= 100) { + + } else if (x == 120) { + + } 
else if (!(x != 200)) { + + } + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check less than. + std::regex lt(R"(fcmp olt double %(.+), 1\.000000e\+01)"); + REQUIRE(std::regex_search(module_string, m, lt)); + + // Check greater or equal than and logical and. + std::regex ge(R"(fcmp ole double %(.+), 1\.000000e\+02)"); + std::regex logical_and(R"(and i1 %(.+), %(.+))"); + REQUIRE(std::regex_search(module_string, m, ge)); + REQUIRE(std::regex_search(module_string, m, logical_and)); + + // Check equals. + std::regex eq(R"(fcmp oeq double %(.+), 1\.200000e\+02)"); + REQUIRE(std::regex_search(module_string, m, eq)); + + // Check not equals. + std::regex ne(R"(fcmp one double %(.+), 2\.000000e\+02)"); + REQUIRE(std::regex_search(module_string, m, ne)); + } + } +} + +SCENARIO("If/Else", "[visitor][llvm]") { + GIVEN("Function with only if statement") { + std::string nmodl_text = R"( + FUNCTION foo(y) { + LOCAL x + x = 100 + if (y == 20) { + x = 20 + } + foo = x + y + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex cond_br( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " store double 2\\.000000e\\+01, double\\* %x.*\n" + " br label %4\n" + "\n" + "4:"); + REQUIRE(std::regex_search(module_string, m, cond_br)); + } + } + + GIVEN("Function with both if and else statements") { + std::string nmodl_text = R"( + FUNCTION sign(x) { + LOCAL s + if (x < 0) { + s = -1 + } else { + s = 1 + } + sign = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_br( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " store double -1\\.000000e\\+00, double\\* %s.*\n" + " br label %5\n" + "\n" + "4:.*\n" + " store double 1\\.000000e\\+00, double\\* %s.*\n" + " br label 
%5\n" + "\n" + "5:"); + REQUIRE(std::regex_search(module_string, m, if_else_br)); + } + } + + GIVEN("Function with both if and else if statements") { + std::string nmodl_text = R"( + FUNCTION bar(x) { + LOCAL s + s = -1 + if (x <= 0) { + s = 0 + } else if (0 < x && x <= 1) { + s = 1 + } + bar = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_if( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " .*\n" + " br label %12\n" + "\n" + "4:.*\n" + " .*\n" + " .*\n" + " .*\n" + " .*\n" + " %.+ = and i1 %.+, %.+\n" + " br i1 %.+, label %10, label %11\n" + "\n" + "10:.*\n" + " .*\n" + " br label %11\n" + "\n" + "11:.*\n" + " br label %12\n" + "\n" + "12:"); + REQUIRE(std::regex_search(module_string, m, if_else_if)); + } + } + + GIVEN("Function with if, else if anf else statements") { + std::string nmodl_text = R"( + FUNCTION bar(x) { + LOCAL s + if (x <= 0) { + s = 0 + } else if (0 < x && x <= 1) { + s = 1 + } else { + s = 100 + } + bar = s + } + )"; + + THEN("correct LLVM instructions are produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex if_else_if_else( + "br i1 %2, label %3, label %4\n" + "\n" + "3:.*\n" + " .*\n" + " br label %13\n" + "\n" + "4:.*\n" + " .*\n" + " .*\n" + " .*\n" + " .*\n" + " %9 = and i1 %.+, %.+\n" + " br i1 %9, label %10, label %11\n" + "\n" + "10:.*\n" + " .*\n" + " br label %12\n" + "\n" + "11:.*\n" + " .*\n" + " br label %12\n" + "\n" + "12:.*\n" + " br label %13\n" + "\n" + "13:"); + REQUIRE(std::regex_search(module_string, m, if_else_if_else)); + } + } +} + +//============================================================================= +// FunctionBlock +//============================================================================= + +SCENARIO("Function", "[visitor][llvm]") { + GIVEN("Simple function with arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x) { 
+ foo = x + } + )"; + + THEN("function is produced with arguments allocated on stack and a return instruction") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check function signature. The return type should be the default double type. + std::regex function_signature(R"(define double @foo\(double %x[0-9].*\) \{)"); + REQUIRE(std::regex_search(module_string, m, function_signature)); + + // Check that function arguments are allocated on the local stack. + std::regex alloca_instr(R"(%x = alloca double)"); + std::regex store_instr(R"(store double %x[0-9].*, double\* %x)"); + REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, store_instr)); + + // Check the return variable has also been allocated. + std::regex ret_instr(R"(%ret_foo = alloca double)"); + + // Check that the return value has been loaded and passed to terminator. + std::regex loaded(R"(%2 = load double, double\* %ret_foo)"); + std::regex terminator(R"(ret double %2)"); + REQUIRE(std::regex_search(module_string, m, loaded)); + REQUIRE(std::regex_search(module_string, m, terminator)); + } + } +} + +//============================================================================= +// FunctionCall +//============================================================================= + +SCENARIO("Function call", "[visitor][llvm]") { + GIVEN("A call to procedure") { + std::string nmodl_text = R"( + PROCEDURE bar() {} + FUNCTION foo() { + bar() + } + )"; + + THEN("an int call instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for call instruction. 
+ std::regex call(R"(call i32 @bar\(\))"); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to function declared below the caller") { + std::string nmodl_text = R"( + FUNCTION foo(x) { + foo = 4 * bar() + } + FUNCTION bar() { + bar = 5 + } + )"; + + THEN("a correct call instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for call instruction. + std::regex call(R"(%[0-9]+ = call double @bar\(\))"); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to function with arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x, y) { + foo = 4 * x - y + } + FUNCTION bar(i) { + bar = foo(i, 4) + } + )"; + + THEN("arguments are processed before the call and passed to call instruction") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check correct arguments. + std::regex i(R"(%1 = load double, double\* %i)"); + std::regex call(R"(call double @foo\(double %1, double 4.000000e\+00\))"); + REQUIRE(std::regex_search(module_string, m, i)); + REQUIRE(std::regex_search(module_string, m, call)); + } + } + + GIVEN("A call to external method") { + std::string nmodl_text = R"( + FUNCTION nmodl_ceil(x) { + nmodl_ceil = ceil(x) + } + + FUNCTION nmodl_cos(x) { + nmodl_cos = cos(x) + } + + FUNCTION nmodl_exp(x) { + nmodl_exp = exp(x) + } + + FUNCTION nmodl_fabs(x) { + nmodl_fabs = fabs(x) + } + + FUNCTION nmodl_floor(x) { + nmodl_floor = floor(x) + } + + FUNCTION nmodl_log(x) { + nmodl_log = log(x) + } + + FUNCTION nmodl_log10(x) { + nmodl_log10 = log10(x) + } + + FUNCTION nmodl_pow(x, y) { + nmodl_pow = pow(x, y) + } + + FUNCTION nmodl_sin(x) { + nmodl_sin = sin(x) + } + + FUNCTION nmodl_sqrt(x) { + nmodl_sqrt = sqrt(x) + } + )"; + + THEN("LLVM intrinsic corresponding to this method is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for intrinsic declarations. 
+ std::regex ceil(R"(declare double @llvm\.ceil\.f64\(double\))"); + std::regex cos(R"(declare double @llvm\.cos\.f64\(double\))"); + std::regex exp(R"(declare double @llvm\.exp\.f64\(double\))"); + std::regex fabs(R"(declare double @llvm\.fabs\.f64\(double\))"); + std::regex floor(R"(declare double @llvm\.floor\.f64\(double\))"); + std::regex log(R"(declare double @llvm\.log\.f64\(double\))"); + std::regex log10(R"(declare double @llvm\.log10\.f64\(double\))"); + std::regex pow(R"(declare double @llvm\.pow\.f64\(double, double\))"); + std::regex sin(R"(declare double @llvm\.sin\.f64\(double\))"); + std::regex sqrt(R"(declare double @llvm\.sqrt\.f64\(double\))"); + REQUIRE(std::regex_search(module_string, m, ceil)); + REQUIRE(std::regex_search(module_string, m, cos)); + REQUIRE(std::regex_search(module_string, m, exp)); + REQUIRE(std::regex_search(module_string, m, fabs)); + REQUIRE(std::regex_search(module_string, m, floor)); + REQUIRE(std::regex_search(module_string, m, log)); + REQUIRE(std::regex_search(module_string, m, log10)); + REQUIRE(std::regex_search(module_string, m, pow)); + REQUIRE(std::regex_search(module_string, m, sin)); + REQUIRE(std::regex_search(module_string, m, sqrt)); + + // Check the correct call is made. 
+ std::regex ceil_call(R"(call double @llvm\.ceil\.f64\(double %[0-9]+\))"); + std::regex cos_call(R"(call double @llvm\.cos\.f64\(double %[0-9]+\))"); + std::regex exp_call(R"(call double @llvm\.exp\.f64\(double %[0-9]+\))"); + std::regex fabs_call(R"(call double @llvm\.fabs\.f64\(double %[0-9]+\))"); + std::regex floor_call(R"(call double @llvm\.floor\.f64\(double %[0-9]+\))"); + std::regex log_call(R"(call double @llvm\.log\.f64\(double %[0-9]+\))"); + std::regex log10_call(R"(call double @llvm\.log10\.f64\(double %[0-9]+\))"); + std::regex pow_call(R"(call double @llvm\.pow\.f64\(double %[0-9]+, double %[0-9]+\))"); + std::regex sin_call(R"(call double @llvm\.sin\.f64\(double %[0-9]+\))"); + std::regex sqrt_call(R"(call double @llvm\.sqrt\.f64\(double %[0-9]+\))"); + REQUIRE(std::regex_search(module_string, m, ceil_call)); + REQUIRE(std::regex_search(module_string, m, cos_call)); + REQUIRE(std::regex_search(module_string, m, exp_call)); + REQUIRE(std::regex_search(module_string, m, fabs_call)); + REQUIRE(std::regex_search(module_string, m, floor_call)); + REQUIRE(std::regex_search(module_string, m, log_call)); + REQUIRE(std::regex_search(module_string, m, log10_call)); + REQUIRE(std::regex_search(module_string, m, pow_call)); + REQUIRE(std::regex_search(module_string, m, sin_call)); + REQUIRE(std::regex_search(module_string, m, sqrt_call)); + } + } + + GIVEN("A call to printf") { + std::string nmodl_text = R"( + PROCEDURE bar() { + LOCAL i + i = 0 + printf("foo") + printf("bar %d", i) + } + )"; + + THEN("printf is declared and global string values are created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check for global string values. 
+ std::regex str1( + R"(@[0-9]+ = private unnamed_addr constant \[6 x i8\] c\"\\22foo\\22\\00\")"); + std::regex str2( + R"(@[0-9]+ = private unnamed_addr constant \[9 x i8\] c\"\\22bar %d\\22\\00\")"); + REQUIRE(std::regex_search(module_string, m, str1)); + REQUIRE(std::regex_search(module_string, m, str2)); + + // Check for printf declaration. + std::regex declaration(R"(declare i32 @printf\(i8\*, \.\.\.\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check the correct calls are made. + std::regex call1( + R"(call i32 \(i8\*, \.\.\.\) @printf\(i8\* getelementptr inbounds \(\[6 x i8\], \[6 x i8\]\* @[0-9]+, i32 0, i32 0\)\))"); + std::regex call2( + R"(call i32 \(i8\*, \.\.\.\) @printf\(i8\* getelementptr inbounds \(\[9 x i8\], \[9 x i8\]\* @[0-9]+, i32 0, i32 0\), double %[0-9]+\))"); + REQUIRE(std::regex_search(module_string, m, call1)); + REQUIRE(std::regex_search(module_string, m, call2)); + } + } + + GIVEN("A call to function with the wrong number of arguments") { + std::string nmodl_text = R"( + FUNCTION foo(x, y) { + foo = 4 * x - y + } + FUNCTION bar(i) { + bar = foo(i) + } + )"; + + THEN("a runtime error is thrown") { + REQUIRE_THROWS_AS(run_llvm_visitor(nmodl_text), std::runtime_error); + } + } +} + +//============================================================================= +// IndexedName +//============================================================================= + +SCENARIO("Indexed name", "[visitor][llvm]") { + GIVEN("Procedure with a local array variable") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2] + } + )"; + + THEN("array is allocated") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex array(R"(%x = alloca \[2 x double\])"); + REQUIRE(std::regex_search(module_string, m, array)); + } + } + + GIVEN("Procedure with a local array assignment") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2] + x[10 - 10] = 1 + x[1] = 3 + } + )"; + + 
THEN("element is stored to the array") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check GEPs are created correctly to get the addresses of array elements. + std::regex GEP1( + R"(%1 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 0)"); + std::regex GEP2( + R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)"); + REQUIRE(std::regex_search(module_string, m, GEP1)); + REQUIRE(std::regex_search(module_string, m, GEP2)); + + // Check the value is stored to the correct addresses. + std::regex store1(R"(store double 1.000000e\+00, double\* %1)"); + std::regex store2(R"(store double 3.000000e\+00, double\* %2)"); + REQUIRE(std::regex_search(module_string, m, store1)); + REQUIRE(std::regex_search(module_string, m, store2)); + } + } + + GIVEN("Procedure with a assignment of array element") { + std::string nmodl_text = R"( + PROCEDURE foo() { + LOCAL x[2], y + x[1] = 3 + y = x[1] + } + )"; + + THEN("array element is stored to the variable") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check GEP is created correctly to pint at array element. + std::regex GEP( + R"(%2 = getelementptr inbounds \[2 x double\], \[2 x double\]\* %x, i64 0, i64 1)"); + REQUIRE(std::regex_search(module_string, m, GEP)); + + // Check the value is loaded from the pointer. + std::regex load(R"(%3 = load double, double\* %2)"); + REQUIRE(std::regex_search(module_string, m, load)); + + // Check the value is stored to the the variable. 
+ std::regex store(R"(store double %3, double\* %y)"); + REQUIRE(std::regex_search(module_string, m, store)); + } + } +} + +//============================================================================= +// LocalList and LocalVar +//============================================================================= + +SCENARIO("Local variable", "[visitor][llvm]") { + GIVEN("Procedure with some local variables") { + std::string nmodl_text = R"( + PROCEDURE local() { + LOCAL i, j + } + )"; + + THEN("local variables are allocated on the stack") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check stack allocations for i and j + std::regex i(R"(%i = alloca double)"); + std::regex j(R"(%j = alloca double)"); + REQUIRE(std::regex_search(module_string, m, i)); + REQUIRE(std::regex_search(module_string, m, j)); + } + } +} + +//============================================================================= +// ProcedureBlock +//============================================================================= + +SCENARIO("Procedure", "[visitor][llvm]") { + GIVEN("Empty procedure with no arguments") { + std::string nmodl_text = R"( + PROCEDURE empty() {} + )"; + + THEN("a function returning 0 integer is produced") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check procedure has empty body with a dummy 0 allocation. 
+ std::regex signature(R"(define i32 @empty)"); + std::regex alloc(R"(%ret_empty = alloca i32)"); + std::regex store(R"(store i32 0, i32\* %ret_empty)"); + std::regex load(R"(%1 = load i32, i32\* %ret_empty)"); + std::regex ret(R"(ret i32 %1)"); + REQUIRE(std::regex_search(module_string, m, signature)); + REQUIRE(std::regex_search(module_string, m, alloc)); + REQUIRE(std::regex_search(module_string, m, store)); + REQUIRE(std::regex_search(module_string, m, load)); + REQUIRE(std::regex_search(module_string, m, ret)); + } + } + + GIVEN("Empty procedure with arguments") { + std::string nmodl_text = R"( + PROCEDURE with_argument(x) {} + )"; + + THEN("int function is produced with arguments allocated on stack") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check procedure signature. + std::regex function_signature(R"(define i32 @with_argument\(double %x[0-9].*\) \{)"); + REQUIRE(std::regex_search(module_string, m, function_signature)); + + // Check dummy return. + std::regex dummy_alloca(R"(%ret_with_argument = alloca i32)"); + std::regex dummy_store(R"(store i32 0, i32\* %ret_with_argument)"); + std::regex dummy_load(R"(%1 = load i32, i32\* %ret_with_argument)"); + std::regex ret(R"(ret i32 %1)"); + REQUIRE(std::regex_search(module_string, m, dummy_alloca)); + REQUIRE(std::regex_search(module_string, m, dummy_store)); + REQUIRE(std::regex_search(module_string, m, dummy_load)); + REQUIRE(std::regex_search(module_string, m, ret)); + + // Check that procedure arguments are allocated on the local stack. 
+ std::regex alloca_instr(R"(%x = alloca double)"); + std::regex store_instr(R"(store double %x[0-9].*, double\* %x)"); + REQUIRE(std::regex_search(module_string, m, alloca_instr)); + REQUIRE(std::regex_search(module_string, m, store_instr)); + } + } +} + +//============================================================================= +// UnaryExpression +//============================================================================= + +SCENARIO("Unary expression", "[visitor][llvm]") { + GIVEN("Procedure with negation") { + std::string nmodl_text = R"( + PROCEDURE negation(a) { + LOCAL i + i = -a + } + )"; + + THEN("fneg instruction is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex allocation(R"(%1 = load double, double\* %a)"); + REQUIRE(std::regex_search(module_string, m, allocation)); + + // llvm v9 and llvm v11 implementation for negation + std::regex negation_v9(R"(%2 = fsub double -0.000000e\+00, %1)"); + std::regex negation_v11(R"(fneg double %1)"); + bool result = std::regex_search(module_string, m, negation_v9) || + std::regex_search(module_string, m, negation_v11); + REQUIRE(result == true); + } + } +} + +//============================================================================= +// WhileStatement +//============================================================================= + +SCENARIO("While", "[visitor][llvm]") { + GIVEN("Procedure with a simple while loop") { + std::string nmodl_text = R"( + FUNCTION loop() { + LOCAL i + i = 0 + WHILE (i < 10) { + i = i + 1 + } + loop = 0 + } + )"; + + THEN("correct loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + std::regex loop( + " br label %1\n" + "\n" + "1:.*\n" + " %2 = load double, double\\* %i.*\n" + " %3 = fcmp olt double %2, 1\\.000000e\\+01\n" + " br i1 %3, label %4, label %7\n" + "\n" + "4:.*\n" + " %5 = load double, double\\* %i.*\n" + " %6 = fadd double %5, 1\\.000000e\\+00\n" + " store 
double %6, double\\* %i.*\n" + " br label %1\n" + "\n" + "7:.*\n" + " store double 0\\.000000e\\+00, double\\* %ret_loop.*\n"); + // Check that 3 blocks are created: header, body and exit blocks. Also, there must be + // a backedge from the body to the header. + REQUIRE(std::regex_search(module_string, m, loop)); + } + } +} + +//============================================================================= +// State scalar kernel +//============================================================================= + +SCENARIO("Scalar state kernel", "[visitor][llvm]") { + GIVEN("A neuron state update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + RANGE minf, mtau, gl, el + } + + STATE { + m + } + + PARAMETER { + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + + ASSIGNED { + v (mV) + minf + mtau (ms) + il (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + il = gl * (v - el) + } + + DERIVATIVE states { + m = (minf-m) / mtau + } + )"; + + THEN("a kernel with instance struct as an argument and a FOR loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text); + std::smatch m; + + // Check the struct type with correct attributes and the kernel declaration. + std::regex struct_type( + "%.*__instance_var__type = type \\{ double\\*, double\\*, double\\*, double\\*, " + "double\\*, double\\*, double\\*, double\\*, double\\*, double\\*, i32\\*, " + "double\\*, double\\*, double\\*, double\\*, double, double, double, i32, i32 \\}"); + std::regex kernel_declaration( + R"(define void @nrn_state_hh\(%.*__instance_var__type\* noalias nocapture readonly .*\) #0)"); + REQUIRE(std::regex_search(module_string, m, struct_type)); + REQUIRE(std::regex_search(module_string, m, kernel_declaration)); + + // Check kernel attributes. 
+ std::regex kernel_attributes(R"(attributes #0 = \{ nofree nounwind \})"); + REQUIRE(std::regex_search(module_string, m, kernel_attributes)); + + // Check for correct variables initialisation and a branch to condition block. + std::regex id_initialisation(R"(%id = alloca i32)"); + std::regex node_id_initialisation(R"(%node_id = alloca i32)"); + std::regex v_initialisation(R"(%v = alloca double)"); + std::regex br(R"(br label %for\.cond)"); + REQUIRE(std::regex_search(module_string, m, id_initialisation)); + REQUIRE(std::regex_search(module_string, m, node_id_initialisation)); + REQUIRE(std::regex_search(module_string, m, v_initialisation)); + REQUIRE(std::regex_search(module_string, m, br)); + + // Check condition block: id < mech->node_count, and a conditional branch to loop body + // or exit. + std::regex condition( + " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*,.*\n" + " %.* = getelementptr inbounds %.*__instance_var__type, " + "%.*__instance_var__type\\* " + "%.*, i32 0, i32 [0-9]+\n" + " %.* = load i32, i32\\* %.*,.*\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = icmp slt i32 %.*, %.*"); + std::regex cond_br(R"(br i1 %.*, label %for\.body, label %for\.exit)"); + REQUIRE(std::regex_search(module_string, m, condition)); + REQUIRE(std::regex_search(module_string, m, cond_br)); + + // Check for correct loads from the struct with GEPs. 
+ std::regex load_from_struct( + " %.* = load %.*__instance_var__type\\*, %.*__instance_var__type\\*\\* %.*\n" + " %.* = getelementptr inbounds %.*__instance_var__type, " + "%.*__instance_var__type\\* %.*, i32 0, i32 [0-9]+\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = sext i32 %.* to i64\n" + " %.* = load (i32|double)\\*, (i32|double)\\*\\* %.*\n" + " %.* = getelementptr inbounds (i32|double), (i32|double)\\* %.*, i64 %.*\n" + " %.* = load (i32|double), (i32|double)\\* %.*"); + REQUIRE(std::regex_search(module_string, m, load_from_struct)); + + // Check induction variable is incremented in increment block. + std::regex increment( + "for.inc:.*\n" + " %.* = load i32, i32\\* %id,.*\n" + " %.* = add i32 %.*, 1\n" + " store i32 %.*, i32\\* %id,.*\n" + " br label %for\\.cond"); + REQUIRE(std::regex_search(module_string, m, increment)); + + // Check exit block. + std::regex exit( + "for\\.exit[0-9]*:.*\n" + " ret void"); + REQUIRE(std::regex_search(module_string, m, exit)); + } + } +} + +//============================================================================= +// Gather for vectorised kernel +//============================================================================= + +SCENARIO("Vectorised simple kernel", "[visitor][llvm]") { + GIVEN("An indirect indexing of voltage") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT i + } + + STATE {} + + ASSIGNED { + v (mV) + i (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + i = 2 + } + + DERIVATIVE states {} + )"; + + THEN("a gather instructions is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/4); + std::smatch m; + + // Check that no loop metadata is attached. + std::regex loop_metadata(R"(!llvm\.loop !.*)"); + REQUIRE(!std::regex_search(module_string, m, loop_metadata)); + + // Check gather intrinsic is correctly declared. 
+ std::regex declaration( + R"(declare <4 x double> @llvm\.masked\.gather\.v4f64\.v4p0f64\(<4 x double\*>, i32 immarg, <4 x i1>, <4 x double>\) )"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check that the indices vector is created correctly and extended to i64. + std::regex index_load(R"(load <4 x i32>, <4 x i32>\* %node_id)"); + std::regex sext(R"(sext <4 x i32> %.* to <4 x i64>)"); + REQUIRE(std::regex_search(module_string, m, index_load)); + REQUIRE(std::regex_search(module_string, m, sext)); + + // Check that the access to `voltage` is performed via gather instruction. + // v = mech->voltage[node_id] + std::regex gather( + "call <4 x double> @llvm\\.masked\\.gather\\.v4f64\\.v4p0f64\\(" + "<4 x double\\*> %.*, i32 1, <4 x i1> , <4 x " + "double> undef\\)"); + REQUIRE(std::regex_search(module_string, m, gather)); + } + } +} + +//============================================================================= +// Scatter for vectorised kernel +//============================================================================= + +SCENARIO("Vectorised simple kernel with ion writes", "[visitor][llvm]") { + GIVEN("An indirect indexing of ca ion") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + USEION ca WRITE cai + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states {} + )"; + + THEN("a scatter instructions is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/4); + std::smatch m; + + // Check scatter intrinsic is correctly declared. + std::regex declaration( + R"(declare void @llvm\.masked\.scatter\.v4f64\.v4p0f64\(<4 x double>, <4 x double\*>, i32 immarg, <4 x i1>\))"); + REQUIRE(std::regex_search(module_string, m, declaration)); + + // Check that the indices vector is created correctly and extended to i64. 
+ std::regex index_load(R"(load <4 x i32>, <4 x i32>\* %ion_cai_id)"); + std::regex sext(R"(sext <4 x i32> %.* to <4 x i64>)"); + REQUIRE(std::regex_search(module_string, m, index_load)); + REQUIRE(std::regex_search(module_string, m, sext)); + + // Check that store to `ion_cai` is performed via scatter instruction. + // ion_cai[ion_cai_id] = cai[id] + std::regex scatter( + "call void @llvm\\.masked\\.scatter\\.v4f64\\.v4p0f64\\(<4 x double> %.*, <4 x " + "double\\*> %.*, i32 1, <4 x i1> \\)"); + REQUIRE(std::regex_search(module_string, m, scatter)); + } + } +} + +//============================================================================= +// Vectorised kernel with simple control flow +//============================================================================= + +SCENARIO("Vectorised simple kernel with control flow", "[visitor][llvm]") { + GIVEN("A single if/else statement") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + } + + STATE { + y + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + IF (y < 0) { + y = y + 7 + } ELSE { + y = v + } + } + )"; + + THEN("masked load and stores are created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/true, + /*vector_width=*/8); + std::smatch m; + + // Check masked load/store intrinsics are correctly declared. + std::regex masked_load( + R"(declare <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\*, i32 immarg, <8 x i1>, <8 x float>\))"); + std::regex masked_store( + R"(declare void @llvm.masked\.store\.v8f32\.p0v8f32\(<8 x float>, <8 x float>\*, i32 immarg, <8 x i1>\))"); + REQUIRE(std::regex_search(module_string, m, masked_load)); + REQUIRE(std::regex_search(module_string, m, masked_store)); + + // Check true direction instructions are predicated with mask. 
+ // IF (mech->y[id] < 0) { + // mech->y[id] = mech->y[id] + 7 + std::regex mask(R"(%30 = fcmp olt <8 x float> %.*, zeroinitializer)"); + std::regex true_load( + R"(call <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\* %.*, i32 1, <8 x i1> %30, <8 x float> undef\))"); + std::regex true_store( + R"(call void @llvm\.masked\.store\.v8f32\.p0v8f32\(<8 x float> %.*, <8 x float>\* %.*, i32 1, <8 x i1> %30\))"); + REQUIRE(std::regex_search(module_string, m, mask)); + REQUIRE(std::regex_search(module_string, m, true_load)); + REQUIRE(std::regex_search(module_string, m, true_store)); + + // Check false direction instructions are predicated with inverted mask. + // } ELSE { + // mech->y[id] = v + // } + std::regex inverted_mask( + R"(%47 = xor <8 x i1> %30, )"); + std::regex false_load( + R"(call <8 x float> @llvm\.masked\.load\.v8f32\.p0v8f32\(<8 x float>\* %v, i32 1, <8 x i1> %47, <8 x float> undef\))"); + std::regex false_store( + R"(call void @llvm\.masked\.store\.v8f32\.p0v8f32\(<8 x float> %.*, <8 x float>\* %.*, i32 1, <8 x i1> %47\))"); + } + } +} + +//============================================================================= +// Derivative block : test optimization +//============================================================================= + +SCENARIO("Scalar derivative block", "[visitor][llvm][derivative]") { + GIVEN("After LLVM helper visitor transformations") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + RANGE minf, mtau + } + STATE { + m + } + ASSIGNED { + v (mV) + minf + mtau (ms) + } + BREAKPOINT { + SOLVE states METHOD cnexp + il = 2 + } + DERIVATIVE states { + m = (minf-m)/mtau + } + )"; + + std::string expected_state_loop = R"( + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + v = mech->voltage[node_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] + })"; + + THEN("a single scalar loops is constructed") { + codegen::Platform default_platform; + auto result = 
run_llvm_visitor_helper(nmodl_text, + default_platform, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + REQUIRE(result.size() == 2); + + auto main_state_loop = reindent_text(to_nmodl(result[1])); + REQUIRE(main_state_loop == reindent_text(expected_state_loop)); + } + } +} + +SCENARIO("Vectorised derivative block", "[visitor][llvm][derivative]") { + GIVEN("After LLVM helper visitor transformations") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + NONSPECIFIC_CURRENT il + RANGE minf, mtau, gna, gnabar + } + STATE { + m h + } + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + } + ASSIGNED { + v (mV) + minf + mtau (ms) + ena (mV) + ina (mA/cm2) + gna (S/cm2) + } + BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v - ena) + } + DERIVATIVE states { + m = (minf-m)/mtau + } + )"; + + std::string expected_state_main_loop = R"( + for(id = 0; idnode_count-7; id = id+8) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] + })"; + + std::string expected_state_epilogue_loop = R"( + for(; idnode_count; id = id+1) { + epilogue_node_id = mech->node_index[id] + epilogue_ena_id = mech->ion_ena_index[id] + epilogue_v = mech->voltage[epilogue_node_id] + mech->ena[id] = mech->ion_ena[epilogue_ena_id] + mech->m[id] = (mech->minf[id]-mech->m[id])/mech->mtau[id] + })"; + + std::string expected_cur_main_loop = R"( + for(id = 0; idnode_count-7; id = id+8) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + ion_ina_id = mech->ion_ina_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + current = current+il + current = 
current+mech->ina[id] + g = current + } + dina = mech->ina[id] + v = v_org + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + current = current+il + current = current+mech->ina[id] + rhs = current + } + g = (g-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 + mech->ion_ina[ion_ina_id] += mech->ina[id] + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+g + })"; + + THEN("vector and epilogue scalar loops are constructed") { + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/8); + auto result = run_llvm_visitor_helper(nmodl_text, + simd_platform, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + REQUIRE(result.size() == 4); + + auto cur_main_loop = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_main_loop == reindent_text(expected_cur_main_loop)); + + auto state_main_loop = reindent_text(to_nmodl(result[2])); + REQUIRE(state_main_loop == reindent_text(expected_state_main_loop)); + + auto state_epilogue_loop = reindent_text(to_nmodl(result[3])); + REQUIRE(state_epilogue_loop == reindent_text(expected_state_epilogue_loop)); + } + } +} + +//============================================================================= +// Vector library calls. +//============================================================================= + +SCENARIO("Vector library calls", "[visitor][llvm][vector_lib]") { + GIVEN("A vector LLVM intrinsic") { + std::string nmodl_text = R"( + NEURON { + SUFFIX hh + NONSPECIFIC_CURRENT il + } + STATE { + m + } + ASSIGNED { + v (mV) + il (mA/cm2) + } + BREAKPOINT { + SOLVE states METHOD cnexp + il = 2 + } + DERIVATIVE states { + m = exp(m) + } + )"; + + THEN("it is replaced with an appropriate vector library call") { + std::smatch m; + + // Check exponential intrinsic is created. 
+ std::string no_library_module_str = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/2); + std::regex exp_decl(R"(declare <2 x double> @llvm\.exp\.v2f64\(<2 x double>\))"); + std::regex exp_call(R"(call <2 x double> @llvm\.exp\.v2f64\(<2 x double> .*\))"); + REQUIRE(std::regex_search(no_library_module_str, m, exp_decl)); + REQUIRE(std::regex_search(no_library_module_str, m, exp_call)); + + // Check exponential calls are replaced with calls to SVML library. + std::string svml_library_module_str = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"SVML"); + std::regex svml_exp_decl(R"(declare <2 x double> @__svml_exp2\(<2 x double>\))"); + std::regex svml_exp_call(R"(call <2 x double> @__svml_exp2\(<2 x double> .*\))"); + REQUIRE(std::regex_search(svml_library_module_str, m, svml_exp_decl)); + REQUIRE(std::regex_search(svml_library_module_str, m, svml_exp_call)); + REQUIRE(!std::regex_search(svml_library_module_str, m, exp_call)); + + // Check that supported exponential calls are replaced with calls to MASSV library (i.e. + // operating on vector of width 2). + std::string massv2_library_module_str = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"MASSV"); + std::regex massv2_exp_decl(R"(declare <2 x double> @__expd2\(<2 x double>\))"); + std::regex massv2_exp_call(R"(call <2 x double> @__expd2\(<2 x double> .*\))"); + REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_decl)); + REQUIRE(std::regex_search(massv2_library_module_str, m, massv2_exp_call)); + REQUIRE(!std::regex_search(massv2_library_module_str, m, exp_call)); + + // Check no replacement for MASSV happens for non-supported vector widths. 
+ std::string massv4_library_module_str = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/4, + /*vec_lib=*/"MASSV"); + std::regex exp4_call(R"(call <4 x double> @llvm\.exp\.v4f64\(<4 x double> .*\))"); + REQUIRE(std::regex_search(massv4_library_module_str, m, exp4_call)); + + // Check correct replacement of @llvm.exp.v4f32 into @vexpf when using Accelerate. + std::string accelerate_library_module_str = + run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/true, + /*vector_width=*/4, + /*vec_lib=*/"Accelerate"); + std::regex accelerate_exp_decl(R"(declare <4 x float> @vexpf\(<4 x float>\))"); + std::regex accelerate_exp_call(R"(call <4 x float> @vexpf\(<4 x float> .*\))"); + std::regex fexp_call(R"(call <4 x float> @llvm\.exp\.v4f32\(<4 x float> .*\))"); + REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_decl)); + REQUIRE(std::regex_search(accelerate_library_module_str, m, accelerate_exp_call)); + REQUIRE(!std::regex_search(accelerate_library_module_str, m, fexp_call)); + + // Check correct replacement of @llvm.exp.v2f64 into @_ZGV?N?v_exp when using SLEEF. 
+ std::string sleef_library_module_str = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/2, + /*vec_lib=*/"SLEEF"); +#if defined(__arm64__) || defined(__aarch64__) + std::regex sleef_exp_decl(R"(declare <2 x double> @_ZGVnN2v_exp\(<2 x double>\))"); + std::regex sleef_exp_call(R"(call <2 x double> @_ZGVnN2v_exp\(<2 x double> .*\))"); +#else + std::regex sleef_exp_decl(R"(declare <2 x double> @_ZGVbN2v_exp\(<2 x double>\))"); + std::regex sleef_exp_call(R"(call <2 x double> @_ZGVbN2v_exp\(<2 x double> .*\))"); +#endif + REQUIRE(std::regex_search(sleef_library_module_str, m, sleef_exp_decl)); + REQUIRE(std::regex_search(sleef_library_module_str, m, sleef_exp_call)); + REQUIRE(!std::regex_search(sleef_library_module_str, m, fexp_call)); + + // Check the replacements when using Darwin's libsystem_m. + std::string libsystem_m_library_module_str = + run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/true, + /*vector_width=*/4, + /*vec_lib=*/"libsystem_m"); + std::regex libsystem_m_exp_decl(R"(declare <4 x float> @_simd_exp_f4\(<4 x float>\))"); + std::regex libsystem_m_exp_call(R"(call <4 x float> @_simd_exp_f4\(<4 x float> .*\))"); + REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_decl)); + REQUIRE(std::regex_search(libsystem_m_library_module_str, m, libsystem_m_exp_call)); + REQUIRE(!std::regex_search(libsystem_m_library_module_str, m, fexp_call)); + } + } +} + +//============================================================================= +// Fast math flags +//============================================================================= + +SCENARIO("Fast math flags", "[visitor][llvm]") { + GIVEN("A function to produce fma and specified math flags") { + std::string nmodl_text = R"( + FUNCTION foo(a, b, c) { + foo = (a * b) + c + } + )"; + + THEN("instructions are generated with the flags set") { + std::string module_string = + run_llvm_visitor(nmodl_text, + 
/*opt_level=*/3, + /*use_single_precision=*/false, + /*vector_width=*/1, + /*vec_lib=*/"none", + /*fast_math_flags=*/{"nnan", "contract", "afn"}); + std::smatch m; + + // Check flags for produced 'fmul' and 'fadd' instructions. + std::regex fmul(R"(fmul nnan contract afn double %.*, %.*)"); + std::regex fadd(R"(fadd nnan contract afn double %.*, %.*)"); + REQUIRE(std::regex_search(module_string, m, fmul)); + REQUIRE(std::regex_search(module_string, m, fadd)); + } + } +} + +//============================================================================= +// Optimization : dead code removal +//============================================================================= + +SCENARIO("Dead code removal", "[visitor][llvm][opt]") { + GIVEN("Procedure using local variables, without any side effects") { + std::string nmodl_text = R"( + PROCEDURE add(a, b) { + LOCAL i + i = a + b + } + )"; + + THEN("with optimisation enabled, all ops are eliminated") { + std::string module_string = run_llvm_visitor(nmodl_text, /*opt_level=*/3); + std::smatch m; + + // Check if the values are optimised out. 
+ std::regex empty_proc( + R"(define i32 @add\(double %a[0-9].*, double %b[0-9].*\).*\{\n(\s)*ret i32 0\n\})"); + REQUIRE(std::regex_search(module_string, m, empty_proc)); + } + } +} + +//============================================================================= +// Inlining: remove inline code blocks +//============================================================================= + +SCENARIO("Removal of inlined functions and procedures", "[visitor][llvm][inline]") { + GIVEN("Simple breakpoint block calling a function and a procedure") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test_inline + RANGE a, b, s + } + ASSIGNED { + a + b + s + } + PROCEDURE test_add(a, b) { + LOCAL i + i = a + b + } + FUNCTION test_sub(a, b) { + test_sub = a - b + } + BREAKPOINT { + SOLVE states METHOD cnexp + } + DERIVATIVE states { + a = 1 + b = 2 + test_add(a, b) + s = test_sub(a, b) + } + )"; + + THEN("when the code is inlined the procedure and function blocks are removed") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false, + /*vector_width=*/1, + /*vec_lib=*/"none", + /*fast_math_flags=*/{}, + /*nmodl_inline=*/true); + std::smatch m; + + // Check if the procedure and function declarations are removed + std::regex add_proc(R"(define i32 @test_add\(double %a[0-9].*, double %b[0-9].*\))"); + REQUIRE(!std::regex_search(module_string, m, add_proc)); + std::regex sub_func(R"(define double @test_sub\(double %a[0-9].*, double %b[0-9].*\))"); + REQUIRE(!std::regex_search(module_string, m, sub_func)); + } + } +} + +//============================================================================= +// Basic GPU kernel AST generation +//============================================================================= + +SCENARIO("GPU kernel body", "[visitor][llvm][gpu]") { + GIVEN("For GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { 
+ SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + + std::string expected_loop = R"( + for(id = THREAD_ID; idnode_count; id = id+GRID_STRIDE) { + node_id = mech->node_index[id] + v = mech->voltage[node_id] + mech->m[id] = mech->y[id]+2 + })"; + + THEN("a loop with GPU-specific AST nodes is constructed") { + std::string name = "default"; + std::string math_library = "none"; + codegen::Platform gpu_platform(codegen::PlatformID::GPU, name, math_library); + auto result = run_llvm_visitor_helper(nmodl_text, + gpu_platform, + {ast::AstNodeType::CODEGEN_FOR_STATEMENT}); + REQUIRE(result.size() == 1); + + auto loop = reindent_text(to_nmodl(result[0])); + REQUIRE(loop == reindent_text(expected_loop)); + } + } +} + +//============================================================================= +// Basic NVVM/LLVM IR generation for GPU platforms +//============================================================================= + +SCENARIO("GPU kernel body IR generation", "[visitor][llvm][gpu]") { + GIVEN("For GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + THEN("kernel annotations are added and thread id intrinsics generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check kernel annotations are correclty created. + std::regex annotations(R"(!nvvm\.annotations = !\{!0\})"); + std::regex kernel_data( + R"(!0 = !\{void \(%.*__instance_var__type\*\)\* @nrn_state_.*, !\"kernel\", i32 1\})"); + REQUIRE(std::regex_search(module_string, m, annotations)); + REQUIRE(std::regex_search(module_string, m, kernel_data)); + + // Check thread/block id/dim instrinsics are created. 
+ std::regex block_id(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.ctaid\.x\(\))"); + std::regex block_dim(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.ntid\.x\(\))"); + std::regex tid(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.tid\.x\(\))"); + std::regex grid_dim(R"(call i32 @llvm\.nvvm\.read\.ptx\.sreg\.nctaid\.x\(\))"); + REQUIRE(std::regex_search(module_string, m, block_id)); + REQUIRE(std::regex_search(module_string, m, block_dim)); + REQUIRE(std::regex_search(module_string, m, tid)); + REQUIRE(std::regex_search(module_string, m, grid_dim)); + } + } + + GIVEN("When optimizing for GPU platforms") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = y + 2 + } + )"; + + THEN("address spaces are inferred and target information added") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/3, + /*use_single_precision=*/false); + std::smatch m; + + // Check target information. + // TODO: this may change when more platforms are supported. + std::regex data_layout( + R"(target datalayout = \"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64\")"); + std::regex triple(R"(nvptx64-nvidia-cuda)"); + REQUIRE(std::regex_search(module_string, m, data_layout)); + REQUIRE(std::regex_search(module_string, m, triple)); + + // Check for address space casts and address spaces in general when loading data. 
+ std::regex as_cast( + R"(addrspacecast %.*__instance_var__type\* %.* to %.*__instance_var__type addrspace\(1\)\*)"); + std::regex gep_as1( + R"(getelementptr inbounds %.*__instance_var__type, %.*__instance_var__type addrspace\(1\)\* %.*, i64 0, i32 .*)"); + std::regex load_as1(R"(load double\*, double\* addrspace\(1\)\* %.*)"); + REQUIRE(std::regex_search(module_string, m, as_cast)); + REQUIRE(std::regex_search(module_string, m, gep_as1)); + REQUIRE(std::regex_search(module_string, m, load_as1)); + } + } + + GIVEN("When using math functions") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + RANGE x, y + } + + ASSIGNED { x y } + + STATE { m } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { + m = exp(y) + x ^ 2 + log(x) + } + )"; + + THEN("calls to libdevice are created") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/3, + /*use_single_precision=*/false, + /*math_library=*/"libdevice"); + std::smatch m; + + // Check if exp and pow intrinsics have been replaced. 
+ std::regex exp_declaration(R"(declare double @__nv_exp\(double\))"); + std::regex exp_new_call(R"(call double @__nv_exp\(double %.*\))"); + std::regex exp_old_call(R"(call double @llvm\.exp\.f64\(double %.*\))"); + std::regex pow_declaration(R"(declare double @__nv_pow\(double, double\))"); + std::regex pow_new_call(R"(call double @__nv_pow\(double %.*, double .*\))"); + std::regex pow_old_call(R"(call double @llvm\.pow\.f64\(double %.*, double .*\))"); + std::regex log_declaration(R"(declare double @__nv_log\(double\))"); + std::regex log_new_call(R"(call double @__nv_log\(double %.*\))"); + std::regex log_old_call(R"(call double @llvm\.log\.f64\(double %.*\))"); + REQUIRE(std::regex_search(module_string, m, exp_declaration)); + REQUIRE(std::regex_search(module_string, m, exp_new_call)); + REQUIRE(!std::regex_search(module_string, m, exp_old_call)); + REQUIRE(std::regex_search(module_string, m, pow_declaration)); + REQUIRE(std::regex_search(module_string, m, pow_new_call)); + REQUIRE(!std::regex_search(module_string, m, pow_old_call)); + REQUIRE(std::regex_search(module_string, m, log_declaration)); + REQUIRE(std::regex_search(module_string, m, log_new_call)); + REQUIRE(!std::regex_search(module_string, m, log_old_call)); + } + } + + GIVEN("For current update with atomic addition ") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { } + )"; + + THEN("corresponding LLVM atomic instruction is generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check for atomic addition. 
+ std::regex add(R"(atomicrmw fadd double\* %.*, double %.* seq_cst)"); + REQUIRE(std::regex_search(module_string, m, add)); + } + } + + GIVEN("For current update with atomic addition ") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + } + + DERIVATIVE states { } + )"; + + THEN("corresponding LLVM atomic instruction is generated") { + std::string module_string = run_gpu_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/false); + std::smatch m; + + // Check for atomic addition. + std::regex add(R"(atomicrmw fadd double\* %.*, double %.* seq_cst)"); + REQUIRE(std::regex_search(module_string, m, add)); + } + } +} + +//============================================================================= +// Atomics for vectorised kernel +//============================================================================= + +SCENARIO("A simple kernel with atomic current updates", "[visitor][llvm]") { + GIVEN("A simple atomic update") { + std::string nmodl_text = R"( + NEURON { + SUFFIX test + USEION na READ ena WRITE ina + } + + STATE { } + + ASSIGNED { + v (mV) + ena (mV) + ina (mA/cm2) + } + + BREAKPOINT { } + + DERIVATIVE states { } + )"; + + THEN("an atomic loop is created") { + std::string module_string = run_llvm_visitor(nmodl_text, + /*opt_level=*/0, + /*use_single_precision=*/true, + /*vector_width=*/4); + std::smatch m; + + // Check for correct %ptrs calculation and bitcast to an array. 
+ std::regex ptrtoint(R"(ptrtoint float\* %.* to i64)"); + std::regex insertelement(R"(insertelement <4 x i64> undef, i64 %.*, i32 0)"); + std::regex shufflevector( + R"(shufflevector <4 x i64> %.*, <4 x i64> undef, <4 x i32> zeroinitializer)"); + std::regex bitcast(R"(bitcast <4 x i64>\* %ptrs to \[4 x float\*\]\*)"); + REQUIRE(std::regex_search(module_string, m, ptrtoint)); + REQUIRE(std::regex_search(module_string, m, insertelement)); + REQUIRE(std::regex_search(module_string, m, shufflevector)); + REQUIRE(std::regex_search(module_string, m, bitcast)); + + // Check for %ptrs store and branch to atomic update block. + std::regex ptrs_store(R"(store <4 x i64> %.*, <4 x i64>\* %ptrs)"); + std::regex atomic_branch(R"(br label %atomic\.update)"); + REQUIRE(std::regex_search(module_string, m, ptrs_store)); + REQUIRE(std::regex_search(module_string, m, atomic_branch)); + + // Check the scalar loop for atomic update mis implemented correctly. + std::regex atomic_update( + " %.* = phi i64 \\[ 15, %for\\.body \\], \\[ %.*, %atomic\\.update \\]\n" + " %.* = call i64 @llvm\\.cttz\\.i64\\(i64 %.*, i1 false\\)\n" + " %.* = shl i64 1, %.*\n" + " %.* = xor i64 %.*, -1\n" + " %.* = and i64 %.*, %.*\n" + " %.* = getelementptr \\[4 x float\\*\\], \\[4 x float\\*\\]\\* %.*, i64 0, i64 " + "%.*\n" + " %.* = load float\\*, float\\*\\* %.*, align 8\n" + " %.* = load float, float\\* %.*, align 4\n" + " %.* = extractelement <4 x float> %.*, i64 %.*\n" + " %.* = fadd float %.*, %.*\n" + " store float %.*, float\\* %.*, align 4\n" + " %.* = icmp eq i64 %.*, 0\n"); + std::regex remaining( + R"(br i1 %.*, label %for\.body\.remaining, label %atomic\.update)"); + REQUIRE(std::regex_search(module_string, m, atomic_update)); + REQUIRE(std::regex_search(module_string, m, remaining)); + } + } +} diff --git a/test/unit/codegen/codegen_llvm_visitor.cpp b/test/unit/codegen/codegen_llvm_visitor.cpp new file mode 100644 index 0000000000..f86552459b --- /dev/null +++ 
b/test/unit/codegen/codegen_llvm_visitor.cpp @@ -0,0 +1,635 @@ +/************************************************************************* + * Copyright (C) 2019-2021 Blue Brain Project + * + * This file is part of NMODL distributed under the terms of the GNU + * Lesser General Public License. See top-level LICENSE file for details. + *************************************************************************/ + +#include + +#include "ast/program.hpp" +#include "codegen/codegen_helper_visitor.hpp" +#include "codegen/llvm/codegen_llvm_visitor.hpp" +#include "config/config.h" +#include "parser/nmodl_driver.hpp" +#include "test/unit/utils/test_utils.hpp" +#include "visitors/inline_visitor.hpp" +#include "visitors/neuron_solve_visitor.hpp" +#include "visitors/solve_block_visitor.hpp" +#include "visitors/symtab_visitor.hpp" +#include "visitors/units_visitor.hpp" + +using namespace nmodl; +using namespace visitor; +using namespace codegen; + +using nmodl::NrnUnitsLib; +using nmodl::parser::NmodlDriver; +using nmodl::test_utils::reindent_text; + +/// Run LLVM codegen visitor and get instance struct declaration and setup of C++ wrapper +std::string get_wrapper_instance_struct(const std::string& nmodl_text) { + const auto& ast = NmodlDriver().parse_string(nmodl_text); + std::stringbuf strbuf; + std::ostream oss(&strbuf); + /// directory where units lib file is located + std::string units_dir(NrnUnitsLib::get_path()); + /// parse units of text + UnitsVisitor(units_dir).visit_program(*ast); + SymtabVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + + /// create LLVM and C++ wrapper code generation visitor + codegen::Platform cpu_platform(/*use_single_precision=*/false, /*instruction_width=*/1); + codegen::CodegenLLVMVisitor llvm_visitor("hh.mod", oss, cpu_platform, 0); + llvm_visitor.visit_program(*ast); + strbuf.str(""); + llvm_visitor.print_mechanism_range_var_structure(false); + 
llvm_visitor.print_instance_variable_setup(); + return strbuf.str(); +} + +// Run LLVM codegen helper visitor with given platform as target +static std::vector> run_llvm_visitor_helper( + const std::string& text, + codegen::Platform& platform, + const std::vector& nodes_to_collect) { + NmodlDriver driver; + const auto& ast = driver.parse_string(text); + + SymtabVisitor().visit_program(*ast); + InlineVisitor().visit_program(*ast); + NeuronSolveVisitor().visit_program(*ast); + SolveBlockVisitor().visit_program(*ast); + CodegenLLVMHelperVisitor(platform).visit_program(*ast); + + return collect_nodes(*ast, nodes_to_collect); +} + +SCENARIO("Check instance struct declaration and setup in wrapper", + "[codegen][llvm][instance_struct]") { + GIVEN("hh: simple mod file") { + std::string nmodl_text = R"( + TITLE hh.mod squid sodium, potassium, and leak channels + + UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) + } + + NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE : assigned GLOBALs will be per thread + } + + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + + STATE { + m h n + } + + ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + ek (mV) + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf hinf ninf + mtau (ms) htau (ms) ntau (ms) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v - ena) + gk = gkbar*n*n*n*n + ik = gk*(v - ek) + il = gl*(v - el) + } + + DERIVATIVE states { + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau + } + )"; + + std::string generated_instance_struct_declaration = R"( + struct hh__instance_var__type { + const double* __restrict__ gnabar; + const double* __restrict__ gkbar; + const double* __restrict__ gl; + const double* 
__restrict__ el; + double* __restrict__ gna; + double* __restrict__ gk; + double* __restrict__ il; + double* __restrict__ minf; + double* __restrict__ hinf; + double* __restrict__ ninf; + double* __restrict__ mtau; + double* __restrict__ htau; + double* __restrict__ ntau; + double* __restrict__ m; + double* __restrict__ h; + double* __restrict__ n; + double* __restrict__ Dm; + double* __restrict__ Dh; + double* __restrict__ Dn; + double* __restrict__ ena; + double* __restrict__ ek; + double* __restrict__ ina; + double* __restrict__ ik; + double* __restrict__ v_unused; + double* __restrict__ g_unused; + const double* __restrict__ ion_ena; + double* __restrict__ ion_ina; + double* __restrict__ ion_dinadv; + const double* __restrict__ ion_ek; + double* __restrict__ ion_ik; + double* __restrict__ ion_dikdv; + int* __restrict__ ion_ena_index; + int* __restrict__ ion_ina_index; + int* __restrict__ ion_dinadv_index; + int* __restrict__ ion_ek_index; + int* __restrict__ ion_ik_index; + int* __restrict__ ion_dikdv_index; + double* __restrict__ voltage; + int* __restrict__ node_index; + double* __restrict__ vec_rhs; + double* __restrict__ vec_d; + double* __restrict__ _shadow_rhs; + double* __restrict__ _shadow_d; + double t; + double dt; + double celsius; + int secondorder; + int node_count; + }; + )"; + std::string generated_instance_struct_setup = R"( + static inline void setup_instance(NrnThread* nt, Memb_list* ml) { + hh__instance_var__type* inst = (hh__instance_var__type*) mem_alloc(1, sizeof(hh__instance_var__type)); + int pnodecount = ml->_nodecount_padded; + Datum* indexes = ml->pdata; + inst->gnabar = ml->data+0*pnodecount; + inst->gkbar = ml->data+1*pnodecount; + inst->gl = ml->data+2*pnodecount; + inst->el = ml->data+3*pnodecount; + inst->gna = ml->data+4*pnodecount; + inst->gk = ml->data+5*pnodecount; + inst->il = ml->data+6*pnodecount; + inst->minf = ml->data+7*pnodecount; + inst->hinf = ml->data+8*pnodecount; + inst->ninf = ml->data+9*pnodecount; + inst->mtau 
= ml->data+10*pnodecount; + inst->htau = ml->data+11*pnodecount; + inst->ntau = ml->data+12*pnodecount; + inst->m = ml->data+13*pnodecount; + inst->h = ml->data+14*pnodecount; + inst->n = ml->data+15*pnodecount; + inst->Dm = ml->data+16*pnodecount; + inst->Dh = ml->data+17*pnodecount; + inst->Dn = ml->data+18*pnodecount; + inst->ena = ml->data+19*pnodecount; + inst->ek = ml->data+20*pnodecount; + inst->ina = ml->data+21*pnodecount; + inst->ik = ml->data+22*pnodecount; + inst->v_unused = ml->data+23*pnodecount; + inst->g_unused = ml->data+24*pnodecount; + inst->ion_ena = nt->_data; + inst->ion_ina = nt->_data; + inst->ion_dinadv = nt->_data; + inst->ion_ek = nt->_data; + inst->ion_ik = nt->_data; + inst->ion_dikdv = nt->_data; + inst->ion_ena_index = indexes+0*pnodecount; + inst->ion_ina_index = indexes+1*pnodecount; + inst->ion_dinadv_index = indexes+2*pnodecount; + inst->ion_ek_index = indexes+3*pnodecount; + inst->ion_ik_index = indexes+4*pnodecount; + inst->ion_dikdv_index = indexes+5*pnodecount; + inst->voltage = nt->_actual_v; + inst->node_index = ml->nodeindices; + inst->vec_rhs = nt->_actual_rhs; + inst->vec_d = nt->_actual_d; + inst->_shadow_rhs = nt->_shadow_rhs; + inst->_shadow_d = nt->_shadow_d; + inst->t = nt->t; + inst->dt = nt->dt; + inst->celsius = celsius; + inst->secondorder = secondorder; + inst->node_count = ml->nodecount; + ml->instance = inst; + } + )"; + + THEN("index and nt variables created correctly") { + auto result_instance_struct_declaration_setup = reindent_text( + get_wrapper_instance_struct(nmodl_text)); + + auto expected_instance_struct_declaration = reindent_text( + generated_instance_struct_declaration); + auto expected_instance_struct_setup = reindent_text(generated_instance_struct_setup); + + REQUIRE(result_instance_struct_declaration_setup.find( + expected_instance_struct_declaration) != std::string::npos); + REQUIRE(result_instance_struct_declaration_setup.find(expected_instance_struct_setup) != + std::string::npos); + } + } +} 
+ + +SCENARIO("Channel: Derivative and breakpoint block llvm transformations", + "[visitor][llvm_helper][channel]") { + GIVEN("A hh.mod file with derivative and breakpoint block") { + std::string nmodl_text = R"( + TITLE hh.mod squid sodium, potassium, and leak channels + + UNITS { + (mA) = (milliamp) + (mV) = (millivolt) + (S) = (siemens) + } + + NEURON { + SUFFIX hh + USEION na READ ena WRITE ina + USEION k READ ek WRITE ik + NONSPECIFIC_CURRENT il + RANGE gnabar, gkbar, gl, el, gna, gk + RANGE minf, hinf, ninf, mtau, htau, ntau + THREADSAFE + } + + PARAMETER { + gnabar = .12 (S/cm2) <0,1e9> + gkbar = .036 (S/cm2) <0,1e9> + gl = .0003 (S/cm2) <0,1e9> + el = -54.3 (mV) + } + + STATE { + m + h + n + } + + ASSIGNED { + v (mV) + celsius (degC) + ena (mV) + ek (mV) + gna (S/cm2) + gk (S/cm2) + ina (mA/cm2) + ik (mA/cm2) + il (mA/cm2) + minf + hinf + ninf + mtau (ms) + htau (ms) + ntau (ms) + } + + BREAKPOINT { + SOLVE states METHOD cnexp + gna = gnabar*m*m*m*h + ina = gna*(v-ena) + gk = gkbar*n*n*n*n + ik = gk*(v-ek) + il = gl*(v-el) + } + + DERIVATIVE states { + rates(v) + m' = (minf-m)/mtau + h' = (hinf-h)/htau + n' = (ninf-n)/ntau + } + + PROCEDURE rates(v(mV)) { + LOCAL alpha, beta, sum, q10 + UNITSOFF + q10 = 3^((celsius-6.3)/10) + alpha = .1*vtrap(-(v+40), 10) + beta = 4*exp(-(v+65)/18) + sum = alpha+beta + mtau = 1/(q10*sum) + minf = alpha/sum + alpha = .07*exp(-(v+65)/20) + beta = 1/(exp(-(v+35)/10)+1) + sum = alpha+beta + htau = 1/(q10*sum) + hinf = alpha/sum + alpha = .01*vtrap(-(v+55), 10) + beta = .125*exp(-(v+65)/80) + sum = alpha+beta + ntau = 1/(q10*sum) + ninf = alpha/sum + } + + FUNCTION vtrap(x, y) { + IF (fabs(x/y)<1e-6) { + vtrap = y*(1-x/y/2) + } ELSE { + vtrap = x/(exp(x/y)-1) + } + } + )"; + + std::string expected_state_function = R"( + VOID nrn_state_hh(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, ek_id + DOUBLE v + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + 
ek_id = mech->ion_ek_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->ek[id] = mech->ion_ek[ek_id] + { + DOUBLE alpha, beta, sum, q10, vtrap_in_0, vtrap_in_1, v_in_0 + v_in_0 = v + UNITSOFF + q10 = 3^((mech->celsius-6.3)/10) + { + DOUBLE x_in_0, y_in_0 + x_in_0 = -(v_in_0+40) + y_in_0 = 10 + IF (fabs(x_in_0/y_in_0)<1e-6) { + vtrap_in_0 = y_in_0*(1-x_in_0/y_in_0/2) + } ELSE { + vtrap_in_0 = x_in_0/(exp(x_in_0/y_in_0)-1) + } + } + alpha = .1*vtrap_in_0 + beta = 4*exp(-(v_in_0+65)/18) + sum = alpha+beta + mech->mtau[id] = 1/(q10*sum) + mech->minf[id] = alpha/sum + alpha = .07*exp(-(v_in_0+65)/20) + beta = 1/(exp(-(v_in_0+35)/10)+1) + sum = alpha+beta + mech->htau[id] = 1/(q10*sum) + mech->hinf[id] = alpha/sum + { + DOUBLE x_in_1, y_in_1 + x_in_1 = -(v_in_0+55) + y_in_1 = 10 + IF (fabs(x_in_1/y_in_1)<1e-6) { + vtrap_in_1 = y_in_1*(1-x_in_1/y_in_1/2) + } ELSE { + vtrap_in_1 = x_in_1/(exp(x_in_1/y_in_1)-1) + } + } + alpha = .01*vtrap_in_1 + beta = .125*exp(-(v_in_0+65)/80) + sum = alpha+beta + mech->ntau[id] = 1/(q10*sum) + mech->ninf[id] = alpha/sum + } + mech->m[id] = mech->m[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->mtau[id])))*(-(((mech->minf[id]))/mech->mtau[id])/((((-1.0)))/mech->mtau[id])-mech->m[id]) + mech->h[id] = mech->h[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->htau[id])))*(-(((mech->hinf[id]))/mech->htau[id])/((((-1.0)))/mech->htau[id])-mech->h[id]) + mech->n[id] = mech->n[id]+(1.0-exp(mech->dt*((((-1.0)))/mech->ntau[id])))*(-(((mech->ninf[id]))/mech->ntau[id])/((((-1.0)))/mech->ntau[id])-mech->n[id]) + } + })"; + + std::string expected_cur_function = R"( + VOID nrn_cur_hh(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, ek_id, ion_dinadv_id, ion_dikdv_id, ion_ina_id, ion_ik_id + DOUBLE v, g, rhs, v_org, current, dina, dik + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + ek_id = mech->ion_ek_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + 
ion_dikdv_id = mech->ion_dikdv_index[id] + ion_ina_id = mech->ion_ina_index[id] + ion_ik_id = mech->ion_ik_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->ek[id] = mech->ion_ek[ek_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->gk[id] = mech->gkbar[id]*mech->n[id]*mech->n[id]*mech->n[id]*mech->n[id] + mech->ik[id] = mech->gk[id]*(v-mech->ek[id]) + mech->il[id] = mech->gl[id]*(v-mech->el[id]) + current = current+mech->il[id] + current = current+mech->ina[id] + current = current+mech->ik[id] + g = current + } + dina = mech->ina[id] + dik = mech->ik[id] + v = v_org + { + current = 0 + mech->gna[id] = mech->gnabar[id]*mech->m[id]*mech->m[id]*mech->m[id]*mech->h[id] + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->gk[id] = mech->gkbar[id]*mech->n[id]*mech->n[id]*mech->n[id]*mech->n[id] + mech->ik[id] = mech->gk[id]*(v-mech->ek[id]) + mech->il[id] = mech->gl[id]*(v-mech->el[id]) + current = current+mech->il[id] + current = current+mech->ina[id] + current = current+mech->ik[id] + rhs = current + } + g = (g-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001 + mech->ion_dikdv[ion_dikdv_id] = mech->ion_dikdv[ion_dikdv_id]+(dik-mech->ik[id])/0.001 + mech->ion_ina[ion_ina_id] += mech->ina[id] + mech->ion_ik[ion_ik_id] += mech->ik[id] + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+g + } + })"; + + THEN("codegen functions are constructed correctly for density channel") { + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + auto result = run_llvm_visitor_helper(nmodl_text, + simd_platform, + {ast::AstNodeType::CODEGEN_FUNCTION}); + REQUIRE(result.size() == 2); + + auto cur_function = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_function == 
reindent_text(expected_cur_function)); + + auto state_function = reindent_text(to_nmodl(result[1])); + REQUIRE(state_function == reindent_text(expected_state_function)); + } + } +} + +SCENARIO("Synapse: Derivative and breakpoint block llvm transformations", + "[visitor][llvm_helper][derivative]") { + GIVEN("A exp2syn.mod file with derivative and breakpoint block") { + // note that USEION statement is added just for better code coverage (ionic current) + std::string nmodl_text = R"( + NEURON { + POINT_PROCESS Exp2Syn + USEION na READ ena WRITE ina + RANGE tau1, tau2, e, i + NONSPECIFIC_CURRENT i + RANGE g_var, gna + } + + UNITS { + (nA) = (nanoamp) + (mV) = (millivolt) + (uS) = (microsiemens) + } + + PARAMETER { + tau1 = 0.1 (ms) <1e-9,1e9> + tau2 = 10 (ms) <1e-9,1e9> + e = 0 (mV) + } + + ASSIGNED { + v (mV) + i (nA) + g_var (uS) + gna (S/cm2) + factor + } + + STATE { + A (uS) + B (uS) + } + + INITIAL { + LOCAL tp + IF (tau1/tau2>0.9999) { + tau1 = 0.9999*tau2 + } + IF (tau1/tau2<1e-9) { + tau1 = tau2*1e-9 + } + A = 0 + B = 0 + tp = (tau1*tau2)/(tau2-tau1)*log(tau2/tau1) + factor = -exp(-tp/tau1)+exp(-tp/tau2) + factor = 1/factor + } + + BREAKPOINT { + SOLVE state METHOD cnexp + ina = gna*(v-ena) + g_var = B-A + i = g_var*(v-e) + } + + DERIVATIVE state { + A' = -A/tau1 + B' = -B/tau2 + } + + NET_RECEIVE (weight(uS)) { + A = A+weight*factor + B = B+weight*factor + })"; + + std::string expected_cur_function = R"( + VOID nrn_cur_exp2syn(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id, node_area_id, ion_dinadv_id, ion_ina_id + DOUBLE v, g, rhs, v_org, current, dina, mfactor + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + node_area_id = mech->node_area_index[id] + ion_dinadv_id = mech->ion_dinadv_index[id] + ion_ina_id = mech->ion_ina_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + v_org = v + v = v+0.001 + { + current = 0 + mech->ina[id] = 
mech->gna[id]*(v-mech->ena[id]) + mech->g_var[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g_var[id]*(v-mech->e[id]) + current = current+mech->i[id] + current = current+mech->ina[id] + g = current + } + dina = mech->ina[id] + v = v_org + { + current = 0 + mech->ina[id] = mech->gna[id]*(v-mech->ena[id]) + mech->g_var[id] = mech->B[id]-mech->A[id] + mech->i[id] = mech->g_var[id]*(v-mech->e[id]) + current = current+mech->i[id] + current = current+mech->ina[id] + rhs = current + } + g = (g-rhs)/0.001 + mech->ion_dinadv[ion_dinadv_id] = mech->ion_dinadv[ion_dinadv_id]+(dina-mech->ina[id])/0.001*1.e2/mech->node_area[node_area_id] + mech->ion_ina[ion_ina_id] += mech->ina[id]*(1.e2/mech->node_area[node_area_id]) + mfactor = 1.e2/mech->node_area[node_area_id] + g = g*mfactor + rhs = rhs*mfactor + mech->vec_rhs[node_id] = mech->vec_rhs[node_id]-rhs + mech->vec_d[node_id] = mech->vec_d[node_id]+g + } + })"; + + std::string expected_state_function = R"( + VOID nrn_state_exp2syn(INSTANCE_STRUCT *mech){ + INTEGER id + INTEGER node_id, ena_id + DOUBLE v + for(id = 0; idnode_count; id = id+1) { + node_id = mech->node_index[id] + ena_id = mech->ion_ena_index[id] + v = mech->voltage[node_id] + mech->ena[id] = mech->ion_ena[ena_id] + mech->A[id] = mech->A[id]+(1.0-exp(mech->dt*((-1.0)/mech->tau1[id])))*(-(0.0)/((-1.0)/mech->tau1[id])-mech->A[id]) + mech->B[id] = mech->B[id]+(1.0-exp(mech->dt*((-1.0)/mech->tau2[id])))*(-(0.0)/((-1.0)/mech->tau2[id])-mech->B[id]) + } + })"; + + THEN("codegen functions are constructed correctly for synapse") { + codegen::Platform simd_platform(/*use_single_precision=*/false, + /*instruction_width=*/1); + auto result = run_llvm_visitor_helper(nmodl_text, + simd_platform, + {ast::AstNodeType::CODEGEN_FUNCTION}); + REQUIRE(result.size() == 2); + + auto cur_function = reindent_text(to_nmodl(result[0])); + REQUIRE(cur_function == reindent_text(expected_cur_function)); + + auto state_function = reindent_text(to_nmodl(result[1])); + 
REQUIRE(state_function == reindent_text(expected_state_function)); + } + } +}