compat.yml

states:
  fullok:
    symbol: '<svg height="11.92" overflow="visible" version="1.1" width="11.92"><g transform="translate(0,11.92) matrix(1 0 0 -1 0 0) translate(5.96,0) translate(0,5.96)" fill="#B9D25F" stroke="#000000" stroke-width="0.4pt" color="#000000"><path d="M 0 0 M 5.96 0 C 5.96 3.29 3.29 5.96 0 5.96 C -3.29 5.96 -5.96 3.29 -5.96 0 C -5.96 -3.29 -3.29 -5.96 0 -5.96 C 3.29 -5.96 5.96 -3.29 5.96 0 Z M 0 0" style="stroke:none"></path></g></svg>'
    description: Full vendor support
  indirectok:
    symbol: '<svg height="7.92" overflow="visible" version="1.1" width="15.85"><g transform="translate(0,7.92) matrix(1 0 0 -1 0 0) translate(7.92,0) translate(0,4.75)" fill="#D3C65D" stroke="#000000" stroke-width="0.4pt" color="#000000"><path d="M -7.92 3.17 C -7.92 -1.21 -4.38 -4.75 0 -4.75 C 4.38 -4.75 7.92 -1.21 7.92 3.17 Z" style="stroke:none"></path></g></svg>'
    description: Indirect, but comprehensive support, by vendor
  prettyok:
    symbol: '<svg height="12.64" overflow="visible" version="1.1" width="12.64"><g transform="translate(0,12.64) matrix(1 0 0 -1 0 0) translate(6.32,0) translate(0,6.32)" fill="#FBBC6A" stroke="#000000" stroke-width="0.4pt" color="#000000"><path d="M 6.32 6.32 L -6.32 6.32 L -6.32 -6.32 L 6.32 -6.32 Z" style="stroke:none"></path></g></svg>'
    description: Vendor support, but not (yet) entirely comprehensive
  nonvendorok:
    symbol: '<svg height="13.4" overflow="visible" version="1.1" width="15.48"><g transform="translate(0,13.4) matrix(1 0 0 -1 0 0) translate(7.74,0) translate(0,4.47)" fill="#C7DB7F" stroke="#000000" stroke-width="0.4pt" color="#000000"><path d="M 0 8.94 L -7.74 -4.47 L 7.74 -4.47 Z" style="stroke:none"></path></g></svg>'
    description: Comprehensive support, but not by vendor
  somesupport:
    symbol: '<svg height="16.17" overflow="visible" version="1.1" width="17"><g transform="translate(0,16.17) matrix(1 0 0 -1 0 0) translate(8.5,0) translate(0,7.23)" fill="#F38966" stroke="#000000" stroke-width="0.4pt" color="#000000"><path d="M 0 8.94 L -2.45 3.37 L -8.5 2.76 L -3.97 -1.29 L -5.25 -7.23 L 0 -4.17 L 5.25 -7.23 L 3.97 -1.29 L 8.5 2.76 L 2.45 3.37 Z" style="stroke:none"></path></g></svg>'
    description: Limited, probably indirect support -- but at least some
  nope:
    symbol: '<svg height="9.45" overflow="visible" version="1.1" width="9.45"><g transform="translate(0,9.45) matrix(1 0 0 -1 0 0) translate(0.55,0) translate(0,0.55)" fill="#000000" stroke="#EB5F73" stroke-width="0.8pt" color="#000000"><path d="M 0 0 L 8.34 8.34" style="fill:none"></path></g></svg>'
    description: No direct support available, but of course one could ISO-C-bind your way through it or directly link the libraries
  C:
    symbol: C
    description: C++ (sometimes also C)
  F:
    symbol: F
    description: Fortran
vendors:
  NVIDIA:
    CUDA:
      C:
        cudac: fullok
      F:
        cudafortran: fullok
    HIP:
      C:
        nvidiahip: indirectok
      F:
        nvidiahipfortran: 
          - somesupport
          - nope
    SYCL:
      C:
        nvidiasycl: nonvendorok
      F: 
        syclfortran: nope
    OpenACC:
      C:
        openaccc: fullok
      F:
        openaccfortran: fullok
    OpenMP:
      C:
        nvidiaopenmpc: 
          - prettyok
          - nonvendorok
      F:
        nvidiaopenmpfortran: 
          - prettyok
          - fullok
    Standard:
      C:
        nvidiastandardc: fullok
      F:
        nvidiastandardfortran: fullok
    Kokkos:
      C:
        nvidiakokkosc: nonvendorok
      F:
        nvidiakokkosfortran: somesupport
    ALPAKA:
      C:
        nvidiaalpakac: nonvendorok
      F:
        nvidiaalpakafortran: nope
    etc:
      Python:
        nvidiapython: 
          - fullok
          - nonvendorok
  AMD:
    CUDA:
      C:
        amdcudac: indirectok        
      F:
        amdcudafortran: somesupport
    HIP:
      C:
        amdhipc: fullok 
      F:
        nvidiahipfortran: 
          - somesupport
          - nope
    SYCL:
      C:
        amdsyclc: nonvendorok
      F:
        syclfortran: nope
    OpenACC:
      C:
        amdopenaccc: nonvendorok
      F:
        amdopenaccfortran:
          - nonvendorok
          - somesupport
    OpenMP:
      C:
        amdopenmpc: 
          - fullok
          - nonvendorok
      F:
        amdopenmpfortran: fullok
    Standard:
      C:
        amdstandardc: 
          - nonvendorok
          - prettyok
          - somesupport
      F:
        amdstandardfortran: nope
    Kokkos:
      C:
        amdkokkosc: nonvendorok
      F:
        nvidiakokkosfortran: somesupport
    ALPAKA:
      C:
        amdalpakac: nonvendorok
      F:
        nvidiaalpakafortran: nope
    etc:
      Python:
        amdpython: somesupport
  Intel:
    CUDA:
      C:
        intelcudac: 
          - indirectok
          - nonvendorok
      F:
        intelcudafortran: nope
    HIP:
      C:
        intelhipc: nonvendorok
      F:
        intelhipfortran: nope
    SYCL:
      C:
        intelsyclc: fullok
      F:
        syclfortran: nope
    OpenACC:
      C:
        intelopenaccc: somesupport
      F:
        intelopenaccfortran: somesupport
    OpenMP:
      C:
        intelopenmpc: fullok
      F:
        intelopenmpfortran: fullok
    Standard:
      C:
        intelstandardc: 
          - fullok
          - prettyok
      F:
        intelstandardfortran: fullok
    Kokkos:
      C:
        intelkokkosc: nonvendorok
      F:
        nvidiakokkosfortran: somesupport
    ALPAKA:
      C:
        intelalpakac: nonvendorok
      F:
        nvidiaalpakafortran: nope
    etc:
      Python:
        intelpython: prettyok
descriptions:
  cudac: "CUDA C/C++ is supported on NVIDIA GPUs through the <a href='https://developer.nvidia.com/cuda-toolkit'>CUDA Toolkit</a>. First released in 2007, the toolkit covers nearly all aspects of the NVIDIA platform: an API for programming (incl. language extensions), libraries, tools for profiling and debugging, compiler, management tools, and more. The current version is CUDA 12.2. Usually, when referring to <em>CUDA</em> without any additional context, the CUDA API is meant. While incorporating some Open Source components, the CUDA platform in its entirety is proprietary and closed sourced. The low-level CUDA instruction set architecture is PTX, to which higher languages like the CUDA C/C++ are translated to. PTX is compiled to SASS, the binary code executed on the device. As it is the reference for platform, the support for NVIDIA GPUs through CUDA C/C++ is very comprehensive. In addition to support through the CUDA toolkit, NVIDIA GPUs can also be <a href='https://llvm.org/docs/CompileCudaWithLLVM.html'>used by Clang</a>, utilizing the LLVM toolchain to emit PTX code and compile it subsequently."
  cudafortran: "CUDA Fortran, a proprietary Fortran extension by NVIDIA, is supported on NVIDIA GPUs via the <a href='https://developer.nvidia.com/hpc-sdk'>NVIDIA HPC SDK</a> (<em>NVHPC</em>). NVHPC implements most features of the CUDA API in Fortran and is activated through the <code>-cuda</code> switch in the <code>nvfortran</code> compiler. The CUDA extensions for Fortran are modeled closely after the CUDA C/C++ definitions. In addition to creating explicit kernels in Fortran, CUDA Fortran also supports <em>cuf kernels</em>, a way to let the compiler generate GPU parallel code automatically. Very recently, <a href='https://reviews.llvm.org/D150159'>CUDA Fortran support was also merged into Flang</a>, the LLVM-based Fortran compiler."
  nvidiahip: "<a href='https://github.com/ROCm-Developer-Tools/HIP'>HIP</a> programs can directly use NVIDIA GPUs via a CUDA backend. As HIP is strongly inspired by CUDA, the mapping is relatively straight-forward; API calls are named similarly (for example: <code>hipMalloc()</code> instead of <code>cudaMalloc()</code>) and keywords of the kernel syntax are identical. HIP also supports some CUDA libraries and creates interfaces to them (like <code>hipblasSaxpy()</code> instead of <code>cublasSaxpy()</code>). To target NVIDIA GPUs through the HIP compiler (<code>hipcc</code>), <code>HIP_PLATFORM=nvidia</code> needs to be set in the environment. In order to initially create a HIP code from CUDA, AMD offers the <a href='https://github.com/ROCm-Developer-Tools/HIPIFY'>HIPIFY</a> conversion tool."
  nvidiahipfortran: "No Fortran version of HIP exists; HIP is solely a C/C++ model. But AMD offers an extensive set of ready-made interfaces to the HIP API and HIP and ROCm libraries with <a href='https://github.com/ROCmSoftwarePlatform/hipfort'>hipfort</a> (MIT-licensed). All interfaces implement C functionality and CUDA-like Fortran extensions, for example to write kernels, are available."
  nvidiasycl: "No direct support for <a href=\"https://www.khronos.org/sycl/\">SYCL</a> is available by NVIDIA, but SYCL can be used on NVIDIA GPUs through multiple venues. First, SYCL can be <a href=\"https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-nvidia-cuda\">used through DPC++</a>, an Open-Source LLVM-based compiler project <a href=\"https://github.com/intel/llvm\">led by Intel</a>. The DPC++ infrastructure is also available through Intel's commercial <a href=\"https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html\">oneAPI toolkit</a> (<em>Intel oneAPI DPC++/C++</em>) as <a href=\"https://developer.codeplay.com/products/oneapi/nvidia/2023.2.1/guides/get-started-guide-nvidia\">a dedicated plugin</a>. Upstreaming SYCL support directly into LLVM is an <a href=\"https://github.com/intel/llvm/issues/49\">ongoing effort</a>, which started <a href=\"https://lists.llvm.org/pipermail/cfe-dev/2019-January/060811.html\">in 2019</a>. Further, SYCL can be used via <a href=\"https://github.com/OpenSYCL/OpenSYCL/\">Open SYCL</a> (previously called hipSYCL), an independently developed SYCL implementation, using NVIDIA GPUs either through the CUDA support of LLVM or the <code>nvc++</code> compiler of NVHPC. A third popular possibility was the NVIDIA GPU support in <a href=\"https://github.com/codeplaysoftware/sycl-for-cuda/tree/cuda\">ComputeCpp of CodePlay</a>; though <a href=\"https://developer.codeplay.com/products/computecpp/ce/home/\">the product became unsupported in September 2023</a>. In case LLVM is involved, SYCL implementations can rely on CUDA support in LLVM, which needs the CUDA toolkit available for the final compilations parts beyond PTX. In order to translate a CUDA code to SYCL, Intel offers the <a href=\"https://github.com/oneapi-src/SYCLomatic\">SYCLomatic</a> conversion tool."
  syclfortran: 'SYCL is a C++-based programming model (C++17) and by its nature does not support Fortran. Also, no pre-made bindings are available.'
  openaccc: 'OpenACC C/C++ on NVIDIA GPUs is supported most extensively through the <a href="https://developer.nvidia.com/hpc-sdk">NVIDIA HPC SDK</a>. Beyond the bundled libraries, frameworks, and other models, the NVIDIA HPC SDK also features the <code>nvc</code>/<code>nvc++</code> compilers, in which <a href="https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#acc-use">OpenACC support</a> can be enabled with the <code>-acc -gpu</code>. The support of OpenACC in this vendor-delivered compiler is very comprehensive, it conforms to version 2.7 of the specification. A variety of compile options are available to modify the compilation process. In addition to NVIDIA HPC SDK, good support is also available in GCC since GCC 5.0, <a href="https://gcc.gnu.org/wiki/OpenACC">supporting OpenACC 2.6</a> through the <code>nvptx</code> architecture. The compiler switch to enable OpenACC in <code>gcc</code>/<code>g++</code> is <code>-fopenacc</code>, further options are available. Further, the <a href="https://csmd.ornl.gov/project/clacc">Clacc compiler</a> implements OpenACC support into the LLVM toolchain, adapting the Clang frontend. As a central design aspect, it translates OpenACC to OpenMP as part of the compilation process. OpenACC can be activated in a Clacc-<code>clang</code> via <code>-fopenacc</code>, and further compiler options exist, mostly leveraging OpenMP options. A recent study by <a href="https://ieeexplore.ieee.org/document/10029456">Jarmusch et al.</a> compared these compilers for coverage of the OpenACC 3.0 specification.'
  openaccfortran: 'Support of OpenACC Fortran on NVIDIA GPUs is similar to OpenACC C/C++, albeit not identical. First, <a href="https://developer.nvidia.com/hpc-sdk">NVIDIA HPC SDK</a> supports OpenACC in Fortran through the included <code>nvfortran</code> compiler, with options like for the C/C++ compilers. In addition, also <a href="https://gcc.gnu.org/wiki/OpenACC">GCC supports OpenACC</a> through the <code>gfortran</code> compiler with identical compiler options to the C/C++ compilers. Further, similar to OpenACC support in LLVM for C/C++ through <em>Clacc</em> contributions, the LLVM frontend for Fortran, <a href="https://flang.llvm.org/docs/">Flang</a> (the successor of <em>F18</em>, not <em>classic Flang</em>), <a href="https://flang.llvm.org/docs/OpenACC.html">supports OpenACC</a> as well. Support was initially contributed through the <a href="https://ieeexplore.ieee.org/document/9651310">Flacc project</a> and now resides in the main LLVM project. Finally, the <a href="https://www.hpe.com/psnow/doc/a50002303enw">HPE Cray Programming Environment</a> supports <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openacc.7.html">OpenACC Fortran</a>; in <code>ftn</code>, OpenACC can be enabled through <code>-hacc</code>.'
  nvidiaopenmpc: 'OpenMP in C/C++ is supported on NVIDIA GPUs (<em>Offloading</em>) through multiple venues, similarly to OpenACC. First, the NVIDIA HPC SDK supports <a href="https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#openmp-use">OpenMP GPU offloading</a> in both <code>nvc</code> and <code>nvc++</code>, albeit only a subset of the entire OpenMP 5.0 standard (see <a href="https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#openmp-subset">the documentation for supported/unsupported features</a>). The key compiler option is <code>-mp</code>. Also in GCC, <a href="https://gcc.gnu.org/wiki/Offloading">OpenMP offloading</a> can be used to NVIDIA GPUs; the compiler switch is <code>-fopenmp</code>, with options delivered through <a href="https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html#index-foffload"><code>-foffload</code> and <code>-foffload-options</code></a>. GCC <a href="https://gcc.gnu.org/onlinedocs/gcc-13.1.0/libgomp/OpenMP-Implementation-Status.html">currently supports OpenMP 4.5 entirely</a>, while OpenMP features of 5.0, 5.1, and, 5.2 are currently being implemented. Similarly in Clang, where <a href="https://clang.llvm.org/docs/OffloadingDesign.html">OpenMP offloading to NVIDIA GPUs</a> is supported and enabled through <code>-fopenmp -fopenmp-targets=nvptx64</code>, with offload architectures selected via <code>--offload-arch=native</code> (or similar). Clang implements <a href="https://clang.llvm.org/docs/OpenMPSupport.html#openmp-implementation-details">nearly all OpenMP 5.0 features and most of OpenMP 5.1/5.2</a>. In the HPE Cray Programming Environment, a <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openmp.7.html">subset of OpenMP 5.0/5.1 is supported</a> for NVIDIA GPUs. It can be activated through <code>-fopenmp</code>. Also <a href="https://github.com/ROCm-Developer-Tools/aomp/">AOMP</a>, AMD''s Clang/LLVM-based compiler, supports NVIDIA GPUs. Support of OpenMP features in the compilers was recently discussed in the <a href="https://www.openmp.org/wp-content/uploads/2022_ECP_Community_BoF_Days-OpenMP_RoadMap_BoF.pdf">OpenMP ECP BoF 2022</a>.'
  nvidiaopenmpfortran: 'OpenMP in Fortran is supported on NVIDIA GPUs nearly identical to C/C++. <a href="https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#openmp-use">NVIDIA HPC SDK''s <code>nvfortran</code></a> implements support, <a href="https://gcc.gnu.org/wiki/openmp">GCC''s <code>gfortran</code></a>, <a href="https://flang.llvm.org/docs/">LLVM''s Flang</a> (through <code>-mp</code>, and <a href="https://flang.llvm.org/docs/GettingStarted.html#openmp-target-offload-build">only when Flang is compiled via Clang</a>), and also the <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openmp.7.html">HPE Cray Programming Environment</a>.'
  nvidiastandardc: 'Standard language parallelism of C++, namely algorithms and data structures of the <em>parallel STL</em>, is supported on NVIDIA GPUs <a href="https://docs.nvidia.com/hpc-sdk/compilers/c++-parallel-algorithms/index.html">through the <code>nvc++</code> compiler of the NVIDIA HPC SDK</a>. The key compiler option is <code>-stdpar=gpu</code>, which enables offloading of parallel algorithms to the GPU. Also, currently Open SYCL <a href="https://github.com/OpenSYCL/OpenSYCL/pull/1088">is in the process of implementing support for pSTL algorithms</a>, enabled via <code>--hipsycl-stdpar</code>. Further, <a href="https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain-with-support-for-nvidia-cuda">NVIDIA GPUs can be targeted from Intel''s DPC++ compiler</a>, enabling usage of pSTL algorithms implemented in Intel''s Open Source <a href="https://github.com/oneapi-src/oneDPL">oneDPL</a> (<em>oneAPI DPC++ Library</em>) on NVIDIA GPUs. Finally, a <a href="https://discourse.llvm.org/t/rfc-openmp-offloading-backend-for-c-parallel-algorithms/73468">current proposal in the LLVM community</a> aims at implementing pSTL support through an OpenMP backend.'
  nvidiastandardfortran: 'Standard language parallelism of Fortran, mainly <code>do concurrent</code>, is supported on NVIDIA GPUs <a href="https://developer.nvidia.com/blog/accelerating-fortran-do-concurrent-with-gpus-and-the-nvidia-hpc-sdk/">through the <code>nvfortran</code> compiler of the NVIDIA HPC SDK</a>. As for the C++ case, it is enabled through the <code>-stdpar=gpu</code> compiler option.' # also: gfortran at summer of code, but nothing seems to have come out of it: https://gcc.gnu.org/wiki/SummerOfCode?action=recall&rev=242
  nvidiakokkosc: '<a href="https://github.com/kokkos/kokkos">Kokkos</a> supports NVIDIA GPUs in C++. Kokkos has <a href="https://kokkos.github.io/kokkos-core-wiki/requirements.html">multiple backends</a> available with NVIDIA GPU support: a native CUDA C/C++ backend (using <code>nvcc</code>), an NVIDIA HPC SDK backend (using CUDA support in <code>nvc++</code>), and a Clang backend, using either Clang''s CUDA support directly or <a href="https://docs.nersc.gov/development/programming-models/kokkos/">via the OpenMP offloading facilities</a> (via <code>clang++</code>).'
  nvidiakokkosfortran: 'Kokkos is a C++ programming model, but an official compatibility layer for Fortran (<a href="https://github.com/kokkos/kokkos-fortran-interop"><em>Fortran Language Compatibility Layer</em>, FLCL</a>) is available. Through this layer, GPUs can be used as supported by Kokkos C++.'
  nvidiaalpakac: '<a href="https://github.com/alpaka-group/alpaka">Alpaka</a> supports NVIDIA GPUs in C++ (C++17), either through the NVIDIA CUDA C/C++ compiler <code>nvcc</code> or LLVM/Clang''s support of CUDA in <code>clang++</code>.'
  nvidiaalpakafortran: 'Alpaka is a C++ programming model and no ready-made Fortran support exists.'
  nvidiapython: 'Using NVIDIA GPUs from Python code can be achieved through multiple venues. NVIDIA itself offers <a href="https://github.com/NVIDIA/cuda-python">CUDA Python</a>, a package delivering low-level interfaces to CUDA C/C++. Typically, code is not directly written using CUDA Python, but rather CUDA Python functions as a backend for higher level models. CUDA Python is available on PyPI as <a href="https://pypi.org/project/cuda-python/"><code>cuda-python</code></a>. An alternative to CUDA Python from the community is <a href="https://github.com/inducer/pycuda">PyCUDA</a>, which adds some higher-level features and functionality and comes with its own C++ base layer. PyCUDA is available on PyPI as <a href="https://pypi.org/project/pycuda/"><code>pycuda</code></a>. The most well-known, higher-level abstraction is <a href="https://cupy.dev/">CuPy</a>, which implements primitives known from Numpy with GPU support, offers functionality for defining custom kernels, and bindings to libraries. CuPy is available on PyPI as <a href="https://pypi.org/project/cupy-cuda12x/"><code>cupy-cuda12x</code></a> (for CUDA 12.x). Two packages arguably providing even higher abstractions are Numba and CuNumeric. <a href="http://numba.pydata.org/">Numba</a> offers access to NVIDIA GPUs and features  acceleration of functions through Python decorators (<em>functions wrapping functions</em>); it is available as <a href="https://pypi.org/project/numba/"><code>numba</code></a> on PyPI. <a href="https://github.com/nv-legate/cunumeric">cuNumeric</a>, a project by NVIDIA, allows to access the GPU via Numpy-inspired functions (like CuPy), but utilizes the <a href="https://github.com/nv-legate/legate.core">Legate library</a> to transparently scale to multiple GPUs.'
  amdcudac: 'While CUDA is not directly supported on AMD GPUs, it can be translated to HIP through AMD''s <a href="https://github.com/ROCm-Developer-Tools/HIPIFY">HIPIFY</a>. Using <code>hipcc</code> and <code>HIP_PLATFORM=amd</code> in the environment, CUDA-to-HIP-translated code can be executed.'
  amdcudafortran: 'No direct support for CUDA Fortran on AMD GPUs is available, but AMD offers a source-to-source translator, <a href="https://github.com/ROCmSoftwarePlatform/gpufort">GPUFORT</a>, to convert some CUDA Fortran to either Fortran with OpenMP (via <a href="https://github.com/ROCm-Developer-Tools/aomp">AOMP</a>) or Fortran with HIP bindings and extracted C kernels (via <a href="https://github.com/ROCmSoftwarePlatform/hipfort">hipfort</a>). As stated in the project repository, the covered functionality is <a href="https://github.com/ROCmSoftwarePlatform/gpufort#limitations">driven by use-case requirements</a>; the last commit is two years old.'
  amdhipc: '<a href="https://github.com/ROCm-Developer-Tools/HIP">HIP</a> C++ is the <em>native</em> programming model for AMD GPUs and, as such, fully supports the devices. It is part of AMD''s GPU-targeted <a href="https://rocm.docs.amd.com/en/latest/">ROCm platform</a>, which includes compilers, libraries, tool, and drivers and mostly consists of Open Source Software. HIP code can be compiled with <a href="https://github.com/ROCm-Developer-Tools/HIPCC"><code>hipcc</code></a>, utilizing the correct environment variables (like <code>HIP_PLATFORM=amd</code>) and compiler options (like <code>--offload-arch=gfx90a</code>). <code>hipcc</code> is a <em>compiler driver</em> (wrapper script) which assembles the correct compilation string, finally calling <a href="https://github.com/RadeonOpenCompute/llvm-project">AMD''s Clang compiler</a> to generate host/device code (using the <a href="https://llvm.org/docs/AMDGPUUsage.html">AMDGPU backend</a>).'
  amdsyclc: 'No direct support for SYCL is available by AMD for their GPU devices. But like for the NVIDIA ecosystem, SYCL C++ can be used on AMD GPUs through third-party software. First, <a href="https://github.com/OpenSYCL/OpenSYCL">Open SYCL</a> (previously <em>hipSYCL</em>) supports AMD GPUs, relying on HIP/ROCm support in Clang. All available <a href="https://github.com/OpenSYCL/OpenSYCL/blob/develop/doc/compilation.md">internal compilation models</a> can target AMD GPUs. Second, also AMD GPUs can be targeted through both <a href="https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-hip-amd">DPC++</a>, Intel''s LLVM-based Open Source compiler, and the commercial version included in the <a href="https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html">oneAPI toolkit</a> (via an <a href="https://developer.codeplay.com/products/oneapi/amd/2023.2.1/guides/get-started-guide-amd">AMD ROCm plugin</a>). In comparison to SYCL support for CUDA, no conversion tool like SYCLomatic exists.'
  amdopenaccc: 'OpenACC C/C++ is not supported by AMD itself, but third-party support is available for AMD GPUs through GCC or Clacc (similarly to their support of OpenACC C/C++ for NVIDI GPUS). In <a href="https://gcc.gnu.org/wiki/Offloading">GCC, OpenACC support</a> can be activated through <code>-fopenacc</code>, and further specified for AMD GPUs with, for example, <code>-foffload=amdgcn-amdhsa="-march=gfx906"</code>. <a href="https://csmd.ornl.gov/project/clacc">Clacc also supports OpenACC C/C++ on AMD GPUs</a> by translating OpenACC to OpenMP and using LLVM''s AMD support. The enabling compiler switch is <code>-fopenacc</code>, and AMD GPU targets can be further specified by, for example, <code>-fopenmp-targets=amdgcn-amd-amdhsa</code>. <a href=\"https://github.com/intel/intel-application-migration-tool-for-openacc-to-openmp\">Intel''s OpenACC to OpenMP source-to-source translator</a> can also be used for AMD''s platform.'
  amdopenaccfortran: 'No native support for OpenACC on AMD GPUs for Fortran is available, but AMD supplies <a href="https://github.com/ROCmSoftwarePlatform/gpufort">GPUFORT</a>, a research project to source-to-source translate OpenACC Fortran to either Fortran with added OpenMP or Fortran with HIP bindings and extracted C kernels (using <a href="https://github.com/ROCmSoftwarePlatform/hipfort">hipfort</a>). The covered functionality of GPUFORT is driven by use-case requirements, the last commit is two years old. Support for OpenACC Fortran is also available by the community through <a href="https://gcc.gnu.org/onlinedocs/gfortran/OpenACC.html">GCC (<code>gfortran</code>)</a> and upcoming in <a href="https://ieeexplore.ieee.org/document/9651310">LLVM (Flacc)</a>. Also the <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openacc.7.html">HPE Cray Programming Environment supports OpenACC Fortran</a> on AMD GPUs. In addition, the <a href="https://github.com/intel/intel-application-migration-tool-for-openacc-to-openmp">translator tool to convert OpenACC source to OpenMP source by Intel</a> can be used.'
  amdopenmpc: 'AMD offers <a href="https://github.com/ROCm-Developer-Tools/aomp">AOMP</a>, a dedicated, Clang-based compiler for using OpenMP C/C++ on AMD GPUs (<em>offloading</em>). AOMP is usually shipped with ROCm. The compiler <a href="https://www.exascaleproject.org/wp-content/uploads/2022/02/Elwasif-ECP-sollve_vv_final.pdf">supports most OpenMP 4.5 and some OpenMP 5.0 features</a>. Since the compiler is Clang-based, the usual Clang compiler options apply (<code>-fopenmp</code> to enable OpenMP parsing, and others). Also in the upstream Clang compiler, <a href="https://clang.llvm.org/docs/OffloadingDesign.html">AMD GPUs can be targeted through OpenMP</a>; as outlined for NVIDIA GPUs, the support for OpenMP 5.0 is nearly complete, and support for OpenMP 5.1/5.2 is comprehensive. In addition, the <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openmp.7.html">HPE Cray Programming Environment</a> supports OpenMP on AMD GPUs.'
  amdopenmpfortran: 'Through <a href="https://github.com/ROCm-Developer-Tools/aomp">AOMP</a>, AMD supports OpenMP offloading to AMD GPUs in Fortran, using the <code>flang</code> executable and Clang-typical compiler options (foremost <code>-fopenmp</code>). Support for AMD GPUs is also available through the <a href="https://cpe.ext.hpe.com/docs/cce/man7/intro_openmp.7.html">HPE Cray Programming Environment</a>.'
  amdstandardc: 'AMD does not yet provide production-grade support for Standard-language parallelism in C++ for their GPUs. Currently under development is <a href="https://github.com/ROCmSoftwarePlatform/roc-stdpar"><em>roc-stdpar</em></a> (ROCm Standard Parallelism Runtime Implementation), which aims to supply pSTL algorithms on the GPU and <a href="https://discourse.llvm.org/t/rfc-adding-c-parallel-algorithm-offload-support-to-clang-llvm/72159">merge the implementation with upstream LLVM</a>. Support for GPU-parallel algorithms is enabled with <code>-stdpar</code>. An <a href="https://discourse.llvm.org/t/rfc-openmp-offloading-backend-for-c-parallel-algorithms/73468">alternative proposal in the LLVM</a> community aims to support the pSTL via an OpenMP backend. Also Open SYCL <a href="https://github.com/OpenSYCL/OpenSYCL/pull/1088">is in the process of creating support for C++ parallel algorithms</a> via a <code>--hipsycl-stdpar</code> switch. By using Open SYCL''s backends, also AMD GPUs are supported. Intel provides the Open Source <a href="https://github.com/oneapi-src/oneDPL">oneDPL</a> (<em>oneAPI DPC++ Library</em>) which <a href="https://oneapi-src.github.io/oneDPL/parallel_api_main.html">implements pSTL algorithms</a> through the DPC++ compiler (see also <em>C++ Standard Parallelism for Intel GPUs</em>). DPC++ has <a href="https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain-with-support-for-hip-amd">experimental support for AMD GPUs</a>.'
  amdstandardfortran: 'There is no (known) way to launch Standard-based parallel algorithms in Fortran on AMD GPUs.'
  amdkokkosc: '<a href="https://github.com/kokkos/kokkos">Kokkos</a> supports AMD GPUs in C++ mainly through the HIP/ROCm backend. Also, an OpenMP offloading backend is available.'
  amdalpakac: '<a href="https://github.com/alpaka-group/alpaka">Alpaka</a> supports AMD GPUs in C++ through HIP or through an OpenMP backend.'
  amdpython: 'AMD does not officially support GPU programming with Python, but third-party solutions are available. <a href="https://docs.cupy.dev/en/latest/install.html#using-cupy-on-amd-gpu-experimental">CuPy</a> experimentally supports AMD GPUs/ROCm. The package can be found on PyPI as <code>cupy-rocm-5-0</code>. Numba once had <a href="https://numba.pydata.org/numba-doc/latest/roc/index.html">support for AMD GPUs</a>, but it is <a href="https://numba.readthedocs.io/en/stable/release-notes.html#version-0-54-0-19-august-2021">not maintained anymore</a>. Low-level bindings from Python to HIP exist, for example <a href="https://github.com/jatinx/PyHIP">PyHIP</a> (available as <code>pyhip-interface</code> on PyPI). Bindings to OpenCL also exist (<a href="https://documen.tician.de/pyopencl/">PyOpenCL</a>).'
  intelcudac: "Intel itself does not support CUDA C/C++ on their GPUs. They offer <a href='https://github.com/oneapi-src/SYCLomatic'>SYCLomatic</a>, though, an Open Source tool to translate CUDA code to SYCL code, allowing it to run on Intel GPUs. The commercial variant of SYCLomatic is called the <a href='https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html'>DPC++ Compatibility Tool</a> and bundled with oneAPI toolkit. The community project <a href='https://github.com/CHIP-SPV/chipStar'>chipStar</a> (previously called CHIP-SPV, recently released a 1.0 version) allows to target Intel GPUs from CUDA C/C++ code by using the CUDA support in Clang. chipStar delivers a <a href='https://github.com/CHIP-SPV/chipStar/blob/main/docs/Using.md#compiling-cuda-application-directly-with-chipstar'>Clang-wrapper, <code>cuspv</code></a>, which replaces calls to <code>nvcc</code>. Also <a href='https://github.com/vosen/ZLUDA'>ZLUDA</a> exists, which implements CUDA support for Intel GPUs; it is not maintained anymore, though."
  intelcudafortran: "No direct support exists for CUDA Fortran on Intel GPUs. A simple example to bind SYCL to a (CUDA) Fortran program (via ISO C BINDING) can be <a href='https://github.com/codeplaysoftware/SYCL-For-CUDA-Examples/tree/master/examples/fortran_interface'>found on GitHub</a>."
  intelhipc: 'No native support for HIP C++ on Intel GPUs exists. The Open Source third-party project <a href="https://github.com/CHIP-SPV/chipStar">chipStar</a> (previously called CHIP-SPV), though, supports <a href="https://github.com/CHIP-SPV/chipStar/blob/main/docs/Using.md#compiling-a-hip-application-using-chipstar">HIP on Intel GPUs</a> by mapping it to OpenCL or Intel''s Level Zero runtime. The compiler uses an LLVM-based toolchain and relies on its HIP and SPIR-V functionality.'
  intelhipfortran: "HIP for Fortran does not exist, and also no translation efforts for Intel GPUs."
  intelsyclc: '<a href="https://www.khronos.org/sycl/">SYCL</a> is a C++17-based standard and selected by Intel as the prime programming model for Intel GPUs. Intel implements SYCL support for their GPUs <a href="https://github.com/intel/llvm">via DPC++</a>, an LLVM-based compiler toolchain. Currently, Intel maintains an own fork of LLVM, but <a href="https://lists.llvm.org/pipermail/cfe-dev/2019-January/060811.html">plans to upstream the changes</a> to the main LLVM repository. Based on DPC++, Intel releases a <a href="https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html">commercial <em>Intel oneAPI DPC++</em> compiler</a> as part of the <a href="https://www.intel.com/content/www/us/en/developer/tools/oneapi/toolkits.html">oneAPI toolkit</a>. The third-party project Open SYCL also supports Intel GPUs, by leveraging/creating LLVM support (either SPIR-V or Level Zero). A previous solution for targeting Intel GPUs from SYCL was <a href="https://developer.codeplay.com/products/computecpp/ce/home/">ComputeCpp of CodePlay</a>. The project became unsupported in September 2023 (in favor of implementations to the DPC++ project).'
  intelopenaccc: 'No direct support for OpenACC C/C++ is available for Intel GPUs. Intel offers a Python-based tool to translate source files with OpenACC C/C++ to OpenMP C/C++, the <a href="https://github.com/intel/intel-application-migration-tool-for-openacc-to-openmp"><em>Application Migration Tool for OpenACC to OpenMP API</em></a>.'
  intelopenaccfortran: 'Also for OpenACC Fortran, no direct support is available for Intel GPUs. Intel''s <a href="https://github.com/intel/intel-application-migration-tool-for-openacc-to-openmp">source-to-source translation tool from OpenACC to OpenMP</a> also supports Fortran, though.'
  intelopenmpc: 'OpenMP is a second key programming model for Intel GPUs and <a href="https://www.intel.com/content/www/us/en/develop/documentation/get-started-with-cpp-fortran-compiler-openmp/top.html">well-supported by Intel</a>. For C++, the support is built into the commercial version of DPC++/C++, <em>Intel oneAPI DPC++/C++</em>. All <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/openmp-features-and-extensions-supported-in-icx.html">OpenMP 4.5 and most OpenMP 5.0 and 5.1 features are supported</a>. OpenMP can be enabled through the <code>-qopenmp</code> compiler option of <code>icpx</code>; a suitable offloading target can be given via <code>-fopenmp-targets=spir64</code>.'
  intelopenmpfortran: 'OpenMP in Fortran is Intel''s main selected route to bring Fortran applications to their GPUs. OpenMP offloading in Fortran is supported through <a href="https://www.intel.com/content/www/us/en/docs/fortran-compiler/developer-guide-reference/2023-2/overview.html">Intel''s Fortran Compiler <code>ifx</code></a> (the new LLVM-based version, not the <em>Fortran Compiler Classic</em>), part of the oneAPI HPC Toolkit. Similarly to C++, OpenMP offloading can be enabled through a combination of <code>-qopenmp</code> and <code>-fopenmp-targets=spir64</code>.'
  intelstandardc: 'Intel supports C++ standard parallelism (<em>pSTL</em>) through the Open Source <a href="https://oneapi-src.github.io/oneDPL/index.html">oneDPL</a> (oneAPI DPC++ Library), also available as part of the oneAPI toolkit. It <a href="https://oneapi-src.github.io/oneDPL/parallel_api_main.html">implements the pSTL</a> on top of the DPC++ compiler, algorithms, data structures, and policies live in the <code>oneapi::dpl::</code> namespace. In addition, <a href="https://github.com/OpenSYCL/OpenSYCL/pull/1088">Open SYCL is current adding support for C++ parallel algorithms</a>, to be enabled via the <code>--hipsycl-stdpar</code> compiler option.'
  intelstandardfortran: 'Standard language parallelism of Fortran is supported by Intel on their GPUs through the Intel Fortran Compiler <code>ifx</code> (the new, LLVM-based compiler, not the <em>Classic</em> version), part of the oneAPI HPC toolkit. In the <a href="https://www.intel.com/content/www/us/en/developer/articles/release-notes/fortran-compiler-release-notes.html">oneAPI update 2022.1</a>, the <a href="https://www.intel.com/content/www/us/en/docs/fortran-compiler/developer-guide-reference/2023-2/do-concurrent.html"><code>do concurrent</code> support</a> was added and extended in further releases. It can be used via the <code>-qopenmp</code> compiler option together with <code>-fopenmp-target-do-concurrent</code> and <code>-fopenmp-targets=spir64</code>.'
  intelkokkosc: 'No direct support by Intel for Kokkos is available, but <a href="https://kokkos.github.io/kokkos-core-wiki/">Kokkos</a> supports Intel GPUs through an experimental SYCL backend.'
  intelalpakac: 'Since <a href="https://github.com/alpaka-group/alpaka/releases/tag/0.9.0">v.0.9.0</a>, <a href="https://github.com/alpaka-group/alpaka">Alpaka</a> contains experimental SYCL support with which Intel GPUs can be targeted. Also, Alpaka can fall back to an OpenMP backend.'
  intelpython: 'Intel GPUs can be used from Python through three notable packages. First, Intel''s <a href="https://github.com/IntelPython/dpctl"><em>Data Parallel Control</em> (dpctl)</a> implements low-level Python bindings to SYCL functionality. It is available on PyPI as <a href="https://pypi.org/project/dpctl/"><code>dpctl</code></a>. Second, a higher level, Intel''s <a href="https://github.com/IntelPython/numba-dpex"><em>Data-parallel Extension to Numba</em> (numba-dpex)</a> supplies an extension to the JIT functionality of Numba to support Intel GPUs. It is available from Anaconda as <a href="https://anaconda.org/intel/numba-dpex"><code>numba-dpex</code></a>. Finally, and arguably highest level, Intel''s <a href="https://github.com/IntelPython/dpnp"><em>Data Parallel Extension for Numpy</em> (dpnp)</a> builds up on the Numpy API and extends some functions with Intel GPU support. It is available on PyPI as <a href="https://pypi.org/project/dpnp/"><code>dpnp</code></a>, although latest versions appear to be available <a href="https://github.com/IntelPython/dpnp/releases">only on GitHub</a>.'
references:
  cudac: CUDA
  cudafortran: CUDAFortran
  nvidiahip: HIP
  nvidiahipfortran: hipfort
  nvidiasycl: intelllvm,opensyclproceedings
  syclfortran: khronossycl
  openaccc: nvhpc,gccopenacc,claccieee,jarmusch22
  openaccfortran: nvhpc,gccopenacc,flaccieee
  nvidiaopenmpc: nvhpc,gccopenmp,clangopenmp,hpepe
  nvidiaopenmpfortran: nvhpc,gccopenmp,hpepe,flang
  nvidiastandardc: nvhpc,opensyclproceedings,onedpl
  nvidiastandardfortran: nvhpc
  nvidiakokkosc: kokkos
  nvidiakokkosfortran: kokkos
  nvidiaalpakac: alpaka
  nvidiaalpakafortran: alpaka
  nvidiapython: cudapython,pycuda,cupy,numba,cunumeric
  amdcudac: HIP
  amdcudafortran: gpufort
  amdhipc: HIP
  amdsyclc: opensyclproceedings,intelllvm
  amdopenaccc: gccopenacc,claccieee
  amdopenaccfortran: gpufort,gccopenacc,flaccieee
  amdopenmpc: aomp,ecpopenmpbof,hpepe
  amdopenmpfortran: aomp,hpepe
  amdstandardc: rocstdpar,opensyclproceedings,onedpl
  amdkokkosc: kokkos
  amdalpakac: alpaka
  amdpython: cudapython
  intelcudac: syclomatic,chipstar,oneapi
  intelhipc: chipstar
  intelsyclc: intelllvm,oneapi,opensyclproceedings
  intelopenaccc: acc2mp
  intelopenaccfortran: acc2mp
  intelopenmpc: oneapi
  intelopenmpfortran: oneapi
  intelstandardc: onedpl
  intelstandardfortran: oneapi
  intelkokkosc: kokkos
  intelpython: dpctl,numba-dpex,dpnp