ci: setup pre-commit (#584)
As requested in #506 (comment), this PR sets up pre-commit for flashinfer.

Co-authored-by: Yixin Dong <[email protected]>
yzh119 and Ubospica authored Nov 5, 2024
1 parent e5cafde commit 979bb6c
Showing 88 changed files with 1,288 additions and 1,059 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build-doc.yml
@@ -41,7 +41,7 @@ jobs:
        run: |
          cd docs
          make html
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
@@ -51,4 +51,3 @@ jobs:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
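The two commands in this job are easy to reproduce locally to preview the site before pushing (a sketch; the requirements path and the Sphinx output directory are assumptions about the docs setup):

```bash
pip install -r docs/requirements.txt   # assumed location of the docs dependencies
cd docs
make html                              # Sphinx writes the site to _build/html by default
python -m http.server --directory _build/html 8000   # browse at http://localhost:8000
```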

1 change: 0 additions & 1 deletion .gitmodules
@@ -16,4 +16,3 @@
[submodule "3rdparty/spdlog"]
path = 3rdparty/spdlog
url = https://github.com/gabime/spdlog.git
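The 3rdparty submodules listed here still have to be fetched before building; a standard git workflow (not specific to this PR) looks like:

```bash
# Clone with all submodules in one go,
git clone --recursive https://github.com/flashinfer-ai/flashinfer.git
# or fetch them inside an existing checkout.
git submodule update --init --recursive
```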

61 changes: 61 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,61 @@
# To use:
#
# pre-commit run -a
#
# Or:
#
# pre-commit install  # (runs every time you commit in git)
#
# To update this file:
#
# pre-commit autoupdate
#
# See https://github.com/pre-commit/pre-commit
# Note: the pre-commit hooks should only be used for formatting, not for linting.
# For linting, consider using CI.
repos:
  # Standard hooks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-added-large-files
      - id: check-case-conflict
      - id: check-merge-conflict
      - id: check-symlinks
      - id: end-of-file-fixer
      - id: mixed-line-ending
      - id: requirements-txt-fixer
      - id: trailing-whitespace

  # Changes tabs to spaces
  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.5.5
    hooks:
      - id: remove-tabs
      - id: remove-crlf

  # Formatters
  - repo: https://github.com/psf/black-pre-commit-mirror
    rev: 24.8.0
    hooks:
      - id: black

  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
        args: ["--profile=black"]  # keep isort's import ordering compatible with black

  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v19.1.1
    hooks:
      - id: clang-format
        types_or: [c++, c, cuda]
        exclude: |
          (?x)^(3rdparty/.*|src/generated/.*|python/flashinfer/jit/aot_config.py|python/csrc_aot/generated/.*)$

  - repo: https://github.com/cheshirekow/cmake-format-precommit
    rev: v0.6.13
    hooks:
      - id: cmake-format
        additional_dependencies: [pyyaml>=5.1]
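For contributors, the comments at the top of this file boil down to the following workflow (a sketch of standard pre-commit usage, nothing flashinfer-specific). Note that the clang-format `exclude:` pattern is a verbose regex (`(?x)`), so whitespace and newlines inside it are ignored when matching paths.

```bash
pip install pre-commit   # one-time: get the pre-commit CLI
pre-commit install       # register the git hook so checks run on every commit
pre-commit run -a        # or run every hook against the whole repository now
pre-commit autoupdate    # bump the rev: pins in .pre-commit-config.yaml
```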
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -113,7 +113,7 @@ We thank contributions and feedbacks from the community: [@comaniac](https://git
* bugfix: Fix cudagraph mode of BatchPrefillWithRaggedKVCacheWrapper ([#412](https://github.com/flashinfer-ai/flashinfer/pull/412)) ([9907bc](https://github.com/flashinfer-ai/flashinfer/commit/9907bc163eec7677870014b6ed5bb1789cc584f0))
* fix cu118 cub usage for sampling kernels ([#410](https://github.com/flashinfer-ai/flashinfer/pull/410)) ([58d359](https://github.com/flashinfer-ai/flashinfer/commit/58d35930740083f27e65c9818ab857f9f4880aff))

### Misc

Break up `_kernels` into multiple modules

* enhance allocator error info and add shape check for prefill begin forward functions ([#413](https://github.com/flashinfer-ai/flashinfer/pull/413)) ([5e36c5](https://github.com/flashinfer-ai/flashinfer/commit/5e36c527bb10c9331a17d4ecd609120406280979))

504 changes: 334 additions & 170 deletions CMakeLists.txt

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions README.md
@@ -73,8 +73,8 @@ kv_len = 2048
num_kv_heads = 32
head_dim = 128

k = torch.randn(kv_len, num_kv_heads, head_dim).half().to(0)
v = torch.randn(kv_len, num_kv_heads, head_dim).half().to(0)

# decode attention

@@ -110,7 +110,7 @@ cmake ..
make -j12
```

You can run `./bench_{single/batch}_{prefill/decode}` to benchmark the performance (e.g. `./bench_single_prefill` for single-request prefill attention). `./bench_{single/batch}_{prefill/decode} --help` will show you the available options.
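For example, a typical session from the `build` directory might look like this (a sketch; the binary name follows the pattern quoted above, and nvbench binaries accept `--help` for the full option list):

```bash
cd build
./bench_single_prefill --help   # list the available benchmark options
./bench_single_prefill          # run the single-request prefill benchmark
```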

## C++ API and TVM Bindings

21 changes: 11 additions & 10 deletions cmake/config.cmake
@@ -22,21 +22,22 @@ set(FLASHINFER_FASTDIV_TEST ON)
set(FLASHINFER_FASTDEQUANT_TEST ON)
# Whether to compile distributed tests
set(FLASHINFER_DISTRIBUTED ON)
# The following configurations can impact the binary
# size of the generated library
# The following configurations can impact the binary size of the generated
# library
set(FLASHINFER_GEN_HEAD_DIMS 64 128 256 512)
set(FLASHINFER_GEN_KV_LAYOUTS 0 1)
set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1 2)
set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false" "true")
set(FLASHINFER_GEN_MASK_MODES 0 1 2)

# Set target cuda architectures for tests/benchmarks, defaults to native.
# "native" is a special value for CMAKE_CUDA_ARCHITECTURES which means use the architectures of the host's GPU.
# it's new in CMake 3.24, if you are using an older of CMake or you want to use a different value, you can
# set its value here. Supported CUDA architctures include 80;86;89;90
# NOTE(Zihao): using "native" might be slow because whenever compile a cuda file with `-arch=native`, nvcc will spawn
# a `__nvcc_device_query` process to get the architecture of the host's GPU, which could stall the compilation process.
# So it's recommended to set it to a specific value if you know the architecture of the target GPU.
# Example:
# set(FLASHINFER_CUDA_ARCHITECTURES 80)
# "native" is a special value for CMAKE_CUDA_ARCHITECTURES which means use the
# architectures of the host's GPU. it's new in CMake 3.24, if you are using an
# older of CMake or you want to use a different value, you can set its value
# here. Supported CUDA architctures include 80;86;89;90
# NOTE(Zihao): using "native" might be slow because whenever compile a cuda file
# with `-arch=native`, nvcc will spawn a `__nvcc_device_query` process to get
# the architecture of the host's GPU, which could stall the compilation process.
# So it's recommended to set it to a specific value if you know the architecture
# of the target GPU. Example: set(FLASHINFER_CUDA_ARCHITECTURES 80)
set(FLASHINFER_CUDA_ARCHITECTURES native)
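If you prefer pinning an explicit architecture over `native`, one way to look up the compute capability of the local GPU is via `nvidia-smi` (a sketch; the `compute_cap` query field requires a reasonably recent driver):

```bash
# Prints e.g. "NVIDIA A100, 8.0"; drop the dot to get the value to put in
# set(FLASHINFER_CUDA_ARCHITECTURES 80) in cmake/config.cmake.
nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader
```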
107 changes: 43 additions & 64 deletions cmake/modules/FindThrust.cmake
@@ -1,32 +1,31 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2012 Sandia Corporation.
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
##=============================================================================
# =============================================================================
#
# Copyright (c) Kitware, Inc. All rights reserved. See LICENSE.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# above copyright notice for more information.
#
# Copyright 2012 Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain
# rights in this software.
#
# =============================================================================

#
# FindThrust
#
# This module finds the Thrust header files and extracts their version. It
# sets the following variables.
# This module finds the Thrust header files and extracts their version. It sets
# the following variables.
#
# THRUST_INCLUDE_DIR - Include directory for thrust header files. (All header
# files will actually be in the thrust subdirectory.)
# THRUST_VERSION - Version of thrust in the form "major.minor.patch".
# files will actually be in the thrust subdirectory.) THRUST_VERSION - Version
# of thrust in the form "major.minor.patch".
#

find_path( THRUST_INCLUDE_DIR
HINTS ./
find_path(
THRUST_INCLUDE_DIR
HINTS ./
../thrust
../../thrust
../../../thrust
@@ -35,56 +34,36 @@ find_path( THRUST_INCLUDE_DIR
/usr/local/cuda/include
${CUDA_INCLUDE_DIRS}
NAMES thrust/version.h
DOC "Thrust headers"
)
if( THRUST_INCLUDE_DIR )
list( REMOVE_DUPLICATES THRUST_INCLUDE_DIR )
DOC "Thrust headers")
if(THRUST_INCLUDE_DIR)
list(REMOVE_DUPLICATES THRUST_INCLUDE_DIR)

# Find thrust version
file( STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h
version
REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)"
)
string( REGEX REPLACE
"#define THRUST_VERSION[ \t]+"
""
version
"${version}"
)
file(STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h version
REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)")
string(REGEX REPLACE "#define THRUST_VERSION[ \t]+" "" version "${version}")

file(STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h major_version
REGEX "#define THRUST_MAJOR_VERSION[ \t]+([0-9x]+)")
string(REGEX REPLACE "#define THRUST_MAJOR_VERSION[ \t]+" "" major_version
"${major_version}")

file( STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h
major_version
REGEX "#define THRUST_MAJOR_VERSION[ \t]+([0-9x]+)"
)
string( REGEX REPLACE
"#define THRUST_MAJOR_VERSION[ \t]+"
""
major_version
"${major_version}"
)
file(STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h minor_version
REGEX "#define THRUST_MINOR_VERSION[ \t]+([0-9x]+)")
string(REGEX REPLACE "#define THRUST_MINOR_VERSION[ \t]+" "" minor_version
"${minor_version}")

file( STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h
minor_version
REGEX "#define THRUST_MINOR_VERSION[ \t]+([0-9x]+)"
)
string( REGEX REPLACE
"#define THRUST_MINOR_VERSION[ \t]+"
""
minor_version
"${minor_version}"
)

set( THRUST_VERSION "${version}")
set( THRUST_MAJOR_VERSION "${major_version}")
set( THRUST_MINOR_VERSION "${minor_version}")
endif( THRUST_INCLUDE_DIR )
set(THRUST_VERSION "${version}")
set(THRUST_MAJOR_VERSION "${major_version}")
set(THRUST_MINOR_VERSION "${minor_version}")
endif(THRUST_INCLUDE_DIR)

# Check for required components
include( FindPackageHandleStandardArgs )
find_package_handle_standard_args( Thrust
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
Thrust
REQUIRED_VARS THRUST_INCLUDE_DIR
VERSION_VAR THRUST_VERSION
)
VERSION_VAR THRUST_VERSION)

set(THRUST_INCLUDE_DIRS ${THRUST_INCLUDE_DIR})
mark_as_advanced(THRUST_INCLUDE_DIR)
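The defines this module parses all live in `thrust/version.h`; to eyeball them without running CMake (a sketch, assuming a CUDA toolkit under `/usr/local/cuda`):

```bash
# THRUST_VERSION encodes major*100000 + minor*100 + patch in a single integer.
grep -E '#define THRUST_(MAJOR_|MINOR_)?VERSION' /usr/local/cuda/include/thrust/version.h
```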
14 changes: 8 additions & 6 deletions cmake/utils/Utils.cmake
@@ -1,6 +1,8 @@
macro(__flashinfer_option variable description value)
if(NOT DEFINED ${variable})
set(${variable} ${value} CACHE STRING ${description})
set(${variable}
${value}
CACHE STRING ${description})
endif()
endmacro()

@@ -10,11 +12,11 @@ endmacro()

set(FLASHINFER_ALL_OPTIONS)

#######################################################
# An option that the user can select. Can accept a condition to control when the option is available to the user.
# Usage:
# tvm_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
# The macro snippet is copied from the Apache TVM codebase.
# ##############################################################################
# An option that the user can select. Can accept a condition to control when
# the option is available to the user. Usage: tvm_option(<option_variable> "doc
# string" <initial value or boolean expression> [IF <condition>]) The macro
# snippet is copied from the Apache TVM codebase.
macro(flashinfer_option variable description value)
set(__value ${value})
set(__condition "")
1 change: 0 additions & 1 deletion docs/api/python/cascade.rst
@@ -40,4 +40,3 @@ Cascade Attention Wrapper Classes
:members:

.. automethod:: __init__

2 changes: 1 addition & 1 deletion docs/api/python/norm.rst
@@ -9,5 +9,5 @@ Kernels for normalization layers.

.. autosummary::
:toctree: _generate

rmsnorm
2 changes: 1 addition & 1 deletion docs/api/python/quantization.rst
@@ -9,6 +9,6 @@ Quantization related kernels.

.. autosummary::
:toctree: _generate

packbits
segment_packbits
2 changes: 1 addition & 1 deletion docs/api/python/rope.rst
@@ -9,7 +9,7 @@ Kernels for applying rotary embeddings.

.. autosummary::
:toctree: _generate

apply_rope_inplace
apply_llama31_rope_inplace
apply_rope
6 changes: 3 additions & 3 deletions docs/installation.rst
@@ -135,7 +135,7 @@ You can follow the steps below to install FlashInfer from source code:
3. Install Ninja build system:

.. code-block:: bash
pip install ninja
4. Install FlashInfer:
@@ -208,12 +208,12 @@ To compile the C++ benchmarks (using `nvbench <https://github.com/NVIDIA/nvbench
3. Create build directory and copy configuration files

.. code-block:: bash
mkdir -p build
cp cmake/config.cmake build/ # you can modify the configuration file if needed
4. Compile the benchmarks and unittests:

.. code-block:: bash
cd build