Commit
Enable limited api build on cibw
sasha0552 authored Jul 16, 2024
1 parent 509f98e commit 21a7641
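
In short: the workflow's per-version build matrix (CPython 3.8–3.11) is replaced by a single CPython 3.8 build, and a new vllm patch retags that wheel as cp38-abi3 (limited API / stable ABI) so the one wheel installs on 3.8 and every later CPython.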
Showing 2 changed files with 58 additions and 5 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/build-vllm.yml
@@ -33,6 +33,7 @@ jobs:
           patches=(
             "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/0000-enable-support-for-pascal-gpus.patch"
             "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/1000-set-torch-cuda-arch-list.patch"
+            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/2000-enable-limited-api-build-on-cibw.patch"
           )
 
           # Apply patches
@@ -44,7 +45,7 @@ jobs:
       - name: Build wheels
         uses: pypa/[email protected]
         env:
-          CIBW_BUILD: cp${{ matrix.python_version }}-manylinux_x86_64
+          CIBW_BUILD: cp38-manylinux_x86_64
           CIBW_ENVIRONMENT: CMAKE_BUILD_TYPE=Release VLLM_INSTALL_PUNICA_KERNELS=0
           CIBW_MANYLINUX_PYPY_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
           CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
@@ -59,10 +60,6 @@ jobs:
           prerelease: true
           tag_name: ${{ github.event.inputs.tag_name }}
 
-    strategy:
-      matrix:
-        python_version: [38, 39, 310, 311]
-
 on:
   workflow_dispatch:
     inputs:
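
Why the matrix disappears: the new 2000-enable-limited-api-build-on-cibw.patch (below) retags the wheel as cp38-abi3, and an abi3 wheel built against the Python 3.8 stable ABI is installable on every later CPython, so one cp38 build replaces the per-version builds. A quick check with the packaging library — illustrative only, not part of this commit; the platform tag is an example value:

# Illustrative sketch, not part of the commit: confirm that a newer
# CPython accepts a cp38-abi3 wheel tag, which is why the 38/39/310/311
# build matrix collapses into a single cp38 build.
from packaging.tags import Tag, sys_tags

wheel_tag = Tag("cp38", "abi3", "manylinux2014_x86_64")

# True on CPython >= 3.8 on an x86_64 manylinux-compatible Linux.
print(wheel_tag in set(sys_tags()))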
56 changes: 56 additions & 0 deletions patches/vllm/2000-enable-limited-api-build-on-cibw.patch
@@ -0,0 +1,56 @@
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,7 @@ from packaging.version import Version, parse
 from setuptools import Extension, find_packages, setup
 from setuptools.command.build_ext import build_ext
 from torch.utils.cpp_extension import CUDA_HOME
+from wheel.bdist_wheel import bdist_wheel
 
 
 def load_module_from_path(module_name, path):
@@ -234,6 +235,18 @@ class cmake_build_ext(build_ext):
         subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
 
 
+class bdist_wheel_abi3(bdist_wheel):
+
+    def get_tag(self):
+        python, abi, plat = super().get_tag()
+
+        if python.startswith("cp"):
+            # on CPython, our wheels are abi3 and compatible back to 3.8
+            return "cp38", "abi3", plat
+
+        return python, abi, plat
+
+
 def _is_cuda() -> bool:
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
@@ -440,6 +453,8 @@ def get_requirements() -> List[str]:
 
 ext_modules = []
 
+cmdclass = {"bdist_wheel": bdist_wheel_abi3}
+
 if _is_cuda() or _is_hip():
     ext_modules.append(CMakeExtension(name="vllm._moe_C"))
 
@@ -449,6 +464,8 @@ if _build_custom_ops():
     if _install_punica():
         ext_modules.append(CMakeExtension(name="vllm._punica_C"))
 
+    cmdclass["build_ext"] = cmake_build_ext
+
 package_data = {
     "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
 }
@@ -486,7 +503,7 @@ setup(
     extras_require={
         "tensorizer": ["tensorizer>=2.9.0"],
     },
-    cmdclass={"build_ext": cmake_build_ext} if _build_custom_ops() else {},
+    cmdclass=cmdclass,
     package_data=package_data,
     entry_points={
         "console_scripts": [
