# Workflow file for the run
# "Refactor turbomind attention by precomputing rotary embed" #1114

# Workflow name as shown in the Actions UI.
name: windows-x64-gpu

# Trigger on pushes and pull requests, but only when a file matching one of
# the listed path filters changes (this workflow file, sources, CMake
# configuration, examples, third-party code, or the C++ tests).
on:
  push:
    paths:
      - '.github/workflows/windows-x64-gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
  # Same path filters as for `push`; keep the two lists in sync.
  pull_request:
    paths:
      - '.github/workflows/windows-x64-gpu.yml'
      - 'src/**'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'examples/**'
      - '3rdparty/**'
      - 'tests/csrc/**'
# One concurrency group per git ref: starting a new run for the same ref
# cancels the run that is still in progress.
concurrency:
  group: windows-x64-gpu-${{ github.ref }}
  cancel-in-progress: true
# Grant the workflow token read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Builds the project with CMake and packages a Python wheel, once per CUDA
  # toolkit version in the matrix.
  build:
    strategy:
      matrix:
        # Quoted so the versions are always read as strings.
        cudaver: ['11.8.0', '12.1.0']
    name: cuda-${{ matrix.cudaver }}
    runs-on: windows-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Set up python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - name: Install python packages
        run: |
          pip install -r requirements/build.txt
          pip install wheel
      # The setup script receives the requested toolkit version through the
      # INPUT_CUDA_VERSION environment variable.
      - name: Setup CUDA Toolkit
        id: cuda-toolkit
        shell: pwsh
        run: ./builder/windows/setup_cuda.ps1
        env:
          INPUT_CUDA_VERSION: ${{ matrix.cudaver }}
      # The script below is PowerShell, so the shell is pinned explicitly.
      # PowerShell keeps going after a native command exits non-zero and the
      # step only reports the status of the last command, so both the build
      # and the install are checked explicitly; otherwise a failed install
      # could still end in a "successful" wheel build.
      # The CMake build tree is removed before packaging so that
      # `build/wheel` only contains the wheel output.
      - name: Build wheel
        shell: pwsh
        run: |
          $env:BUILD_TEST="ON"
          mkdir build
          cd build
          ..\builder\windows\generate.ps1
          cmake --build . --config Release -- /m /v:q
          if (-Not $?) {
            echo "build failed"
            exit 1
          }
          cmake --install . --config Release
          if (-Not $?) {
            echo "install failed"
            exit 1
          }
          cd ..
          rm build -Force -Recurse
          python setup.py bdist_wheel -d build/wheel