chore: bump llama.cpp #693
name: ci
permissions:
  contents: read
  pull-requests: read
  actions: read
env:
  LLAMA_BOX_BUILD_VERSION: "${{ github.ref_name }}"
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - "main"
      - "branch-v*.*"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
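  # For pull requests, head_ref keys the group so a newer push cancels the
  # in-flight run; for other events, run_id makes every group unique, so
  # nothing is cancelled.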
# Disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
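# Accordingly, every cmake invocation below passes -DGGML_OPENMP=off, and the
# CPU-only linux job additionally removes libgomp from its build images so
# nothing links against it by accident.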
jobs:
  darwin:
    strategy:
      fail-fast: false
      matrix:
        include:
          - arch: 'amd64'
            instruction: 'avx2'
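    # macos-13 runners are Intel (amd64); macos-14 runners are Apple Silicon (arm64),
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images.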
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Build
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool --version
            otool -L ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}
  darwin-metal:
    strategy:
      fail-fast: false
      matrix:
        arch:
          - 'amd64'
          - 'arm64'
        version:
          - '3.0'
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Build
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool --version
            otool -L ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
  linux:
    strategy:
      fail-fast: false
      matrix:
        # AVX2 ==> CentOS 7.
        # AVX512 ==> RockyLinux 8.9.
        # NEON ==> Ubuntu 18.04.
        include:
          - arch: 'amd64'
            instruction: 'avx2'
            distro_container_image: 'gpustack/devel-cpu:centos7'
          - arch: 'amd64'
            instruction: 'avx512'
            distro_container_image: 'gpustack/devel-cpu:rockylinux8.9'
          - arch: 'arm64'
            instruction: 'neon'
            distro_container_image: 'gpustack/devel-cpu:ubuntu18.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-${{ matrix.arch }}-${{ matrix.instruction }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
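          # NB: the heredoc below is unquoted, so unescaped substitutions (e.g.
          # $(nproc) and workflow expressions) expand on the runner while the
          # script is written; \$-escaped ones are deferred and run inside the
          # container. The same pattern repeats in the jobs below.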
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          # NB(thxCode): workaround to avoid using OpenMP.
          rm -rf /opt/rh/devtoolset-9/root/usr/lib/gcc/\$(uname -m)-redhat-linux/9/libgomp* || true
          rm -rf /opt/rh/gcc-toolset-11/root/usr/lib/gcc/\$(uname -m)-redhat-linux/11/libgomp* || true
          rm -rf /usr/lib/gcc/\$(uname -m)-linux-gnu/11/libgomp* || true
          rm -rf /opt/openEuler/gcc-toolset-10/root/usr/lib/gcc/\$(uname -m)-linux-gnu/10/libgomp* || true
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=off \
            ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} \
            ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_BLAS_VENDOR=OpenBLAS \
            -DGGML_STATIC=on \
            -DGGML_BLAS=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}
  linux-hip:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/rocm/dev-centos-7/tags.
        # 6.2 ==> 6.2.4, CentOS 7.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
        include:
          - arch: 'amd64'
            version: '6.2'
            distro_container_image: 'gpustack/devel-rocm-hip:6.2.4-centos7'
            hip_arch: 'gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1030;gfx1100;gfx1101;gfx1102'
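            # The :xnack+/:xnack- suffixes are AMDGPU target-ID features: each
            # entry produces a separate code object for that xnack mode, per
            # the AMDGPUUsage link above.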
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            -DCMAKE_C_COMPILER="\$(hipconfig -l)/clang" \
            -DCMAKE_CXX_COMPILER="\$(hipconfig -l)/clang++" \
            -DCMAKE_HIP_COMPILER="\$(hipconfig -l)/clang" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
  linux-cuda:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        # 12.4 ==> 12.4.0, CentOS 7, RockyLinux 8.
        # 11.8 ==> 11.8.0, CentOS 7, RockyLinux 8.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
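        # The '-real' suffix in cuda_arch asks CMake for SASS only (no PTX), so
        # the fat binary stays smaller but will not JIT onto newer GPU
        # generations than those listed.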
        include:
          - arch: 'amd64'
            version: '12.4'
            distro_container_image: 'gpustack/devel-nvidia-cuda:12.4.0-centos7'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'amd64'
            version: '11.8'
            distro_container_image: 'gpustack/devel-nvidia-cuda:11.8.0-centos7'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'arm64'
            version: '12.4'
            distro_container_image: 'gpustack/devel-nvidia-cuda:12.4.0-rockylinux8'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'arm64'
            version: '11.8'
            distro_container_image: 'gpustack/devel-nvidia-cuda:11.8.0-rockylinux8'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
  linux-oneapi:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        # 2025.0 ==> 2025.0.0-0, Ubuntu 22.04.
        include:
          - arch: 'amd64'
            version: '2025.0'
            distro_container_image: 'gpustack/devel-intel-oneapi:2025.0.0-0-ubuntu22.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          if [ -f /etc/os-release ]; then
            source /etc/os-release
            cat /etc/os-release
            if [ "\${ID}" = "ubuntu" ]; then
              apt-get update -y \
                && apt-get install -y build-essential libssl-dev ccache curl git bc
              if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
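                # Releases before 21.04 ship an older default GCC; pull gcc-11
                # from the ubuntu-toolchain-r PPA and make it the default.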
                apt-get update -y \
                  && apt-get install -y software-properties-common
                add-apt-repository -y ppa:ubuntu-toolchain-r/test
                apt-get update -y \
                  && apt-get install -y gcc-11 g++-11
                update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
                update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
              fi
            else
              echo "Unsupported distribution: \${ID}"
              exit 1
            fi
          else
            echo "Unknown distribution"
            exit 1
          fi
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=on -DGGML_SYCL_F16=on \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icpx \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  linux-cann:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=8.0.rc2.alpha003-910b.
        # 8.0 ==> 8.0.rc2.alpha003, Ubuntu 20.04, OpenEuler 20.03.
        arch:
          - 'amd64'
          - 'arm64'
        version:
          - '8.0'
        distro_container_image:
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-910b-ubuntu20.04'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-910b-openeuler20.03'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-310p-ubuntu20.04'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-310p-openeuler20.03'
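        # The matrix crosses arch x image; 910b images build with
        # SOC_TYPE=Ascend910B3 and 310p images with SOC_TYPE=Ascend310P3 (see
        # the cmake flags below).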
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.distro_container_image }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CANN=on \
            -DSOC_TYPE=${{ contains(matrix.distro_container_image, '910b') && 'Ascend910B3' || 'Ascend310P3' }} \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            ${{ contains(matrix.distro_container_image, '310p') && '-DGGML_AVX2=off' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
  linux-musa:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/mthreads/musa/tags?page_size=&ordering=&name=ubuntu22.04.
        # rc3.1.0 ==> rc3.1.0, Ubuntu 22.04.
        include:
          - arch: 'amd64'
            version: 'rc3.1'
            distro_container_image: 'gpustack/devel-mthreads-musa:rc3.1.0-ubuntu22.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-musa-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_MUSA=on \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}
  linux-dtk:
    strategy:
      fail-fast: false
      matrix:
        # see https://sourcefind.cn/#/image/dcu/dtk.
        # 24.04 ==> 24.04.3, Ubuntu 20.04.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://download.sourcefind.cn:65024/6/main.
        include:
          - arch: 'amd64'
            version: '24.04'
            distro_container_image: 'gpustack/devel-hygon-dtk:24.04.3-ubuntu20.04'
            hip_arch: 'gfx906;gfx926;gfx928'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-dtk-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          source /opt/dtk/env.sh
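          # Link-time search path: prepend clang's runtime dir and reuse the
          # LD_LIBRARY_PATH that env.sh appears to configure, so the DTK
          # libraries are also found when linking.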
          export LIBRARY_PATH="/opt/dtk/llvm/lib/clang/15.0.0/lib/linux:\${LD_LIBRARY_PATH}"
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            -DCMAKE_C_COMPILER="hipcc" \
            -DCMAKE_C_FLAGS="--gpu-max-threads-per-block=1024" \
            -DCMAKE_CXX_COMPILER="hipcc" \
            -DCMAKE_CXX_FLAGS="--gpu-max-threads-per-block=1024" \
            -DCMAKE_HIP_COMPILER="clang" \
            -DCMAKE_HIP_FLAGS="--gpu-max-threads-per-block=1024" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-dtk-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-dtk-${{ matrix.version }}
  windows:
    strategy:
      fail-fast: false
      matrix:
        # AVX2 ==> Windows Server 2022.
        # AVX512 ==> Windows Server 2022.
        # NEON ==> Windows Server 2022.
        include:
          - arch: 'amd64'
            instruction: 'avx2'
          - arch: 'amd64'
            instruction: 'avx512'
          - arch: 'arm64'
            instruction: 'neon'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-${{ matrix.arch }}-${{ matrix.instruction }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
      - name: Setup
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
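          # Run vcvarsall in cmd, dump the resulting environment with `set`,
          # and replay it into GITHUB_ENV so later steps see the MSVC toolchain
          # (VS2022 on this image, with a VS2019 fallback).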
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
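          # When targeting arm64 the runner itself is amd64 (vcvarsall
          # amd64_arm64 above), so the repo's build-windows-arm64.cmake
          # toolchain file is passed to cross-compile.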
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            ${{ matrix.arch == 'arm64' && format('-DCMAKE_TOOLCHAIN_FILE={0}\llama-box\scripts\build-windows-arm64.cmake', github.workspace) || '' }} `
            ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} `
            ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} `
            -DGGML_STATIC=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}
  windows-hip:
    strategy:
      fail-fast: false
      matrix:
        # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
        # 6.2 ==> 6.2.4, Windows Server 2022.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
        include:
          - arch: 'amd64'
            version: '6.2'
            distro_binary_installer: 'https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe'
            hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
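      # Setup HIP downloads the HIP SDK installer, runs it silently, then
      # derives HIP_PATH from the installed clang.exe (walking two levels up
      # from ...\bin\clang.exe to the versioned ROCm root).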
      - name: Setup HIP
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download AMD ROCm HIP SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install AMD ROCm HIP SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-install'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify AMD ROCm HIP SDK"
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_HIP=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_CUDA_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
  windows-cuda:
    strategy:
      fail-fast: false
      matrix:
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        # 12.4 ==> 12.4.1, Windows Server 2022.
        # 11.8 ==> 11.8.0, Windows Server 2019.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        include:
          - arch: 'amd64'
            version: '12.4'
            distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'amd64'
            version: '11.8'
            distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
    runs-on: ${{ matrix.version == '11.8' && 'windows-2019' || 'windows-2022' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
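      # Setup CUDA installs the nvcc/cudart/cublas components via the network
      # installer, then exports CUDA_PATH plus the version-suffixed
      # CUDA_PATH_Vx_y variables that CUDA's build integration looks for.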
      - name: Setup CUDA
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download NVIDIA CUDA SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install NVIDIA CUDA SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','nvcc_${{ matrix.version }}','cudart_${{ matrix.version }}','cublas_${{ matrix.version }}','cublas_dev_${{ matrix.version }}','thrust_${{ matrix.version }}','visual_studio_integration_${{ matrix.version }}'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify NVIDIA CUDA SDK"
          & 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' --version
          $cudaPath = "$(Resolve-Path -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' | Split-Path | Split-Path)"
          $cudaVersion = ($cudaPath | Split-Path -Leaf) -replace 'v(\d+).(\d+)', '$1_$2'
          "CUDA_PATH=${cudaPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          "CUDA_PATH_V${cudaVersion}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Append
          "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVersion}" | Out-File -FilePath $env:GITHUB_ENV -Append
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "CUDA_PATH=${env:CUDA_PATH}"
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_CUDA_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
  windows-oneapi:
    strategy:
      fail-fast: false
      matrix:
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=online.
        # 2025.0 ==> 2025.0.0, Windows Server 2022.
        include:
          - arch: 'amd64'
            version: '2025.0'
            distro_binary_installer: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882.exe'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        # ccache isn't supported here, as oneAPI needs its environment
        # configured via setvars.bat; the build directory is cached instead.
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\build
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ninja curl openssl -y
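      # Setup oneAPI installs the Base Toolkit components silently, then runs
      # setvars.bat in cmd and replays the resulting environment into
      # GITHUB_ENV, the same trick used for vcvarsall in the other Windows jobs.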
      - name: Setup oneAPI
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download Intel oneAPI SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install Intel oneAPI SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify Intel oneAPI SDK"
          & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
          $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
          "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
          cmd /c "call `"${oneapiRoot}\setvars.bat`" && set" | ForEach-Object { `
            if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
          Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
          Write-Host "===== BUILD ====="
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_SYCL=on `
            -DCMAKE_C_COMPILER=cl `
            -DCMAKE_CXX_COMPILER=icx `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_SYCL_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  release:
    if: ${{ startsWith(github.ref, 'refs/tags/') }}
    permissions:
      contents: write
      actions: read
      id-token: write
    runs-on: ubuntu-22.04
    needs:
      - darwin
      - darwin-metal
      - linux
      - linux-hip
      - linux-cuda
      - linux-oneapi
      - linux-cann
      - linux-musa
      - linux-dtk
      - windows
      - windows-hip
      - windows-cuda
      - windows-oneapi
    steps:
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.workspace }}/out
          merge-multiple: true
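      # merge-multiple flattens every per-job artifact into a single out/
      # directory, so the glob below picks up all platform zips at once.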
      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          # resolves to the pushed tag (github.ref_name) via the workflow-level env
          tag_name: "${{ env.LLAMA_BOX_BUILD_VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*