chore: bump llama.cpp #693
name: ci
permissions:
  contents: read
  pull-requests: read
  actions: read
env:
  LLAMA_BOX_BUILD_VERSION: "${{ github.ref_name }}"
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - "main"
      - "branch-v*.*"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
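  # For pull requests, head_ref keys the group so a newer push cancels the
  # in-flight run; for other events, run_id makes every group unique, so
  # nothing is cancelled.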
# Disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
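# Accordingly, every cmake invocation below passes -DGGML_OPENMP=off, and the
# CPU-only linux job additionally removes libgomp from its build images so
# nothing links against it by accident.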
jobs:
  darwin:
    strategy:
      fail-fast: false
      matrix:
        include:
          - arch: 'amd64'
            instruction: 'avx2'
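    # macos-13 runners are Intel (amd64); macos-14 runners are Apple Silicon (arm64),
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images.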
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Build
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool --version
            otool -L ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}
  darwin-metal:
    strategy:
      fail-fast: false
      matrix:
        arch:
          - 'amd64'
          - 'arm64'
        version:
          - '3.0'
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Deps
        run: |
          brew update
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Build
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool --version
            otool -L ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
  linux:
    strategy:
      fail-fast: false
      matrix:
        # AVX2 ==> CentOS 7.
        # AVX512 ==> RockyLinux 8.9.
        # NEON ==> Ubuntu 18.04.
        include:
          - arch: 'amd64'
            instruction: 'avx2'
            distro_container_image: 'gpustack/devel-cpu:centos7'
          - arch: 'amd64'
            instruction: 'avx512'
            distro_container_image: 'gpustack/devel-cpu:rockylinux8.9'
          - arch: 'arm64'
            instruction: 'neon'
            distro_container_image: 'gpustack/devel-cpu:ubuntu18.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-${{ matrix.arch }}-${{ matrix.instruction }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
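          # NB: the heredoc below is unquoted, so unescaped substitutions (e.g.
          # $(nproc) and workflow expressions) expand on the runner while the
          # script is written; \$-escaped ones are deferred and run inside the
          # container. The same pattern repeats in the jobs below.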
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          # NB(thxCode): workaround to avoid using OpenMP.
          rm -rf /opt/rh/devtoolset-9/root/usr/lib/gcc/\$(uname -m)-redhat-linux/9/libgomp* || true
          rm -rf /opt/rh/gcc-toolset-11/root/usr/lib/gcc/\$(uname -m)-redhat-linux/11/libgomp* || true
          rm -rf /usr/lib/gcc/\$(uname -m)-linux-gnu/11/libgomp* || true
          rm -rf /opt/openEuler/gcc-toolset-10/root/usr/lib/gcc/\$(uname -m)-linux-gnu/10/libgomp* || true
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            -DGGML_NATIVE=off \
            ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} \
            ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_BLAS_VENDOR=OpenBLAS \
            -DGGML_STATIC=on \
            -DGGML_BLAS=on \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}
  linux-hip:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/rocm/dev-centos-7/tags.
        # 6.2 ==> 6.2.4, CentOS 7.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
        include:
          - arch: 'amd64'
            version: '6.2'
            distro_container_image: 'gpustack/devel-rocm-hip:6.2.4-centos7'
            hip_arch: 'gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1030;gfx1100;gfx1101;gfx1102'
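            # The :xnack+/:xnack- suffixes are AMDGPU target-ID features: each
            # entry produces a separate code object for that xnack mode, per
            # the AMDGPUUsage link above.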
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            -DCMAKE_C_COMPILER="\$(hipconfig -l)/clang" \
            -DCMAKE_CXX_COMPILER="\$(hipconfig -l)/clang++" \
            -DCMAKE_HIP_COMPILER="\$(hipconfig -l)/clang" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
  linux-cuda:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        # 12.4 ==> 12.4.0, CentOS 7, RockyLinux 8.
        # 11.8 ==> 11.8.0, CentOS 7, RockyLinux 8.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
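        # The '-real' suffix in cuda_arch asks CMake for SASS only (no PTX), so
        # the fat binary stays smaller but will not JIT onto newer GPU
        # generations than those listed.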
        include:
          - arch: 'amd64'
            version: '12.4'
            distro_container_image: 'gpustack/devel-nvidia-cuda:12.4.0-centos7'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'amd64'
            version: '11.8'
            distro_container_image: 'gpustack/devel-nvidia-cuda:11.8.0-centos7'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'arm64'
            version: '12.4'
            distro_container_image: 'gpustack/devel-nvidia-cuda:12.4.0-rockylinux8'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'arm64'
            version: '11.8'
            distro_container_image: 'gpustack/devel-nvidia-cuda:11.8.0-rockylinux8'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
  linux-oneapi:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        # 2025.0 ==> 2025.0.0-0, Ubuntu 22.04.
        include:
          - arch: 'amd64'
            version: '2025.0'
            distro_container_image: 'gpustack/devel-intel-oneapi:2025.0.0-0-ubuntu22.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          if [ -f /etc/os-release ]; then
            source /etc/os-release
            cat /etc/os-release
            if [ "\${ID}" = "ubuntu" ]; then
              apt-get update -y \
                && apt-get install -y build-essential libssl-dev ccache curl git bc
              if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
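                # Releases before 21.04 ship an older default GCC; pull gcc-11
                # from the ubuntu-toolchain-r PPA and make it the default.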
                apt-get update -y \
                  && apt-get install -y software-properties-common
                add-apt-repository -y ppa:ubuntu-toolchain-r/test
                apt-get update -y \
                  && apt-get install -y gcc-11 g++-11
                update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
                update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
              fi
            else
              echo "Unsupported distribution: \${ID}"
              exit 1
            fi
          else
            echo "Unknown distribution"
            exit 1
          fi
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=on -DGGML_SYCL_F16=on \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icpx \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  linux-cann:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=8.0.rc2.alpha003-910b.
        # 8.0 ==> 8.0.rc2.alpha003, Ubuntu 20.04, OpenEuler 20.03.
        arch:
          - 'amd64'
          - 'arm64'
        version:
          - '8.0'
        distro_container_image:
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-910b-ubuntu20.04'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-910b-openeuler20.03'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-310p-ubuntu20.04'
          - 'gpustack/devel-ascendai-cann:8.0.rc2.alpha003-310p-openeuler20.03'
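        # The matrix crosses arch x image; 910b images build with
        # SOC_TYPE=Ascend910B3 and 310p images with SOC_TYPE=Ascend310P3 (see
        # the cmake flags below).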
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.distro_container_image }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_CANN=on \
            -DSOC_TYPE=${{ contains(matrix.distro_container_image, '910b') && 'Ascend910B3' || 'Ascend310P3' }} \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            ${{ contains(matrix.distro_container_image, '310p') && '-DGGML_AVX2=off' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
  linux-musa:
    strategy:
      fail-fast: false
      matrix:
        # see https://hub.docker.com/r/mthreads/musa/tags?page_size=&ordering=&name=ubuntu22.04.
        # rc3.1.0 ==> rc3.1.0, Ubuntu 22.04.
        include:
          - arch: 'amd64'
            version: 'rc3.1'
            distro_container_image: 'gpustack/devel-mthreads-musa:rc3.1.0-ubuntu22.04'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-musa-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_MUSA=on \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}
  linux-dtk:
    strategy:
      fail-fast: false
      matrix:
        # see https://sourcefind.cn/#/image/dcu/dtk.
        # 24.04 ==> 24.04.3, Ubuntu 20.04.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://download.sourcefind.cn:65024/6/main.
        include:
          - arch: 'amd64'
            version: '24.04'
            distro_container_image: 'gpustack/devel-hygon-dtk:24.04.3-ubuntu20.04'
            hip_arch: 'gfx906;gfx926;gfx928'
    runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    steps:
      - name: Maximize Docker Build Space
        uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
        with:
          deep-clean: false
          root-reserve-mb: 20480
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-linux-dtk-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          echo "===== BUILD ====="
          source /opt/dtk/env.sh
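          # Link-time search path: prepend clang's runtime dir and reuse the
          # LD_LIBRARY_PATH that env.sh appears to configure, so the DTK
          # libraries are also found when linking.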
          export LIBRARY_PATH="/opt/dtk/llvm/lib/clang/15.0.0/lib/linux:\${LD_LIBRARY_PATH}"
          env || true
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            -DCMAKE_C_COMPILER="hipcc" \
            -DCMAKE_C_FLAGS="--gpu-max-threads-per-block=1024" \
            -DCMAKE_CXX_COMPILER="hipcc" \
            -DCMAKE_CXX_FLAGS="--gpu-max-threads-per-block=1024" \
            -DCMAKE_HIP_COMPILER="clang" \
            -DCMAKE_HIP_FLAGS="--gpu-max-threads-per-block=1024" \
            -DGGML_NATIVE=off \
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
            ${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
            -DGGML_OPENMP=off \
            -DGGML_RPC=on
          cat ${{ github.workspace }}/build/llama-box/CMakeFiles/llama-box.dir/link.txt || true
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          ls -alh ${{ github.workspace }}/build/bin/
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd --version
            ldd ${{ github.workspace }}/build/bin/llama-box || true
          else
            exit 1
          fi
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env DEBIAN_FRONTEND=noninteractive \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --env LLAMA_BOX_BUILD_VERSION \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            ${{ matrix.distro_container_image }}
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-dtk-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-dtk-${{ matrix.version }}
  windows:
    strategy:
      fail-fast: false
      matrix:
        # AVX2 ==> Windows Server 2022.
        # AVX512 ==> Windows Server 2022.
        # NEON ==> Windows Server 2022.
        include:
          - arch: 'amd64'
            instruction: 'avx2'
          - arch: 'amd64'
            instruction: 'avx512'
          - arch: 'arm64'
            instruction: 'neon'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-${{ matrix.arch }}-${{ matrix.instruction }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
      - name: Setup
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
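          # Run vcvarsall in cmd, dump the resulting environment with `set`,
          # and replay it into GITHUB_ENV so later steps see the MSVC toolchain
          # (VS2022 on this image, with a VS2019 fallback).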
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
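          # When targeting arm64 the runner itself is amd64 (vcvarsall
          # amd64_arm64 above), so the repo's build-windows-arm64.cmake
          # toolchain file is passed to cross-compile.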
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            ${{ matrix.arch == 'arm64' && format('-DCMAKE_TOOLCHAIN_FILE={0}\llama-box\scripts\build-windows-arm64.cmake', github.workspace) || '' }} `
            ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} `
            ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} `
            -DGGML_STATIC=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}
  windows-hip:
    strategy:
      fail-fast: false
      matrix:
        # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
        # 6.2 ==> 6.2.4, Windows Server 2022.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
        include:
          - arch: 'amd64'
            version: '6.2'
            distro_binary_installer: 'https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe'
            hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
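      # Setup HIP downloads the HIP SDK installer, runs it silently, then
      # derives HIP_PATH from the installed clang.exe (walking two levels up
      # from ...\bin\clang.exe to the versioned ROCm root).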
      - name: Setup HIP
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download AMD ROCm HIP SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install AMD ROCm HIP SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-install'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify AMD ROCm HIP SDK"
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_HIP=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_CUDA_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
  windows-cuda:
    strategy:
      fail-fast: false
      matrix:
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        # 12.4 ==> 12.4.1, Windows Server 2022.
        # 11.8 ==> 11.8.0, Windows Server 2019.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        include:
          - arch: 'amd64'
            version: '12.4'
            distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
          - arch: 'amd64'
            version: '11.8'
            distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe'
            cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
    runs-on: ${{ matrix.version == '11.8' && 'windows-2019' || 'windows-2022' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache ninja curl openssl -y
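      # Setup CUDA installs the nvcc/cudart/cublas components via the network
      # installer, then exports CUDA_PATH plus the version-suffixed
      # CUDA_PATH_Vx_y variables that CUDA's build integration looks for.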
      - name: Setup CUDA
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download NVIDIA CUDA SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install NVIDIA CUDA SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','nvcc_${{ matrix.version }}','cudart_${{ matrix.version }}','cublas_${{ matrix.version }}','cublas_dev_${{ matrix.version }}','thrust_${{ matrix.version }}','visual_studio_integration_${{ matrix.version }}'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify NVIDIA CUDA SDK"
          & 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' --version
          $cudaPath = "$(Resolve-Path -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' | Split-Path | Split-Path)"
          $cudaVersion = ($cudaPath | Split-Path -Leaf) -replace 'v(\d+).(\d+)', '$1_$2'
          "CUDA_PATH=${cudaPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          "CUDA_PATH_V${cudaVersion}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Append
          "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVersion}" | Out-File -FilePath $env:GITHUB_ENV -Append
          if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
            cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          } else {
            cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
              if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
            }
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "CUDA_PATH=${env:CUDA_PATH}"
          Write-Host "===== BUILD ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_CUDA_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
  windows-oneapi:
    strategy:
      fail-fast: false
      matrix:
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=online.
        # 2025.0 ==> 2025.0.0, Windows Server 2022.
        include:
          - arch: 'amd64'
            version: '2025.0'
            distro_binary_installer: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882.exe'
    runs-on: windows-2022
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        # ccache isn't supported here, as oneAPI needs its environment
        # configured via setvars.bat; the build directory is cached instead.
        timeout-minutes: 5
        uses: actions/cache@v4
        with:
          key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\build
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ninja curl openssl -y
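      # Setup oneAPI installs the Base Toolkit components silently, then runs
      # setvars.bat in cmd and replays the resulting environment into
      # GITHUB_ENV, the same trick used for vcvarsall in the other Windows jobs.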
      - name: Setup oneAPI
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download Intel oneAPI SDK"
          curl.exe --retry 5 --retry-delay 5 `
            --output "${{ runner.temp }}\installer.exe" `
            --url "${{ matrix.distro_binary_installer }}"
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install Intel oneAPI SDK"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify Intel oneAPI SDK"
          & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
          $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
          "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
          $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
          cmd /c "call `"${oneapiRoot}\setvars.bat`" && set" | ForEach-Object { `
            if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
          }
          "OPENSSL_ROOT_DIR=C:\Program Files\OpenSSL" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Build
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
          Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
          Write-Host "===== BUILD ====="
          Get-ChildItem Env: -ErrorAction Ignore | Format-Table -Property Name, Value -ErrorAction Ignore
          cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DGGML_SYCL=on `
            -DCMAKE_C_COMPILER=cl `
            -DCMAKE_CXX_COMPILER=icx `
            -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
            -DGGML_NATIVE=off `
            -DGGML_SYCL_F16=on `
            -DGGML_OPENMP=off `
            -DGGML_RPC=on
          cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\\out\\*.zip
          name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  release:
    if: ${{ startsWith(github.ref, 'refs/tags/') }}
    permissions:
      contents: write
      actions: read
      id-token: write
    runs-on: ubuntu-22.04
    needs:
      - darwin
      - darwin-metal
      - linux
      - linux-hip
      - linux-cuda
      - linux-oneapi
      - linux-cann
      - linux-musa
      - linux-dtk
      - windows
      - windows-hip
      - windows-cuda
      - windows-oneapi
    steps:
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.workspace }}/out
          merge-multiple: true
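      # merge-multiple flattens every per-job artifact into a single out/
      # directory, so the glob below picks up all platform zips at once.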
      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          # resolves to the pushed tag (github.ref_name) via the workflow-level env
          tag_name: "${{ env.LLAMA_BOX_BUILD_VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*