Use tokenizer.apply_chat_template() in vLLM #19

Workflow file for this run

.github/workflows/release.yaml at fda8b15

	name: Build and Release

	on: push

	env:
	GRPC_VERSION: v1.58.0

	permissions:
	contents: write

	concurrency:
	group: ci-releases-${{ github.head_ref \|\| github.ref }}-${{ github.repository }}
	cancel-in-progress: true

	jobs:
	build-linux:
	strategy:
	matrix:
	include:
	- build: 'avx2'
	defines: ''
	- build: 'avx'
	defines: '-DLLAMA_AVX2=OFF'
	- build: 'avx512'
	defines: '-DLLAMA_AVX512=ON'
	- build: 'cuda12'
	defines: ''
	- build: 'cuda11'
	defines: ''
	runs-on: ubuntu-latest
	steps:
	- name: Clone
	uses: actions/checkout@v4
	with:
	submodules: true
	- uses: actions/setup-go@v4
	with:
	go-version: '1.21.x'
	cache: false
	- name: Dependencies
	run: \|
	sudo apt-get update
	sudo apt-get install build-essential ffmpeg
	- name: Install CUDA Dependencies
	if: ${{ matrix.build == 'cuda12' \|\| matrix.build == 'cuda11' }}
	run: \|
	if [ "${{ matrix.build }}" == "cuda12" ]; then
	export CUDA_VERSION=12-3
	else
	export CUDA_VERSION=11-7
	fi
	curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
	sudo dpkg -i cuda-keyring_1.1-1_all.deb
	sudo apt-get update
	sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
	- name: Cache grpc
	id: cache-grpc
	uses: actions/cache@v3
	with:
	path: grpc
	key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
	- name: Build grpc
	if: steps.cache-grpc.outputs.cache-hit != 'true'
	run: \|
	git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
	cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
	-DgRPC_BUILD_TESTS=OFF \
	../.. && sudo make --jobs 5 --output-sync=target
	- name: Install gRPC
	run: \|
	cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
	- name: Build
	id: build
	env:
	CMAKE_ARGS: "${{ matrix.defines }}"
	BUILD_ID: "${{ matrix.build }}"
	run: \|
	if [ "${{ matrix.build }}" == "cuda12" ] \|\| [ "${{ matrix.build }}" == "cuda11" ]; then
	export BUILD_TYPE=cublas
	export PATH=/usr/local/cuda/bin:$PATH
	make dist
	else
	STATIC=true make dist
	fi
	- uses: actions/upload-artifact@v3
	with:
	name: ${{ matrix.build }}
	path: release/
	- name: Release
	uses: softprops/action-gh-release@v1
	if: startsWith(github.ref, 'refs/tags/')
	with:
	files: \|
	release/*

	build-stablediffusion:
	runs-on: ubuntu-latest
	steps:
	- name: Clone
	uses: actions/checkout@v4
	with:
	submodules: true
	- uses: actions/setup-go@v4
	with:
	go-version: '1.21.x'
	cache: false
	- name: Dependencies
	run: \|
	sudo apt-get install -y --no-install-recommends libopencv-dev
	- name: Build stablediffusion
	run: \|
	make backend-assets/grpc/stablediffusion
	mkdir -p release && cp backend-assets/grpc/stablediffusion release
	- uses: actions/upload-artifact@v3
	with:
	name: stablediffusion
	path: release/
	- name: Release
	uses: softprops/action-gh-release@v1
	if: startsWith(github.ref, 'refs/tags/')
	with:
	files: \|
	release/*

	build-macOS:
	strategy:
	matrix:
	include:
	- build: 'avx2'
	defines: ''
	- build: 'avx'
	defines: '-DLLAMA_AVX2=OFF'
	- build: 'avx512'
	defines: '-DLLAMA_AVX512=ON'
	runs-on: macOS-latest
	steps:
	- name: Clone
	uses: actions/checkout@v4
	with:
	submodules: true
	- uses: actions/setup-go@v4
	with:
	go-version: '1.21.x'
	cache: false
	- name: Dependencies
	run: \|
	brew install protobuf grpc
	- name: Build
	id: build
	env:
	CMAKE_ARGS: "${{ matrix.defines }}"
	BUILD_ID: "${{ matrix.build }}"
	run: \|
	export C_INCLUDE_PATH=/usr/local/include
	export CPLUS_INCLUDE_PATH=/usr/local/include
	make dist
	- uses: actions/upload-artifact@v3
	with:
	name: ${{ matrix.build }}
	path: release/
	- name: Release
	uses: softprops/action-gh-release@v1
	if: startsWith(github.ref, 'refs/tags/')
	with:
	files: \|
	release/*

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use tokenizer.apply_chat_template() in vLLM #19

Workflow file

Use tokenizer.apply_chat_template() in vLLM #19

Jobs

Run details

Workflow file for this run