Skip to content

feat: add openai compatible path #138

feat: add openai compatible path

feat: add openai compatible path #138

Workflow file for this run

name: CI
on:
push:
branches:
- main
tags: ['v*.*.*']
paths: ['.github/scripts/**','.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cc', '**/*.cxx', '!docs/**', '!.gitignore']
pull_request:
types: [opened, synchronize, reopened]
paths: ['.github/scripts/**','.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cc', '**/*.cxx', '!docs/**', '!.gitignore']
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
jobs:
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ github.ref_name }}
release_name: "${{ env.VERSION }}"
draft: true
prerelease: false
ubuntu-amd64-build:
runs-on: linux-gpu
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
# - name: Dependencies
# id: depends
# run: |
# sudo apt-get update
# sudo apt-get install build-essential gcc-8
- name: Build
id: make_build
run: |
./install_deps.sh
mkdir build && cd build
cmake -DDEBUG=ON ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64.zip
asset_content_type: application/zip
ubuntu-amd64-cuda-build:
runs-on: linux-gpu
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
# - name: Dependencies
# id: depends
# run: |
# sudo apt-get update
# sudo apt-get install build-essential gcc-8 uuid-dev
- name: Build
id: make_build
run: |
./install_deps.sh
mkdir build && cd build
cmake -DDEBUG=ON -DLLAMA_CUBLAS=ON ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda.zip
asset_content_type: application/zip
macOS-M-build:
runs-on: mac-silicon
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update
brew install cmake gcc@8
- name: Build
id: cmake_build
run: |
./install_deps.sh
mkdir build && cd build
cmake ..
CC=gcc-8 make -j $(sysctl -n hw.ncp)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp llama.cpp/ggml-metal.metal nitro/
cp build/nitro nitro/
zip -r nitro.zip nitro
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-arm64.zip
asset_content_type: application/zip
macOS-Intel-build:
runs-on: macos-latest
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update
- name: Build
id: cmake_build
run: |
./install_deps.sh
mkdir build && cd build
cmake -DLLAMA_METAL=OFF ..
CC=gcc-8 make -j $(sysctl -n hw.ncp)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.zip
asset_content_type: application/zip
windows-amd64-build:
runs-on: windows-latest
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
env:
OPENBLAS_VERSION: 0.3.23
OPENCL_VERSION: 2023.04.17
CLBLAST_VERSION: 1.6.0
strategy:
matrix:
include:
- build: 'normal'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Setup VSWhere.exe
uses: warrenbuckley/Setup-VSWhere@v1
with:
version: latest
silent: true
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
- name: actions-setup-cmake
uses: jwlawson/[email protected]
- name: Build
id: cmake_build
shell: cmd
run: |
cmake -S ./nitro_deps -B ./build_deps/nitro_deps
cmake --build ./build_deps/nitro_deps --config Release
mkdir -p build
cd build
cmake ..
cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
- name: Pack artifacts
id: pack_artifacts
shell: cmd
run: |
robocopy build_deps\_install\bin .\build\Release zlib.dll
robocopy build\bin\Release .\build\Release llama.dll
7z a nitro.zip .\build\Release\*
cd .\build\Release
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip
asset_content_type: application/zip
windows-amd64-cuda-build:
runs-on: windows-nvidia
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
strategy:
matrix:
cuda: ['12.2.0']
build: ['cublas']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Setup VSWhere.exe
uses: warrenbuckley/Setup-VSWhere@v1
with:
version: latest
silent: true
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
- name: Build
id: cmake_build
shell: cmd
run: |
cmake -S ./nitro_deps -B ./build_deps/nitro_deps
cmake --build ./build_deps/nitro_deps --config Release
mkdir -p build
cd build
cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON
cmake --build . --config Release -j "%NUMBER_OF_PROCESSORS%"
- name: Pack artifacts
id: pack_artifacts
shell: cmd
run: |
set PATH=%PATH%;C:\Program Files\7-Zip\
robocopy build_deps\_install\bin .\build\Release zlib.dll
robocopy build\bin\Release .\build\Release llama.dll
7z a nitro.zip .\build\Release\*
cd .\build\Release
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip
asset_content_type: application/zip
update_release_draft:
needs: [ubuntu-amd64-build, ubuntu-amd64-cuda-build, macOS-M-build, macOS-Intel-build, windows-amd64-build, windows-amd64-cuda-build]
permissions:
contents: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}