refactor: Refactor nitro with cortext.tensorrtllm engine #13

Workflow file for this run

name: CI
on:
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch:
jobs:
  build-and-test:
    runs-on: ${{ matrix.runs-on }}
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - os: "linux"
            name: "amd64-cuda-12-0"
            runs-on: "ubuntu-18-04-cuda-12-0"
            # runs-on: "ubuntu-18-04"
          # - os: "linux"
          #   name: "amd64"
          #   runs-on: "ubuntu-latest"
          # - os: "windows"
          #   name: "amd64"
          #   runs-on: "windows-cuda-12-0"
          # - os: "windows"
          #   name: "amd64"
          #   runs-on: "windows-latest"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
      - name: Install make on Windows
        if: runner.os == 'windows'
        run: |
          choco install make -y
      - name: Install dependencies
        run: |
          # -y keeps apt-get non-interactive; without it the step can hang or fail in CI.
          sudo apt-get install -y uuid-dev
          sudo apt-get install -y libopenmpi-dev openmpi-bin
          ldd --version
          cd cpp/tensorrt_llm/cortex.tensorrt-llm
          make install-dependencies
      - name: Build engine
        run: |
          cd cpp/tensorrt_llm/cortex.tensorrt-llm
          make build-engine
      - name: Build example server
        run: |
          cd cpp/tensorrt_llm/cortex.tensorrt-llm
          make build-example-server
      # - name: Download engine
      #   run: |
      #     cd cpp/tensorrt_llm/cortex.tensorrt-llm/examples/server/build
      #     wget https://huggingface.co/HHrecode/tllm_checkpoint_1gpu_fp8_hermes_engine/resolve/main/engines.tar.gz
      #     tar -zxvf engines.tar.gz
      #     export LD_LIBRARY_PATH=$(pwd)/engines/cortex.tensorrtllm
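      #   Note if this step is re-enabled: `export` only affects this step's
      #   shell, so later steps would not see LD_LIBRARY_PATH. To persist it
      #   across steps, append it to the GitHub Actions environment file instead:
      #   echo "LD_LIBRARY_PATH=$(pwd)/engines/cortex.tensorrtllm" >> "$GITHUB_ENV"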
      - name: Test start server
        run: |
          cd cpp/tensorrt_llm/cortex.tensorrt-llm/examples/server/build
          # Smoke test: launch the server in the background, give it a moment
          # to initialize, then shut it down.
          ./server &
          pid=$!
          sleep 5
          kill $pid
      # - name: Run e2e test
      #   run: |
      #     cd cpp/tensorrt_llm/cortex.tensorrt-llm
      #     make run-e2e-test
      # - name: Package
      #   run: |
      #     make package
      # - name: Upload Artifact
      #   uses: actions/upload-artifact@v2
      #   with:
      #     name: cortex.tensorrt-llm-${{ matrix.os }}-${{ matrix.name }}
      #     path: ./cortex.tensorrt-llm.tar.gz
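A possible hardening of the "Test start server" smoke test above: instead of killing the server after a fixed delay, poll it until it answers and fail the step if it never does. This is a sketch, not part of this run; the port (3928) and the bare GET against / are assumptions and would need to match whatever the example server actually binds and serves.

      - name: Test start server (with health poll)
        run: |
          cd cpp/tensorrt_llm/cortex.tensorrt-llm/examples/server/build
          ./server &
          pid=$!
          # Poll for up to 30 s; port 3928 is an assumed default, adjust as needed.
          ok=0
          for i in $(seq 1 30); do
            if curl -sf http://127.0.0.1:3928/ >/dev/null; then ok=1; break; fi
            sleep 1
          done
          kill $pid
          # Fail the step if the server never responded.
          test "$ok" = "1"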