Skip to content

🦙 Llama3 on TGI - Jetstream Pytorch #241

🦙 Llama3 on TGI - Jetstream Pytorch

🦙 Llama3 on TGI - Jetstream Pytorch #241

name: Optimum TPU / Test TGI on TPU
on:
push:
branches: [ main ]
paths:
- "text-generation-inference/**"
pull_request:
branches: [ main ]
paths:
- "text-generation-inference/**"
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
do-the-job:
name: Run TGI tests
runs-on: optimum-tpu
container:
# Use a nightly image that works with TPU (release was not working)
image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
options: --shm-size "16gb" --ipc host --privileged
env:
PJRT_DEVICE: TPU
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Build and test TGI server
run: |
HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} make tgi_test
# Use a different step to test the Jetstream Pytorch version, to avoid conflicts with torch-xla[tpu]
- name: Install and test TGI server (Jetstream Pytorch)
run: |
pip install -U .[jetstream-pt] \
-f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
-f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html \
-f https://storage.googleapis.com/libtpu-releases/index.html
JETSTREAM_PT=1 HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} python -m \
pytest -sv text-generation-inference/tests -k jetstream