# daily_ete_test #220
# (Removed the GitHub web-viewer notice about hidden bidirectional Unicode
# characters: it is page chrome from the viewer, not part of the workflow.)
name: daily_ete_test

# Triggers:
#   * workflow_dispatch - manual run; the inputs below select the repository,
#     the ref, the test filters and which regression jobs to execute.
#   * schedule          - cron run; no inputs are supplied, so every job
#     relies on `github.event_name == 'schedule'` or on `||` fallbacks.
on:
  workflow_dispatch:
    inputs:
      # Used as the `repository:` argument of actions/checkout, so it must be
      # a full "owner/name" value rather than just an organization.
      repo_org:
        required: false
        description: 'Tested repository in "owner/name" form. Default is "InternLM/lmdeploy"'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # List literal parsed with fromJSON() in the test_tools step conditions.
      backend:
        required: true
        description: "Set backend testcase filter: turbomind, pytorch and/or turbomind-vl. Default is ['turbomind', 'pytorch', 'turbomind-vl']"
        type: string
        default: "['turbomind', 'pytorch', 'turbomind-vl']"
      # List literal parsed with fromJSON() in the test_tools step conditions.
      model:
        required: true
        description: 'Set testcase module filter: quantization, convert, pipeline, restful, chat, local_case. Default contains all modules'
        type: string
        default: "['quantization','convert','pipeline','restful','chat','local_case']"
      offline_mode:
        required: true
        description: 'Whether to start in offline mode; if true, you should prepare the code and whl package by yourself'
        type: boolean
        default: false
      dependency_pkgs:
        required: true
        description: 'Dependency packages, you can also set a specific version'
        type: string
        default: 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'
      tools_regression:
        required: true
        description: 'Whether start a tool regression'
        type: boolean
        default: true
      restful_regression:
        required: true
        description: 'Whether start a restful api regression'
        type: boolean
        default: true
      triton_regression:
        required: true
        description: 'Whether start a triton server api regression'
        type: boolean
        default: true
      pipeline_regression:
        required: true
        description: 'Whether start an interface pipeline regression'
        type: boolean
        default: true
  schedule:
    # Minute 00, hour 20, days-of-week 0-4 (Sunday through Thursday).
    - cron: '00 20 * * 0-4'

# Workflow-level environment shared by every job.
env:
  # Host paths bind-mounted into the container created by the test_triton job.
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  # Per-run wheel output folder: passed to build_wheel.sh and uploaded as the
  # build artifact by linux-build.
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  # NOTE(review): not referenced elsewhere in this workflow; presumably read
  # by Triton (the compiler) inside the test processes - confirm.
  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas

jobs:
  # Builds the lmdeploy wheels (one per matrix Python version) with the
  # manywheel builder script and uploads each as a one-day artifact named
  # my-artifact-<run_id>-<pyver>. Skipped for manual runs in offline mode.
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py38, py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          # Scheduled runs carry no inputs, hence the literal fallbacks.
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it (interactive/TTY flags) from the docker run call in the build script
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

# Job: test_tools (an entry of the top-level `jobs:` mapping).
  # Runs the autotest/tools suites (quantization, convert, chat, pipeline,
  # restful) plus the local issue-regression cases inside a tritonserver
  # container on a self-hosted A100 runner. Each suite is gated by the
  # `backend` / `model` inputs; scheduled runs execute everything.
  # Allure results are moved to a timestamped folder under REPORT_DIR.
  test_tools:
    needs: linux-build
    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.tools_regression)}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 300
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /root/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
    container:
      image: nvcr.io/nvidia/tritonserver:22.12-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/modelscope_hub:/root/modelscope_hub
        - /nvme/github-actions/modelscope_modules:/root/modelscope_modules
        - /nvme/github-actions/resources/lora:/root/lora
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # --- Source and wheel: online path (checkout + artifact) or offline copy ---
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          # Only the py38 wheel produced by linux-build is consumed here.
          name: my-artifact-${{ github.run_id }}-py38
      # --- Environment setup ---
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'}}
          # manually install flash attn
          # the installed package comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
          python3 -m pip install -U 'xformers<=0.0.26' --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          python3 -m pip install lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
          pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
          pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          cp -r /root/lora .
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
      # --- Test suites ---
      # Every suite below is continue-on-error so that one failing suite does
      # not stop the remaining ones. Within a step, the gpu_num_1 run ends in
      # `|| true` so the gpu_num_2 run always follows it.
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization'))
        run: |
          pytest autotest/tools/quantization/test_quantization_w4a16.py -m 'not pr_test' -n 8 --alluredir=allure-results --clean-alluredir
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'quantization'))
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=allure-results
      - name: Test lmdeploy - convert
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'convert'))
        run: |
          pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=allure-results
      - name: Test lmdeploy - chat workspace
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
        run: |
          pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - chat hf turbomind
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - chat hf torch
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'chat'))
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - pipeline turbomind
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - pipeline torch
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - pipeline turbomind vl
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - restful turbomind
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful'))
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - restful turbomind vl
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'restful'))
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - restful workspace
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful'))
        run: |
          pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      - name: Test lmdeploy - restful torch
        continue-on-error: true
        if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'restful'))
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      # Unlike the suites above, this step is not continue-on-error, so a
      # failure here fails the job.
      - name: Test lmdeploy - local testcase
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.model), 'local_case')
        run: |
          pytest /local_case/issue_regression --alluredir=allure-results
      # --- Reporting and cleanup (always run) ---
      - name: Generate reports
        if: always()
        run: |
          export date_today="$(date +'%Y%m%d-%H%M%S')"
          export report_dir="$REPORT_DIR/$date_today"
          echo "Save report to $report_dir"
          mv allure-results $report_dir
          chmod -R 777 $report_dir
      - name: Clear workfile
        if: always()
        run: |
          # Recreate the workspace directory empty and world-writable.
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

# Job: test_triton (an entry of the top-level `jobs:` mapping).
  # Creates a long-lived openmmlab/lmdeploy container by hand (docker create /
  # start), builds or installs lmdeploy inside it, converts the HF model to a
  # turbomind workspace, starts tritonserver over gRPC and runs
  # .github/scripts/test_triton_server.py against it.
  test_triton:
    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.triton_regression)}}
    runs-on: [self-hosted, linux-a100-2]
    needs: test_tools
    timeout-minutes: 30
    env:
      HF_MODEL: /nvme/qa_test_models/internlm-chat-20b
      WORKDIR: /nvme/qa_test_models/triton_workspace
      OFFLINE_PKGS: /nvme/qa_test_models/offline_pkg
      TB_MODEL: internlm-chat-20b-fp16-tp2
      GRPC_PORT: 33337
    steps:
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Create test container
        run: |
          # The container idles on `tail -f /dev/null`; later steps drive it
          # with `docker exec`. Two directories above the current one are
          # mounted at /__w, and --workdir points at /__w/lmdeploy/lmdeploy.
          export CONTAINER_ID=$(docker create \
            --rm \
            --gpus='"device=4,5"' \
            --shm-size 16g \
            --cap-add=SYS_PTRACE \
            --cap-add=SYS_ADMIN \
            --security-opt seccomp=unconfined \
            --name "lmdeploy-ci-triton-$GITHUB_RUN_ID" \
            --workdir /__w/lmdeploy/lmdeploy \
            --env NCCL_LAUNCH_MODE=GROUP \
            --pull never \
            -v $(pwd)/../../:/__w \
            -v ${HF_MODEL}:/root/workspace/hf_model \
            -v ${WORKDIR}:/root/workspace/workdir \
            -v ${OFFLINE_PKGS}:/root/workspace/offline_pkg \
            -v ${HOST_PIP_CACHE_DIR}:/root/.cache/pip \
            -v ${HOST_LOCALTIME}:/etc/localtime:ro \
            openmmlab/lmdeploy:latest tail -f /dev/null \
          )
          docker start $CONTAINER_ID
          echo "CONTAINER_ID=$CONTAINER_ID"
          # Export the id so that every following step can reference it.
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Build lmdeploy from source
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          docker exec $CONTAINER_ID mkdir build
          docker exec --workdir /__w/lmdeploy/lmdeploy/build \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            --env HTTP_PROXY=${{secrets.PROXY}} \
            --env HTTPS_PROXY=${{secrets.PROXY}} \
            --env no_proxy="localhost,127.0.0.1" \
            --env NO_PROXY="localhost,127.0.0.1" \
            $CONTAINER_ID cmake .. \
              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
              -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
              -DCMAKE_INSTALL_PREFIX=/opt/tritonserver \
              -DBUILD_PY_FFI=ON \
              -DBUILD_MULTI_GPU=ON \
              -DCMAKE_CUDA_FLAGS="-lineinfo" \
              -DUSE_NVTX=ON \
              -DSM=80 \
              -DCMAKE_CUDA_ARCHITECTURES=80 \
              -DBUILD_TEST=OFF
          docker exec --workdir /__w/lmdeploy/lmdeploy/build $CONTAINER_ID make -j$(nproc)
          # docker exec options must precede the container id; anything after
          # it is handed to the command (here: make) as its own arguments.
          docker exec --workdir /__w/lmdeploy/lmdeploy/build \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            --env HTTP_PROXY=${{secrets.PROXY}} \
            --env HTTPS_PROXY=${{secrets.PROXY}} \
            $CONTAINER_ID make install
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          docker exec \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            $CONTAINER_ID python3 -m pip install tritonclient[grpc] protobuf
          docker exec \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            $CONTAINER_ID python3 -m pip install -r requirements/test.txt
          docker exec \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            $CONTAINER_ID python3 -m pip install .
          docker exec $CONTAINER_ID lmdeploy check_env
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: |
          docker exec --workdir /__w/lmdeploy $CONTAINER_ID \
            cp -r /root/workspace/offline_pkg/lmdeploy .
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          docker exec \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            $CONTAINER_ID python3 -m pip install tritonclient[grpc] protobuf
          docker exec --workdir /__w/lmdeploy/lmdeploy \
            --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} \
            $CONTAINER_ID python3 -m pip install -r requirements/test.txt
          docker exec --env http_proxy=${{secrets.PROXY}} \
            --env https_proxy=${{secrets.PROXY}} $CONTAINER_ID \
            python3 -m pip install /root/workspace/offline_pkg/py38/lmdeploy-latest-cp38-cp38-manylinux2014_x86_64.whl
          docker exec $CONTAINER_ID lmdeploy check_env
      - name: Convert to turbomind model
        run: |
          docker exec $CONTAINER_ID \
            lmdeploy convert \
            internlm-chat-20b \
            /root/workspace/hf_model \
            --tp 2 \
            --trust-remote-code \
            --dst-path /root/workspace/workdir/${TB_MODEL}
      - name: Start triton server service
        run: |
          # finish.txt is only created once tritonserver exits, so its
          # presence after the wait means the server did not stay up.
          docker exec --detach $CONTAINER_ID bash -c \
            "tritonserver \
            --model-repository=/root/workspace/workdir/${TB_MODEL}/model_repository \
            --allow-http=0 \
            --allow-grpc=1 \
            --grpc-port=${GRPC_PORT} \
            --log-verbose=0 \
            --allow-metrics=1 > run.log 2>&1 ; touch finish.txt"
          # wait for triton server to fully start up
          sleep 180s
          # print triton server log file
          cat run.log
          python3 -c 'import os; assert not os.path.exists("finish.txt"), "Failed to start tritonserver"'
      - name: Test triton server
        run: |
          docker exec \
            --env no_proxy="localhost,127.0.0.1" \
            --env NO_PROXY="localhost,127.0.0.1" \
            $CONTAINER_ID python3 .github/scripts/test_triton_server.py --port ${GRPC_PORT}
          # print triton server log file
          cat run.log
      - name: Clear workfile
        if: always()
        run: |
          # Drop the converted model, open up permissions on the checkout,
          # stop the container, then recreate the workspace directory empty.
          docker exec --workdir /__w/lmdeploy $CONTAINER_ID rm -rf /root/workspace/workdir/${TB_MODEL}
          docker exec --workdir /__w/lmdeploy $CONTAINER_ID chmod -R 777 lmdeploy
          docker stop $CONTAINER_ID
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir

# Job: test_restful (an entry of the top-level `jobs:` mapping).
  # For each backend in the matrix, launches `lmdeploy serve api_server` in the
  # background (first with the chat model, then with the base model), runs the
  # matching autotest/interface/restful suite against it and kills the server.
  test_restful:
    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.restful_regression)}}
    runs-on: [self-hosted, linux-a100]
    needs: test_tools
    strategy:
      fail-fast: false
      matrix:
        backend: ['turbomind', 'pytorch']
    timeout-minutes: 300
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
    container:
      image: nvcr.io/nvidia/tritonserver:22.12-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # --- Source and wheel: online path (checkout + artifact) or offline copy ---
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py38
      # --- Environment setup ---
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'}}
          # manually install flash attn
          # the installed package comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          python3 -m pip install lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
      # --- Round 1: chat model ---
      # The server is started in the background; its pid is exported through
      # GITHUB_ENV so the later "Kill api server" step can terminate it. The
      # fixed sleep gives the server time to come up before the tests start.
      - name: Start restful api turbomind
        if: matrix.backend == 'turbomind'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-chat-20b --tp 2 > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch
        if: matrix.backend == 'pytorch'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-chat-20b --tp 2 --backend pytorch > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      - name: Test lmdeploy - restful api
        timeout-minutes: 75
        run: |
          pytest autotest/interface/restful/test_restful_chat_func.py -n 20 --alluredir=allure-results
      - name: Kill api server
        if: always()
        run: |
          kill -15 "$restful_pid"
      # --- Round 2: base model ---
      - name: Start restful api turbomind - base
        if: matrix.backend == 'turbomind'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-20b --tp 2 > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch - base
        if: matrix.backend == 'pytorch'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2-20b --tp 2 --backend pytorch > restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      - name: Test lmdeploy - restful api - base
        timeout-minutes: 40
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=allure-results
      - name: Kill api server
        if: always()
        run: |
          kill -15 "$restful_pid"
      # --- Reporting and cleanup (always run) ---
      - name: Generate reports
        if: always()
        run: |
          export date_today="$(date +'%Y%m%d-%H%M%S')"
          export report_dir="$REPORT_DIR/$date_today"
          echo "Save report to $report_dir"
          mv allure-results $report_dir
          chmod -R 777 $report_dir
      - name: Clear workfile
        if: always()
        run: |
          # Recreate the workspace directory empty and world-writable.
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

# Job: test_pipeline (an entry of the top-level `jobs:` mapping).
  # Runs the autotest/interface/pipeline suites inside a tritonserver container
  # on a self-hosted A100 runner and stores the allure results under REPORT_DIR.
  test_pipeline:
    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.pipeline_regression)}}
    runs-on: [self-hosted, linux-a100]
    needs: test_tools
    timeout-minutes: 300
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
    container:
      image: nvcr.io/nvidia/tritonserver:22.12-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # --- Source and wheel: online path (checkout + artifact) or offline copy ---
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. .
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py38
      # --- Environment setup ---
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath'}}
          # manually install flash attn
          # the installed package comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp38-cp38-linux_x86_64.whl
      - name: Install lmdeploy
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: |
          python3 -m pip install lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Install lmdeploy - offline
        if: ${{inputs.offline_mode}}
        run: |
          python3 -m pip install /nvme/qa_test_models/offline_pkg/py38/lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
      # --- Tests ---
      - name: Test lmdeploy - interface pipeline case
        run: |
          # The first two runs end in `|| true`, so only the exit status of
          # the last pytest invocation decides whether this step fails.
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=allure-results || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
      # --- Reporting and cleanup (always run) ---
      - name: Generate reports
        if: always()
        run: |
          export date_today="$(date +'%Y%m%d-%H%M%S')"
          export report_dir="$REPORT_DIR/$date_today"
          echo "Save report to $report_dir"
          mv allure-results $report_dir
          chmod -R 777 $report_dir
      - name: Clear workfile
        if: always()
        run: |
          # Recreate the workspace directory empty and world-writable.
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

# Job: notify_to_feishu (an entry of the top-level `jobs:` mapping).
  # Posts the outcome of the regression jobs to the Feishu webhook, only for
  # runs on the develop or main ref. Every regression job is listed in `needs`
  # so that a failure in any of them, test_pipeline included, is reported.
  notify_to_feishu:
    if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
    needs: [test_tools, test_triton, test_restful, test_pipeline]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    steps:
      # The JSON payload is single-quoted for the shell; the quote is closed
      # and reopened around each value that has to be spliced in.
      - name: fail notify
        if: contains(needs.*.result, 'failure')
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test failed!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}
      # Sent only when every needed job finished with `success`; a skipped job
      # therefore suppresses the success message.
      - name: success notify
        if: needs.test_tools.result=='success' && needs.test_triton.result=='success' && needs.test_restful.result=='success' && needs.test_pipeline.result=='success'
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test success","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} succeeded. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }}