Merge branch 'main' into medusa
Conflicts:
	lmdeploy/pytorch/engine/model_agent.py
	lmdeploy/pytorch/kernels/cuda/flashattention.py
AllentDan committed Dec 5, 2024
2 parents dcc6e85 + 9bfdeae commit cc5d110
Showing 189 changed files with 7,054 additions and 8,303 deletions.
11 changes: 11 additions & 0 deletions .github/scripts/eval_base_config.py
@@ -89,6 +89,17 @@
models as lmdeploy_qwen1_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \
models as lmdeploy_qwen2_7b # noqa: F401, E501
# Summary Groups
from opencompass.configs.summarizers.groups.cmmlu import \
cmmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.GaokaoBench import \
GaokaoBench_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mathbench_v1_2024 import \
mathbench_2024_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mmlu import \
mmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mmlu_pro import \
mmlu_pro_summary_groups # noqa: F401, E501

# read models
race_datasets = [race_datasets[1]]
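The new imports above pull per-benchmark summary-group lists (cmmlu, GaokaoBench, mathbench, mmlu, mmlu_pro) into the config's namespace. As a minimal sketch of how such lists are usually consumed, OpenCompass configs conventionally sweep every imported *_summary_groups variable into the summarizer; the snippet below is illustrative and not part of this commit:

# Aggregate every imported *_summary_groups list into the summarizer
# (assumption: the standard OpenCompass config convention applies here;
# dataset abbreviations are examples only).
summarizer = dict(
    dataset_abbrs=['mmlu', 'cmmlu', 'GaokaoBench', 'mathbench-2024'],
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')],
        [],
    ),
)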
21 changes: 21 additions & 0 deletions .github/scripts/eval_chat_config.py
@@ -98,6 +98,27 @@
models as lmdeploy_qwen2_7b_instruct # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen_7b_chat import \
models as lmdeploy_qwen_7b_chat # noqa: F401, E501
# Summary Groups
from opencompass.configs.summarizers.groups.bbh import \
bbh_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.cmmlu import \
cmmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.ds1000 import \
ds1000_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.GaokaoBench import \
GaokaoBench_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.humanevalx import \
humanevalx_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mathbench_v1_2024 import \
mathbench_2024_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mmlu import \
mmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.mmlu_pro import \
mmlu_pro_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.scicode import \
scicode_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.teval import \
teval_summary_groups # noqa: F401, E501

llama2_meta_template = dict(round=[
dict(role='HUMAN', begin='[INST] ', end=' [/INST]'),
47 changes: 29 additions & 18 deletions .github/workflows/daily_ete_test.yml
@@ -17,10 +17,15 @@ on:
required: true
description: 'Set backend testcase filter: turbomind, pytorch, or both. Default is ["turbomind", "pytorch"]'
type: string
default: '["turbomind", "pytorch", "turbomind_vl"]'
default: "['turbomind', 'pytorch']"
model:
required: true
description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
description: 'Set testcase module filter: llm, mllm. Default contains all models'
type: string
default: "['llm','mllm']"
function:
required: true
description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
type: string
default: '["pipeline", "restful", "chat"]'
offline_mode:
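Because the job matrix consumes these inputs through fromJSON, each filter must arrive as a JSON-array string. For illustration only (the repository path and token are assumptions, not part of this commit), a dispatch through the GitHub REST API would look like:

# Hypothetical dispatch of this workflow with custom filters.
# Assumes a token with workflow scope and that the workflow lives at
# InternLM/lmdeploy/.github/workflows/daily_ete_test.yml.
import requests

resp = requests.post(
    'https://api.github.com/repos/InternLM/lmdeploy/actions/workflows/'
    'daily_ete_test.yml/dispatches',
    headers={
        'Authorization': 'Bearer <token>',
        'Accept': 'application/vnd.github+json',
    },
    json={
        'ref': 'main',
        'inputs': {
            'backend': '["turbomind"]',  # JSON-array strings, parsed by fromJSON
            'model': '["llm"]',
            'function': '["chat"]',
        },
    },
)
resp.raise_for_status()  # the API returns 204 No Content on success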
@@ -206,14 +211,20 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch", "turbomind_vl"]')}}
model: ${{ fromJSON(inputs.model || '["pipeline", "restful", "chat"]')}}
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
exclude:
- backend: turbomind_vl
model: chat
- backend: turbomind
model: mllm
function: chat
- backend: pytorch
model: mllm
function: chat
include:
- backend: turbomind
model: local_case
model: llm
function: local_case
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
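The reworked matrix splits the old single model axis into model (llm, mllm) and function (pipeline, restful, chat), drops the chat function for mllm on both backends, and adds one local_case job. A simplified Python model of GitHub Actions' cross-product, exclude, and include semantics (a sketch for clarity, not part of this commit):

from itertools import product

backends = ['turbomind', 'pytorch']
models = ['llm', 'mllm']
functions = ['pipeline', 'restful', 'chat']

excludes = [  # mllm chat jobs are dropped on both backends
    {'backend': 'turbomind', 'model': 'mllm', 'function': 'chat'},
    {'backend': 'pytorch', 'model': 'mllm', 'function': 'chat'},
]
includes = [{'backend': 'turbomind', 'model': 'llm', 'function': 'local_case'}]

jobs = [dict(zip(('backend', 'model', 'function'), combo))
        for combo in product(backends, models, functions)]
jobs = [job for job in jobs if job not in excludes] + includes
print(len(jobs))  # 2 * 2 * 3 = 12 combinations, minus 2 excludes, plus 1 include = 11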
@@ -261,46 +272,46 @@ jobs:
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat workspace
continue-on-error: true
if: matrix.backend == 'turbomind' && matrix.model == 'chat'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'chat'
run: |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - chat
continue-on-error: true
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'chat'
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
run: |
pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - pipeline
continue-on-error: true
if: matrix.model == 'pipeline'
if: matrix.function == 'pipeline'
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - restful
continue-on-error: true
if: matrix.model == 'restful'
if: matrix.function == 'restful'
run: |
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - restful workspace
continue-on-error: true
if: matrix.backend == 'turbomind' && matrix.model == 'restful'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'restful'
run: |
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - local testcase
if: matrix.backend == 'turbomind' && matrix.model == 'local_case'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'local_case'
run: |
pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
@@ -321,7 +332,7 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: ['turbomind', 'pytorch']
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu11
47 changes: 29 additions & 18 deletions .github/workflows/daily_ete_test_v100.yml
@@ -17,10 +17,15 @@ on:
required: true
description: 'Set backend testcase filter: turbomind, pytorch, or both. Default is ["turbomind", "pytorch"]'
type: string
default: '["turbomind", "pytorch", "turbomind_vl"]'
default: "['turbomind', 'pytorch']"
model:
required: true
description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
description: 'Set testcase module filter: llm, mllm. Default contains all models'
type: string
default: "['llm','mllm']"
function:
required: true
description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
type: string
default: '["pipeline", "restful", "chat"]'
offline_mode:
@@ -201,14 +206,20 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch", "turbomind_vl"]')}}
model: ${{ fromJSON(inputs.model || '["pipeline", "restful", "chat"]')}}
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
model: ${{ fromJSON(inputs.model || '["llm", "mllm"]')}}
function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]')}}
exclude:
- backend: turbomind_vl
model: chat
- backend: turbomind
model: mllm
function: chat
- backend: pytorch
model: mllm
function: chat
include:
- backend: turbomind
model: local_case
model: llm
function: local_case
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
@@ -255,46 +266,46 @@ jobs:
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat workspace
continue-on-error: true
if: matrix.backend == 'turbomind' && matrix.model == 'chat'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'chat'
run: |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - chat
continue-on-error: true
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'chat'
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat'
run: |
pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - pipeline
continue-on-error: true
if: matrix.model == 'pipeline'
if: matrix.function == 'pipeline'
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - restful
continue-on-error: true
if: matrix.model == 'restful'
if: matrix.function == 'restful'
run: |
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}_${{matrix.model}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - restful workspace
continue-on-error: true
if: matrix.backend == 'turbomind' && matrix.model == 'restful'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'restful'
run: |
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
- name: Test lmdeploy - local testcase
if: matrix.backend == 'turbomind' && matrix.model == 'local_case'
if: matrix.backend == 'turbomind' && matrix.model == 'llm' && matrix.function == 'local_case'
run: |
pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
@@ -315,7 +326,7 @@ jobs:
strategy:
fail-fast: false
matrix:
backend: ['turbomind', 'pytorch']
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -304,7 +304,7 @@ link_directories(

# add_subdirectory(3rdparty)
add_subdirectory(src)
add_subdirectory(examples)
# add_subdirectory(examples)

if(BUILD_TEST)
add_subdirectory(tests/csrc)
2 changes: 2 additions & 0 deletions README.md
@@ -157,6 +157,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
<li>InternVL-Chat (v1.1-v1.5)</li>
<li>InternVL2 (1B-76B)</li>
<li>Mono-InternVL (2B)</li>
<li>ChemVLM (8B-26B)</li>
<li>MiniGeminiLlama (7B)</li>
<li>CogVLM-Chat (17B)</li>
<li>CogVLM2-Chat (19B)</li>
@@ -166,6 +167,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
<li>Phi-3.5-vision (4.2B)</li>
<li>GLM-4V (9B)</li>
<li>Llama3.2-vision (11B, 90B)</li>
<li>Molmo (7B-D, 72B)</li>
</ul>
</td>
</tr>
3 changes: 3 additions & 0 deletions README_ja.md
@@ -152,6 +152,8 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
<li>DeepSeek-VL (7B)</li>
<li>InternVL-Chat (v1.1-v1.5)</li>
<li>InternVL2 (1B-76B)</li>
<li>Mono-InternVL (2B)</li>
<li>ChemVLM (8B-26B)</li>
<li>MiniGeminiLlama (7B)</li>
<li>CogVLM-Chat (17B)</li>
<li>CogVLM2-Chat (19B)</li>
@@ -161,6 +163,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
<li>Phi-3.5-vision (4.2B)</li>
<li>GLM-4V (9B)</li>
<li>Llama3.2-vision (11B, 90B)</li>
<li>Molmo (7B-D, 72B)</li>
</ul>
</td>
</tr>
2 changes: 2 additions & 0 deletions README_zh-CN.md
@@ -158,6 +158,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
<li>InternVL-Chat (v1.1-v1.5)</li>
<li>InternVL2 (1B-76B)</li>
<li>Mono-InternVL (2B)</li>
<li>ChemVLM (8B-26B)</li>
<li>MiniGeminiLlama (7B)</li>
<li>CogVLM-Chat (17B)</li>
<li>CogVLM2-Chat (19B)</li>
@@ -167,6 +168,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
<li>Phi-3.5-vision (4.2B)</li>
<li>GLM-4V (9B)</li>
<li>Llama3.2-vision (11B, 90B)</li>
<li>Molmo (7B-D, 72B)</li>
</ul>
</td>
</tr>
(The remaining 181 changed files are not rendered here.)
