Package 'bin/llama_gemm' to wheel (#320)
* pack llama_gemm

* update CMakeLists.txt

* remove candidate

* update MANIFEST.in
irexyc authored Sep 1, 2023
1 parent eaccbc0 commit 22e8b2c
Showing 6 changed files with 40 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -44,6 +44,7 @@ htmlcov/
 *build*/
 !builder/
 lmdeploy/lib/
+lmdeploy/bin/
 dist/
 examples/cpp/llama/*.csv
 *.npy
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -3,5 +3,6 @@ include lmdeploy/lib/*.so
 include lmdeploy/lib/*.so*
 include lmdeploy/lib/*.dll
 include lmdeploy/lib/*.pyd
+include lmdeploy/bin/*
 include lmdeploy/serve/turbomind/service_docker_up.sh
 recursive-include lmdeploy/serve/turbomind/triton_models *
22 changes: 0 additions & 22 deletions examples/cpp/llama/generate_gemm_config.py

This file was deleted.

33 changes: 33 additions & 0 deletions lmdeploy/turbomind/generate_gemm_config.py
@@ -0,0 +1,33 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import subprocess
+
+import fire
+
+
+def get_llama_gemm():
+    import os.path as osp
+
+    import lmdeploy
+    lmdeploy_dir = osp.split(lmdeploy.__file__)[0]
+    bin_path = osp.join(lmdeploy_dir, 'bin', 'llama_gemm')
+    assert osp.exists(bin_path), f'{bin_path} not exists'
+    return bin_path
+
+
+def main(head_num: int = 32,
+         size_per_head: int = 128,
+         vocab_size: int = 32000,
+         inter_size: int = 11008,
+         tensor_para_size: int = 1,
+         max_batch_size: int = 64):
+    for bsz in range(1, max_batch_size + 1):
+        subprocess.call(
+            f'{get_llama_gemm()} {bsz} 1 1 {head_num} {size_per_head}'
+            f' {inter_size} {vocab_size} 1 {tensor_para_size}'
+            f' {0 if bsz == 1 else 1}',
+            shell=True)
+
+
+if __name__ == '__main__':
+    fire.Fire(main)
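
Because the script hands main to fire.Fire, each keyword argument of main doubles as a command-line flag, so the tuner can be invoked as 'python -m lmdeploy.turbomind.generate_gemm_config --tensor_para_size 1' once the package is installed. A minimal programmatic sketch, assuming an installed lmdeploy wheel that ships the binary:

    # Sweep batch sizes 1..max_batch_size; the values mirror the defaults above.
    from lmdeploy.turbomind.generate_gemm_config import main

    main(head_num=32,
         size_per_head=128,
         vocab_size=32000,
         inter_size=11008,
         tensor_para_size=1,
         max_batch_size=64)
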
4 changes: 4 additions & 0 deletions setup.py
@@ -120,6 +120,7 @@ def gen_packages_items():
 
 
 if __name__ == '__main__':
+    lmdeploy_package_data = ['lmdeploy/bin/llama_gemm']
     setup(name='lmdeploy',
           version=get_version(),
           description='A toolset for compressing, deploying and serving LLM',
@@ -128,6 +129,9 @@ def gen_packages_items():
           author='OpenMMLab',
           author_email='openmmlab@gmail.com',
           packages=find_packages(exclude=()),
+          package_data={
+              'lmdeploy': lmdeploy_package_data,
+          },
           include_package_data=True,
           install_requires=parse_requirements('requirements.txt'),
           has_ext_modules=check_ext_modules,
1 change: 1 addition & 0 deletions src/turbomind/models/llama/CMakeLists.txt
@@ -48,3 +48,4 @@ endif()
 
 add_executable(llama_gemm llama_gemm.cc)
 target_link_libraries(llama_gemm PUBLIC CUDA::cudart gpt_gemm_func memory_utils cuda_utils logger)
+install(TARGETS llama_gemm DESTINATION ${CMAKE_SOURCE_DIR}/lmdeploy/bin)
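
This install rule copies the built llama_gemm executable into lmdeploy/bin inside the source tree, where the .gitignore, MANIFEST.in and package_data changes above pick it up when the wheel is built. A quick post-install smoke test, sketched with the helper this commit adds:

    from lmdeploy.turbomind.generate_gemm_config import get_llama_gemm

    # Resolves to <site-packages>/lmdeploy/bin/llama_gemm; the assert inside
    # get_llama_gemm fails if the binary was not packaged into the wheel.
    print(get_llama_gemm())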
