Skip to content

Commit

Permalink
Fix vLLM template (#734)
Browse files Browse the repository at this point in the history
* Update vllm_mixtral.py

* Fix template

* Fix template
  • Loading branch information
bofenghuang committed May 5, 2024
1 parent e0b46de commit a238c97
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/vllm_gemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class Model:
@modal.enter()
def load(self):
self.template = (
- "start_of_turn>user\n{user}<end_of_turn>\n<start_of_turn>model"
+ "<start_of_turn>user\n{user}<end_of_turn>\n<start_of_turn>model\n"
)

# Load the model. Tip: Some models, like MPT, may require `trust_remote_code=true`.
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/llm-serving/vllm_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@ class Model:
def load_model(self):
# Tip: models that are not fully implemented by Hugging Face may require `trust_remote_code=true`.
self.llm = vllm.LLM(MODEL_DIR, tensor_parallel_size=GPU_CONFIG.count)
- self.template = """<s>[INST] <<SYS>>
+ self.template = """[INST] <<SYS>>
{system}
<</SYS>>
- {user} [/INST] """
+ {user} [/INST]"""

@modal.method()
def generate(self, user_questions):
Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/llm-serving/vllm_mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def start_engine(self):
disable_log_stats=True, # disable logging so we can stream tokens
disable_log_requests=True,
)
- self.template = "<s> [INST] {user} [/INST] "
+ self.template = "[INST] {user} [/INST]"

# this can take some time!
self.engine = AsyncLLMEngine.from_engine_args(engine_args)
Expand Down

0 comments on commit a238c97

Please sign in to comment.