Skip to content

Commit

Permalink
Add quantization parameter for lmi_dist rolling batch backend for HF (deepjavalibrary#888)

Browse files: browse the repository at this point in the history.

* Set quantization param from properties file

* Format python

* Set quantize if dtype==int8

* Address review comments

* Adding BITSANDBYTES_NOWELCOME flag to fastertransformer

* Add back
  • Loading branch information
maaquib authored and KexinFeng committed Aug 16, 2023
1 parent ab2cf4f commit 9a0b346
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import torch

QUANTIZATION_SUPPORT_ALGO = ["bitsandbytes"]


class LmiDistRollingBatch(RollingBatch):

Expand All @@ -46,11 +48,22 @@ def __init__(self, model_id_or_path, device, properties, **kwargs):
def _init_model(self, kwargs, model_id_or_path):
self.config = AutoConfig.from_pretrained(model_id_or_path, **kwargs)
sharded = int(self.properties.get("tensor_parallel_degree", "-1")) > 1
quantize = self.properties.get("quantize", None)
dtype = self.properties.get("dtype", None)
if quantize is not None and dtype is not None:
raise ValueError(
f"Can't set both dtype: {dtype} and quantize: {quantize}")
if quantize is not None and quantize not in QUANTIZATION_SUPPORT_ALGO:
raise ValueError(
f"Invalid value for quantize: {quantize}. Valid values are: {QUANTIZATION_SUPPORT_ALGO}"
)
if quantize is None and dtype == "int8":
quantize = "bitsandbytes"
self.model = get_model(
model_id_or_path,
revision=None,
sharded=sharded,
quantize=None,
quantize=quantize,
trust_remote_code=kwargs.get("trust_remote_code"))
self.batch_cls = self.model.batch_type

Expand Down
1 change: 1 addition & 0 deletions serving/docker/deepspeed.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ ENV DJL_CACHE_DIR=/tmp/.djl.ai
ENV HUGGINGFACE_HUB_CACHE=/tmp
ENV TRANSFORMERS_CACHE=/tmp
ENV PYTORCH_KERNEL_CACHE_PATH=/tmp
ENV BITSANDBYTES_NOWELCOME=1

ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]
Expand Down
1 change: 1 addition & 0 deletions serving/docker/fastertransformer.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ ENV DJL_CACHE_DIR=/tmp/.djl.ai
ENV HUGGINGFACE_HUB_CACHE=/tmp
ENV TRANSFORMERS_CACHE=/tmp
ENV PYTORCH_KERNEL_CACHE_PATH=/tmp
ENV BITSANDBYTES_NOWELCOME=1

ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]
Expand Down

0 comments on commit 9a0b346

Please sign in to comment.