Set quantization param from properties file
maaquib committed Jun 30, 2023
1 parent 069dfd0 commit 9c502aa
Showing 1 changed file with 7 additions and 2 deletions.
@@ -26,7 +26,7 @@
     Request,
     Generation
 )
-
+import logging
 import torch
 
 ARCHITECTURE_2_BATCH_CLS = {
@@ -67,10 +67,15 @@ def _init_model(self, kwargs, model_id_or_path):
                                             **kwargs)
         self.batch_cls = get_batch_cls_from_architecture(self.config.architectures[0])
         sharded = int(self.properties.get("tensor_parallel_degree", "-1")) > 1
+        quantize = self.properties.get("quantize", None)
+        if quantize and quantize != "bitsandbytes":
+            logging.info(f"Invalid value for quantize: {quantize}. Only `bitsandbytes` quantization is supported. "
+                         f"Setting quantization to None")
+            quantize = None
         self.model = get_model(model_id_or_path,
                                revision=None,
                                sharded=sharded,
-                               quantize=None,
+                               quantize=quantize,
                                trust_remote_code=kwargs.get("trust_remote_code"))
 
     def inference(self, input_data, parameters):

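For reference, a minimal, self-contained sketch of the fallback behavior this commit introduces, written against a plain dict standing in for the handler's self.properties; the helper name resolve_quantize and the example values are illustrative and not part of the commit.

import logging

def resolve_quantize(properties):
    # Mirrors the validation added above: only `bitsandbytes` is accepted;
    # any other value is logged and dropped so the model loads unquantized.
    quantize = properties.get("quantize", None)
    if quantize and quantize != "bitsandbytes":
        logging.info(f"Invalid value for quantize: {quantize}. "
                     f"Only `bitsandbytes` quantization is supported. "
                     f"Setting quantization to None")
        quantize = None
    return quantize

print(resolve_quantize({"quantize": "gptq"}))          # None (unsupported value)
print(resolve_quantize({"quantize": "bitsandbytes"}))  # bitsandbytes
print(resolve_quantize({}))                            # None (property not set)

In DJL Serving deployments the value typically originates from a serving.properties entry such as option.quantize=bitsandbytes, although that mapping is outside the scope of this diff.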