|
1 |
| -import pickle |
2 | 1 | import logging
|
| 2 | +import pickle |
| 3 | +from typing import List, Optional, Any |
3 | 4 |
|
4 | 5 | import grpc
|
5 |
| - |
6 | 6 | from lightgbm import Booster
|
7 | 7 |
|
| 8 | +from .exceptions import PythieServingException |
| 9 | +from .tensorflow_proto.tensorflow_serving.apis import ( |
| 10 | + predict_pb2, |
| 11 | + prediction_service_pb2_grpc, |
| 12 | +) |
8 | 13 | from .tensorflow_proto.tensorflow_serving.config import model_server_config_pb2
|
9 |
| -from .tensorflow_proto.tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc |
10 | 14 | from .utils import make_ndarray_from_tensor
|
11 |
| -from .exceptions import PythieServingException |
12 | 15 |
|
13 | 16 |
|
14 |
class LightGBMPredictionServiceServicer(
    prediction_service_pb2_grpc.PredictionServiceServicer
):
    """Prediction servicer backed by pickled LightGBM models.

    Every model listed in the server configuration is unpickled once at
    construction time and stored in ``self.model_map`` under its configured
    name; ``Predict`` then looks the model up by the name carried in the
    request.
    """

    def __init__(
        self,
        *,
        logger: logging.Logger,
        model_server_config: model_server_config_pb2.ModelServerConfig,
    ):
        """Unpickle every configured model and index it by name.

        Args:
            logger: Logger stored on the instance as ``self.logger``.
            model_server_config: Configuration whose
                ``model_config_list.config`` entries provide the ``name`` and
                ``base_path`` (path of the pickle file) of each model.
        """
        self.logger = logger
        self.model_map = {}
        for model_config in model_server_config.model_config_list.config:
            # SECURITY: pickle.load runs arbitrary code embedded in the file,
            # so base_path must only ever point at trusted model artifacts.
            with open(model_config.base_path, "rb") as opened_model:
                model = pickle.load(opened_model)

            if isinstance(model, Booster):
                feature_names = model.feature_name()
                best_iteration = model.best_iteration
            else:
                feature_names = model.feature_names
                best_iteration = getattr(model, "best_iteration", None)

            self.model_map[model_config.name] = {
                "model": model,
                "feature_names": feature_names,
                "best_iteration": best_iteration,
                # Read defensively: only Booster is known to expose this
                # attribute, other pickled model types may not have it.
                "pandas_categorical": getattr(model, "pandas_categorical", None),
            }

    def Predict(self, request: predict_pb2.PredictRequest, context: grpc.RpcContext):
        """Run the requested model on the feature tensors of ``request``.

        Each model feature must be present in ``request.inputs`` as a tensor
        of shape ``(n_samples, 1)``. Features whose decoded array has dtype
        ``object`` are treated as categorical: each value is replaced by its
        position in the category list saved with the model.

        Args:
            request: Predict request; ``request.model_spec.name`` selects the
                model and ``request.inputs`` holds one tensor per feature.
            context: gRPC call context (not used by this method).

        Returns:
            Whatever the model's ``predict`` returns for the assembled samples.

        Raises:
            PythieServingException: If the model is unknown, a feature is
                missing or badly shaped, features disagree on the number of
                rows, or a categorical value cannot be mapped to a saved
                category.
        """
        model_name = request.model_spec.name
        if model_name not in self.model_map:
            raise PythieServingException(
                f"Unknown model: {model_name}. This pythie-serving instance can only "
                f'serve one of the following: {",".join(self.model_map.keys())}'
            )

        model_dict = self.model_map[model_name]
        model = model_dict["model"]
        pandas_categorical = model_dict["pandas_categorical"]

        # Number of object-dtype features met so far; used as the index of the
        # next category list inside pandas_categorical.
        categorical_seen = 0
        samples: Optional[List[List[Any]]] = None
        for feature_name in model_dict["feature_names"]:
            if feature_name not in request.inputs:
                raise PythieServingException(
                    f"{feature_name} not set in the predict request"
                )

            nd_array = make_ndarray_from_tensor(request.inputs[feature_name])
            if len(nd_array.shape) != 2 or nd_array.shape[1] != 1:
                raise PythieServingException("All input vectors should be 1D tensor")

            if samples is None:
                samples = [[] for _ in range(nd_array.shape[0])]
            elif nd_array.shape[0] != len(samples):
                # Without this check a longer tensor would fail with an
                # IndexError and a shorter one would leave ragged rows.
                raise PythieServingException(
                    f"{feature_name} has {nd_array.shape[0]} rows but "
                    f"{len(samples)} were expected"
                )

            if nd_array.dtype != object:
                for sample, row in zip(samples, nd_array):
                    sample.append(row[0])
                continue

            # Object ("string") dtype: translate category values to positions.
            if pandas_categorical is None:
                raise PythieServingException(
                    f"{feature_name} feature has type 'object' but "
                    f"there is no saved pandas categories from model"
                )
            # Category lists are consumed in the order object-dtype features
            # appear among the model's feature names.
            # NOTE(review): this presumes every saved category list belongs to
            # a string-valued feature; a categorical feature sent with a
            # numeric dtype would shift the index - confirm against training.
            if categorical_seen >= len(pandas_categorical):
                raise PythieServingException(
                    f"{feature_name} feature has type 'object' but the model only "
                    f"saved {len(pandas_categorical)} categorical feature(s)"
                )
            feature_categories = {
                category_name: category_position
                for category_position, category_name in enumerate(
                    pandas_categorical[categorical_seen]
                )
            }
            categorical_seen += 1

            for sample, row in zip(samples, nd_array):
                raw_value = row[0]
                # Accept both raw bytes and already-decoded strings.
                category = (
                    raw_value.decode("utf-8")
                    if isinstance(raw_value, bytes)
                    else raw_value
                )
                position = feature_categories.get(category)
                if position is None:
                    raise PythieServingException(
                        f"Unknown category {category!r} for feature {feature_name}"
                    )
                sample.append(position)

        kwargs = {}
        # NOTE(review): the keyword forwarded here is "best_iteration"; confirm
        # the model's predict() actually honours a keyword with that name.
        if model_dict["best_iteration"]:
            kwargs["best_iteration"] = model_dict["best_iteration"]
        return model.predict(samples, **kwargs)
|
0 commit comments