|
| 1 | +import logging |
1 | 2 | import os
|
2 | 3 | import pickle
|
3 |
| -import logging |
| 4 | +from typing import List, Optional, Any |
4 | 5 |
|
5 | 6 | import grpc
|
6 |
| - |
7 | 7 | from lightgbm import Booster
|
8 | 8 |
|
| 9 | +from .exceptions import PythieServingException |
| 10 | +from .tensorflow_proto.tensorflow_serving.apis import ( |
| 11 | + predict_pb2, |
| 12 | + prediction_service_pb2_grpc, |
| 13 | +) |
9 | 14 | from .tensorflow_proto.tensorflow_serving.config import model_server_config_pb2
|
10 |
| -from .tensorflow_proto.tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc |
11 | 15 | from .utils import make_ndarray_from_tensor
|
12 |
| -from .exceptions import PythieServingException |
13 | 16 |
|
14 | 17 |
|
15 |
| -class LightGBMPredictionServiceServicer(prediction_service_pb2_grpc.PredictionServiceServicer): |
16 |
| - |
17 |
| - def __init__(self, *, logger: logging.Logger, model_server_config: model_server_config_pb2.ModelServerConfig): |
| 18 | +class LightGBMPredictionServiceServicer( |
| 19 | + prediction_service_pb2_grpc.PredictionServiceServicer |
| 20 | +): |
| 21 | + def __init__( |
| 22 | + self, |
| 23 | + *, |
| 24 | + logger: logging.Logger, |
| 25 | + model_server_config: model_server_config_pb2.ModelServerConfig, |
| 26 | + ): |
18 | 27 | self.logger = logger
|
19 | 28 | self.model_map = {}
|
20 | 29 | for model_config in model_server_config.model_config_list.config:
|
21 |
| - with open(os.path.join(model_config.base_path, model_config.name) + ".pickled", 'rb') as opened_model: |
| 30 | + with open( |
| 31 | + os.path.join(model_config.base_path, model_config.name) + ".pickled", |
| 32 | + "rb", |
| 33 | + ) as opened_model: |
22 | 34 | model = pickle.load(opened_model)
|
| 35 | + pandas_categorical = model.pandas_categorical |
23 | 36 |
|
24 | 37 | if isinstance(model, Booster):
|
25 | 38 | feature_names = model.feature_name()
|
26 | 39 | best_iteration = model.best_iteration
|
27 | 40 | else:
|
28 | 41 | feature_names = model.feature_names
|
29 |
| - best_iteration = getattr(model, 'best_iteration', None) |
| 42 | + best_iteration = getattr(model, "best_iteration", None) |
30 | 43 |
|
31 |
| - self.model_map[model_config.name] = {'model': model, 'feature_names': feature_names, |
32 |
| - 'best_iteration': best_iteration} |
| 44 | + self.model_map[model_config.name] = { |
| 45 | + "model": model, |
| 46 | + "feature_names": feature_names, |
| 47 | + "best_iteration": best_iteration, |
| 48 | + "pandas_categorical": pandas_categorical, |
| 49 | + } |
33 | 50 |
|
34 | 51 | def Predict(self, request: predict_pb2.PredictRequest, context: grpc.RpcContext):
|
35 | 52 | model_name = request.model_spec.name
|
36 | 53 | if model_name not in self.model_map:
|
37 |
| - raise PythieServingException(f'Unknown model: {model_name}. This pythie-serving instance can only ' |
38 |
| - f'serve one of the following: {",".join(self.model_map.keys())}') |
| 54 | + raise PythieServingException( |
| 55 | + f"Unknown model: {model_name}. This pythie-serving instance can only " |
| 56 | + f'serve one of the following: {",".join(self.model_map.keys())}' |
| 57 | + ) |
39 | 58 |
|
40 | 59 | model_dict = self.model_map[model_name]
|
41 |
| - |
42 |
| - features_names = model_dict['feature_names'] |
43 |
| - samples = None |
| 60 | + model = model_dict["model"] |
| 61 | + pandas_categorical = model_dict["pandas_categorical"] |
| 62 | + features_names = model_dict["feature_names"] |
| 63 | + pd_categorical_features: List[str] = [] |
| 64 | + samples: Optional[List[List[Any]]] = None |
44 | 65 | for feature_name in features_names:
|
45 | 66 | if feature_name not in request.inputs:
|
46 |
| - raise PythieServingException(f'{feature_name} not set in the predict request') |
| 67 | + raise PythieServingException( |
| 68 | + f"{feature_name} not set in the predict request" |
| 69 | + ) |
47 | 70 |
|
48 | 71 | nd_array = make_ndarray_from_tensor(request.inputs[feature_name])
|
49 |
| - if len(nd_array.shape) != 2 or nd_array.shape[1] != 1: |
50 |
| - raise PythieServingException('All input vectors should be 1D tensor') |
| 72 | + # get features categories from model if feature dtype is object ("string") |
| 73 | + feature_categories = {} |
| 74 | + if nd_array.dtype == object: |
| 75 | + if pandas_categorical is None: |
| 76 | + raise PythieServingException( |
| 77 | + f"{feature_name} feature has type 'object' but " |
| 78 | + f"there is no saved pandas categories from model" |
| 79 | + ) |
| 80 | + # lgbm save categories in the same order categorical features appear in model.feature_name() |
| 81 | + feature_categories = { |
| 82 | + category_name: category_position |
| 83 | + for category_position, category_name in enumerate( |
| 84 | + pandas_categorical[len(pd_categorical_features)] |
| 85 | + ) |
| 86 | + } |
| 87 | + pd_categorical_features.append(feature_name) |
51 | 88 |
|
52 |
| - if samples is None: |
53 |
| - samples = [[] for _ in range(nd_array.shape[0])] |
| 89 | + if len(nd_array.shape) != 2 or nd_array.shape[1] != 1: |
| 90 | + raise PythieServingException("All input vectors should be 1D tensor") |
54 | 91 |
|
55 |
| - for sample_index, value in enumerate(nd_array): |
56 |
| - samples[sample_index].append(value[0]) |
| 92 | + samples = samples or [[] for _ in range(nd_array.shape[0])] |
| 93 | + if len(feature_categories) > 0: # get category position from its value |
| 94 | + for sample_index, value in enumerate(nd_array): |
| 95 | + samples[sample_index].append( |
| 96 | + feature_categories[value[0].decode("utf-8")] |
| 97 | + ) |
| 98 | + else: |
| 99 | + for sample_index, value in enumerate(nd_array): |
| 100 | + samples[sample_index].append(value[0]) |
57 | 101 |
|
58 |
| - model = model_dict['model'] |
59 | 102 | kwargs = {}
|
60 |
| - if model_dict['best_iteration']: |
61 |
| - kwargs['best_iteration'] = model_dict['best_iteration'] |
| 103 | + if model_dict["best_iteration"]: |
| 104 | + kwargs["best_iteration"] = model_dict["best_iteration"] |
62 | 105 | return model.predict(samples, **kwargs)
|
0 commit comments