Skip to content

Commit

Permalink
Feat(serving): add spark mllib model serving (#163)
Browse files Browse the repository at this point in the history
* Feat(serving): add spark mllib model serving

* Fix(scripts): fix handling of different model formats in scripts
  • Loading branch information
FogDong authored Oct 26, 2020
1 parent 8aa37b2 commit 601400e
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 33 deletions.
12 changes: 7 additions & 5 deletions build/serving/mlserver/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
FROM python:3.7-slim
FROM openjdk:slim

COPY --from=python:3.7-slim / /

ENV MODEL_STORE /mnt/models

RUN apt-get update && \
apt-get -y --no-install-recommends install \
libgomp1 unzip
apt-get -y --no-install-recommends install \
libgomp1 unzip

# Use MLServer for serving, see https://github.com/SeldonIO/MLServer
WORKDIR /workspace
ADD https://github.com/SeldonIO/MLServer/archive/master.zip .
RUN unzip master.zip && pip install MLServer-master/[all] && rm -r MLServer-master && rm master.zip
ADD https://github.com/FogDong/MLServer/archive/master.zip .
RUN unzip master.zip && pip install MLServer-master/[all] && pip install MLServer-master/custom/[all] && rm -r MLServer-master && rm master.zip

COPY scripts/serving/wrapper /opt/wrapper
RUN pip install -r /opt/wrapper/requirements.txt && rm /opt/wrapper/requirements.txt
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/modeljob/v1alpha1/modeljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ const (
FormatSKLearn Format = "SKLearn"
FormatXGBoost Format = "XGBoost"
FormatMLflow Format = "MLflow"
FormatMLlib Format = "MLlib"
)

type ModelJobPhase string
Expand Down
28 changes: 16 additions & 12 deletions pkg/registry/serving/composer.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ func getGPUAmount(resource corev1.ResourceRequirements) int64 {

// getDefaultUserContainerPorts get container ports for default image.
func getDefaultUserContainerPorts(format string) []corev1.ContainerPort {
if format == string(modeljobsv1alpha1.FormatSKLearn) || format == string(modeljobsv1alpha1.FormatXGBoost) {
if isMLServerModel(format) {
ports := []corev1.ContainerPort{
{
Name: "http",
Expand Down Expand Up @@ -361,7 +361,7 @@ func getDefaultProbe(format, servingName string) *corev1.Probe {
port := defaultInferenceHTTPPort
if format == string(modeljobsv1alpha1.FormatPMML) {
path = fmt.Sprintf("/openscoring/model/%v", servingName)
} else if format == string(modeljobsv1alpha1.FormatSKLearn) || format == string(modeljobsv1alpha1.FormatXGBoost) {
} else if isMLServerModel(format) {
path = fmt.Sprintf("/v2/models/%v/ready", servingName)
port = defaultMLServerHTTPPort
}
Expand Down Expand Up @@ -402,21 +402,18 @@ func getModelFormat(pu *seldonv1.PredictiveUnit) string {

// getUserContainerImage get image by different model format.
func getUserContainerImage(format string) string {
switch format {
// Group1 for PMML image
case string(modeljobsv1alpha1.FormatPMML):
if format == string(modeljobsv1alpha1.FormatPMML) {
return viper.GetString(envPMMLServingImage)

}
// Group2 for mlserver image
case string(modeljobsv1alpha1.FormatSKLearn):
fallthrough
case string(modeljobsv1alpha1.FormatXGBoost):
if isMLServerModel(format) {
return viper.GetString(envMLServerImage)

// Group3 for TRT server image
default:
return viper.GetString(envTRTServingImage)
}

// Group3 for default TRT server image
return viper.GetString(envTRTServingImage)

}

// getModelTag gets model tag, eg: harbor.demo.io/release/savedmodel:v1, it will return `v1`.
Expand Down Expand Up @@ -530,3 +527,10 @@ func composeSchedulerName(seldonPodSpec *seldonv1.SeldonPodSpec) {
}
seldonPodSpec.Spec.SchedulerName = schedulerName
}

// isMLServerModel reports whether the given model format is one of the
// formats served by MLServer (SKLearn, XGBoost or MLlib), as opposed to
// PMML serving or the default TRT server.
func isMLServerModel(format string) bool {
	switch format {
	case string(modeljobsv1alpha1.FormatSKLearn),
		string(modeljobsv1alpha1.FormatXGBoost),
		string(modeljobsv1alpha1.FormatMLlib):
		return true
	default:
		return false
	}
}
7 changes: 6 additions & 1 deletion scripts/serving/mlserver/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
#!/bin/bash

# Prepare the model layout (generate model-settings.json etc.) before serving.
python3 /opt/wrapper/preprocessor.py
# NOTE(review): the next line appears to be a leftover removed-side diff line;
# the if/else below supersedes it and would start a second server — confirm.
mlserver start $MODEL_STORE

# MLlib models are served through the custom mlservermllib entrypoint;
# everything else uses the stock mlserver CLI. MODEL_FORMAT is exported
# by the preprocessor.
if [ "$MODEL_FORMAT" = "MLlib" ];then
mlservermllib start $MODEL_STORE
else
mlserver start $MODEL_STORE
fi
54 changes: 41 additions & 13 deletions scripts/serving/wrapper/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from utils.get_model import check_model
from utils.config_generator import TRTISConfigGenerator
from utils.model_formatter import ModelFormatter
from utils.help_functions import isTritonModel, isMLServerModel

SKLEARN_MODEL = "model.joblib"
XGBOOST_MODEL = "model.xgboost"


class Preprocessor:
"""
Preprocessor formats its directory structure and
Expand Down Expand Up @@ -38,8 +40,8 @@ def get_critical_env(env):

self._trtis_conifig_generator = TRTISConfigGenerator()
self.model_root_path = self._model_store
self.model_path = os.path.join(self.model_root_path, self._serving_name, "1")

self.model_path = os.path.join(
self.model_root_path, self._serving_name, "1")

def _extract_yaml(self):
try:
Expand All @@ -62,12 +64,12 @@ def _generate_config_pbtxt(self, yaml_data):
logger.error('error when generating config.pbtxt: ', e)
sys.exit(1)

def _generate_model_setting(self, format):
def _generate_model_setting(self, format, version):
setting = {}
if format == 'SKLearn':
setting = {
'name': self._serving_name,
'version': 'v0.1.0',
'version': version,
'implementation': 'mlserver.models.SKLearnModel',
'parameters': {
'uri': os.path.join(self.model_path, SKLEARN_MODEL)
Expand All @@ -76,12 +78,29 @@ def _generate_model_setting(self, format):
elif format == 'XGBoost':
setting = {
'name': self._serving_name,
'version': 'v0.1.0',
'version': version,
'implementation': 'mlserver.models.XGBoostModel',
'parameters': {
'uri': os.path.join(self.model_path, XGBOOST_MODEL)
}
}
elif format == 'MLlib':
try:
mllibformat = os.environ["MLLIB_FORMAT"]
except KeyError:
logger.error("MLLIB_FORMAT not defined")
sys.exit(1)

setting = {
'name': self._serving_name,
'version': version,
'implementation': 'mlservermllib.models.MLLibModel',
'parameters': {
'uri': os.path.join(
self.model_root_path, self._serving_name, "1"),
'format': mllibformat
}
}

json_str = json.dumps(setting)
with open(os.path.join(self.model_root_path, "model-settings.json"), 'w') as json_file:
Expand All @@ -101,23 +120,32 @@ def start(self):
ormb_file_path = os.path.join(
self.model_root_path, self._serving_name, "ormbfile.yaml")
if not os.path.exists(ormb_file_path):
logger.error(f'{ormb_file_path} does not exist')
return

# Phase 1: Extract model_format and yaml
yaml_data = self._extract_yaml()
if 'format' in yaml_data.items():
logger.error('model format missing')
return
format = yaml_data["format"]

MODEL_NEED_NOT_CONFIG_PBTXT = {'PMML', 'SKLearn', 'XGBoost'}
# Phase 2: Generate 'config.pbtxt' for triton models
if isTritonModel(format):
self._generate_config_pbtxt(yaml_data)

# Phase 2: Generate 'config.pbtxt' if need
if format not in MODEL_NEED_MODEL_SETTING:
self._generate_config_pbtxt(yaml_data)
# Phase 3: Generate 'model setting' for mlserver models
if isMLServerModel(format):
# set env for mlserver
os.putenv('MODEL_FORMAT', format)

MODEL_NEED_MODEL_SETTING = {'SKLearn', 'XGBoost'}
# get version from ormbfile
if 'version' in yaml_data.items():
version = yaml_data["version"]
else:
version = 'v1.0.0'

# Phase 3: Generate 'model setting' if need
if format in MODEL_NEED_MODEL_SETTING:
self._generate_model_setting(format)
self._generate_model_setting(format, version)

# Phase 4: Re-organize directory format
self._format_model(format)
Expand Down
21 changes: 20 additions & 1 deletion scripts/serving/wrapper/utils/help_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
import os
from loguru import logger

# Model formats that are served by MLServer (everything else goes to
# Triton, except PMML which has its own server).
mlserver_model = [
    'SKLearn', 'XGBoost', 'MLlib'
]


@logger.catch()
def find_file_ends_with(dir_path, ext):
Expand All @@ -21,6 +25,20 @@ def rename(dir_path, ori_name, new_name):
os.rename(ori_name_full, new_name_full)


@logger.catch()
def isTritonModel(format):
    """Return True when `format` is served by the Triton inference server.

    Every format except PMML and the MLServer-backed formats listed in
    `mlserver_model` falls through to Triton.
    """
    # Return the boolean expression directly instead of the verbose
    # `if ...: return True / return False` pattern.
    return format != 'PMML' and format not in mlserver_model


@logger.catch()
def isMLServerModel(format):
    """Return True when `format` is one of the MLServer-served formats
    listed in `mlserver_model` (SKLearn, XGBoost, MLlib)."""
    # Membership test returns a bool already; no if/else needed.
    return format in mlserver_model


@logger.catch()
def get_platform_by_format(format):
format_platform_dict = {
Expand All @@ -32,7 +50,8 @@ def get_platform_by_format(format):
'pmml': 'pmmlruntime_pmml',
'tensorrt': 'tensorrt_plan',
'sklearn': 'scikitlearn_sklearn',
'xgboost': 'xgboost_xgboost'
'xgboost': 'xgboost_xgboost',
'mllib': 'mllib_mllib'
}

return format_platform_dict[format]
8 changes: 7 additions & 1 deletion scripts/serving/wrapper/utils/model_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ def execute(self, target_dir):
XGBoostFormatter._target_xgboost_filename)


class MLlibFormatter(ModelFormatInterface):
    def execute(self, target_dir):
        """No-op formatter: an MLlib model is a whole directory, so there
        is no single model file to rename or move before serving."""
        print("do nothing since mllib model is a directory")


class ModelFormatter:
_implemented_dict = {
'onnxruntime_onnx': ONNXFormatter,
Expand All @@ -130,7 +135,8 @@ class ModelFormatter:
'pmmlruntime_pmml': PMMLFormatter,
'tensorrt_plan': TensorRTFormatter,
'scikitlearn_sklearn': SKLearnFormatter,
'xgboost_xgboost': XGBoostFormatter
'xgboost_xgboost': XGBoostFormatter,
'mllib_mllib': MLlibFormatter,
}

def __init__(self, format):
Expand Down

0 comments on commit 601400e

Please sign in to comment.