From 797f899eca770085e8f7e44e83d34a7f81156e44 Mon Sep 17 00:00:00 2001 From: Ivan Chvets Date: Mon, 10 Jul 2023 14:53:10 -0400 Subject: [PATCH] feat: mlserver-huggingface rock integration Summary of changes: - Updated configmap with reference to rock. - Added test for huggingface server Seldon Deployment - Implemented integration test. --- src/templates/configmap.yaml.j2 | 4 +-- tests/assets/crs/huggingface.yaml | 20 +++++++++++++ tests/integration/test_seldon_servers.py | 37 +++++++++++++++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 tests/assets/crs/huggingface.yaml diff --git a/src/templates/configmap.yaml.j2 b/src/templates/configmap.yaml.j2 index b0b5deb..a77253e 100644 --- a/src/templates/configmap.yaml.j2 +++ b/src/templates/configmap.yaml.j2 @@ -76,8 +76,8 @@ data: "HUGGINGFACE_SERVER": { "protocols" : { "v2": { - "image": "seldonio/mlserver", - "defaultImageVersion": "1.2.0-huggingface" + "image": "docker.io/charmedkubeflow/mlserver-huggingface_1.2.4_22.04_1_amd64", + "defaultImageVersion": "1.2.4_22.04_1" } } }, diff --git a/tests/assets/crs/huggingface.yaml b/tests/assets/crs/huggingface.yaml new file mode 100644 index 0000000..63906b2 --- /dev/null +++ b/tests/assets/crs/huggingface.yaml @@ -0,0 +1,20 @@ +# https://github.com/SeldonIO/MLServer/blob/master/docs/examples/huggingface/README.md +apiVersion: machinelearning.seldon.io/v1 +kind: SeldonDeployment +metadata: + name: transformer +spec: + protocol: v2 + predictors: + - graph: + name: classifier + implementation: HUGGINGFACE_SERVER + parameters: + - name: task + type: STRING + value: text-generation + - name: pretrained_model + type: STRING + value: distilgpt2 + name: default + replicas: 1 diff --git a/tests/integration/test_seldon_servers.py b/tests/integration/test_seldon_servers.py index 83ac278..d3053ed 100644 --- a/tests/integration/test_seldon_servers.py +++ b/tests/integration/test_seldon_servers.py @@ -212,6 +212,37 @@ async def test_build_and_deploy(ops_test: OpsTest): {"instances": [1.0, 2.0, 5.0]}, {"predictions": [2.5, 3, 4.5]}, ), + ( + "HUGGINGFACE_SERVER", + "huggingface.yaml", + "v2/models/classifier/infer", + { + "inputs": [ + { + "name": "args", + "shape": [1], + "datatype": "BYTES", + "data": ["this is a test"], + } + ], + }, + { + "model_name": "classifier", + "model_version": "v1", + "id": "None", + "parameters": {}, + "outputs": [ + { + "name": "output", + "shape": [1, 1], + "datatype": "BYTES", + "parameters": {"content_type": "str"}, + # 'data' needs to be reset because GPT returns different results every time + "data": "None", + } + ], + }, + ), ], ) @pytest.mark.asyncio @@ -274,7 +305,11 @@ async def test_seldon_predictor_server( # reset id in response, if present if "id" in response.keys(): - response["id"] = None + response["id"] = "None" + + # reset data for HUGGINGFACE_SERVER because GPT inference returns different data every time + if server_name == "HUGGINGFACE_SERVER": + response["outputs"][0]["data"] = "None" # for 'seldon' protocol update test data with correct predictor server image if protocol == "seldon":