From cf666d6e61868611620d315353ec23e9b8737474 Mon Sep 17 00:00:00 2001 From: Ivan Chvets Date: Wed, 14 Jun 2023 15:54:19 -0400 Subject: [PATCH] feat: integration tests bumping resources https://github.com/canonical/seldon-core-operator/issues/133 Summary of changes: - Updated integration tests with server names to retrieve proper image name for testing. - Added test for removal of workload deployed configmap. --- tests/integration/test_charm.py | 74 ++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index 3f263b1..c7ecb0f 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -4,6 +4,7 @@ """Integration tests for Seldon Core Operator/Charm.""" +import json import logging import subprocess from pathlib import Path @@ -13,7 +14,7 @@ import requests import tenacity import yaml -from lightkube import ApiError, Client +from lightkube import ApiError, Client, codecs from lightkube.generic_resource import create_namespaced_resource from lightkube.resources.apiextensions_v1 import CustomResourceDefinition from lightkube.resources.apps_v1 import Deployment @@ -110,6 +111,7 @@ def assert_available(client, resource_class, resource_name, namespace): assert state == "Available", f"Waited too long for {resource_class_kind}/{resource_name}!" 
+ @tenacity.retry( wait=tenacity.wait_exponential(multiplier=2, min=1, max=10), stop=tenacity.stop_after_attempt(60), @@ -313,13 +315,15 @@ async def test_seldon_deployment(ops_test: OpsTest): @pytest.mark.parametrize( + # server_name - name of predictor server (should match configmap) # server_config - server configuration file # url - model prediction URL # req_data - data to put into request # resp_data - data expected in response - "server_config, url, req_data, resp_data", + "server_name, server_config, url, req_data, resp_data", [ ( + "SKLEARN_SERVER", "sklearn.yaml", "api/v1.0/predictions", {"data": {"ndarray": [[1, 2, 3, 4]]}}, @@ -328,11 +332,12 @@ async def test_seldon_deployment(ops_test: OpsTest): "names": ["t:0", "t:1", "t:2"], "ndarray": [[0.0006985194531162835, 0.00366803903943666, 0.995633441507447]], }, - # TO-DO: this might need to be adjusted when testing with rocks, i.e. read it from configmap - "meta": {"requestPath": {"classifier": "seldonio/sklearnserver:1.15.0"}}, + # classifier will be replaced according to configmap + "meta": {"requestPath": {"classifier": "IMAGE:VERSION"}}, }, ), ( + "SKLEARN_SERVER", "sklearn-v2.yaml", "v2/models/classifier/infer", { @@ -362,6 +367,7 @@ async def test_seldon_deployment(ops_test: OpsTest): }, ), ( + "XGBOOST_SERVER", "xgboost.yaml", "api/v1.0/predictions", {"data": {"ndarray": [[1.0, 2.0, 5.0, 6.0]]}}, @@ -370,10 +376,11 @@ async def test_seldon_deployment(ops_test: OpsTest): "names": [], "ndarray": [2.0], }, - "meta": {"requestPath": {"classifier": "seldonio/xgboostserver:1.15.0"}}, + "meta": {"requestPath": {"classifier": "IMAGE:VERSION"}}, }, ), ( + "XGBOOST_SERVER", "xgboost-v2.yaml", "v2/models/iris/infer", { @@ -403,6 +410,7 @@ async def test_seldon_deployment(ops_test: OpsTest): }, ), ( + "MLFLOW_SERVER", "mlflowserver.yaml", "api/v1.0/predictions", {"data": {"ndarray": [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1]]}}, @@ -411,11 +419,12 @@ async def test_seldon_deployment(ops_test: 
OpsTest): "names": [], "ndarray": [5.275558760255382], }, - "meta": {"requestPath": {"classifier": "seldonio/mlflowserver:1.15.0"}}, + "meta": {"requestPath": {"classifier": "IMAGE:VERSION"}}, }, ), # Disable test for mlflowserver V2 due to failure in model in test container # ( + # "MLFLOW_SERVER", # "mlflowserver-v2.yaml", # "v2/models/iris/infer", # { @@ -447,7 +456,9 @@ async def test_seldon_deployment(ops_test: OpsTest): ], ) @pytest.mark.asyncio -async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, req_data, resp_data): +async def test_seldon_predictor_server( + ops_test: OpsTest, server_name, server_config, url, req_data, resp_data +): """Test Seldon predictor server. Workload deploys Seldon predictor servers defined in ConfigMap. @@ -461,20 +472,26 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re this_ns.metadata.labels.update({"serving.kubeflow.org/inferenceservice": "enabled"}) client.patch(res=Namespace, name=this_ns.metadata.name, obj=this_ns) + # retrieve predictor server information and create Seldon Deployment with open(f"examples/{server_config}") as f: deploy_yaml = yaml.safe_load(f.read()) ml_model = deploy_yaml["metadata"]["name"] predictor = deploy_yaml["spec"]["predictors"][0]["name"] + protocol = "seldon" # default protocol + if "protocol" in deploy_yaml["spec"]: + protocol = deploy_yaml["spec"]["protocol"] sdep = SELDON_DEPLOYMENT(deploy_yaml) client.create(sdep, namespace=namespace) assert_available(client, SELDON_DEPLOYMENT, ml_model, namespace) + # obtain prediction service endpoint service_name = f"{ml_model}-{predictor}-classifier" service = client.get(Service, name=service_name, namespace=namespace) service_ip = service.spec.clusterIP service_port = next(p for p in service.spec.ports if p.name == "http").port + # post prediction request response = requests.post(f"http://{service_ip}:{service_port}/{url}", json=req_data) response.raise_for_status() response = response.json() @@
-483,8 +500,25 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re if "id" in response.keys(): response["id"] = None + # for 'seldon' protocol update test data with correct predictor server image + if protocol == "seldon": + # retrieve predictor server image from configmap to implicitly verify that it matches + # deployed predictor server image + configmap = client.get( + ConfigMap, + name="seldon-config", + namespace=ops_test.model_name, + ) + configmap_yaml = yaml.safe_load(codecs.dump_all_yaml([configmap])) + servers = json.loads(configmap_yaml["data"]["predictor_servers"]) + server_image = servers[server_name]["protocols"][protocol]["image"] + server_version = servers[server_name]["protocols"][protocol]["defaultImageVersion"] + resp_data["meta"]["requestPath"]["classifier"] = f"{server_image}:{server_version}" + + # verify prediction response assert sorted(response.items()) == sorted(resp_data.items()) + # remove Seldon Deployment client.delete(SELDON_DEPLOYMENT, name=ml_model, namespace=namespace, grace_period=0) assert_deleted(client, SELDON_DEPLOYMENT, ml_model, namespace) @@ -496,7 +530,7 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re @pytest.mark.abort_on_fail -def test_remove_with_resources_present(ops_test: OpsTest): +async def test_remove_with_resources_present(ops_test: OpsTest): """Test remove with all resources deployed. Verify that all deployed resources that need to be removed are removed. @@ -504,12 +538,14 @@ def test_remove_with_resources_present(ops_test: OpsTest): lightkube_client = Client() # remove deployed charm and verify that it is removed + # verify if needed when https://github.com/juju/python-libjuju/issues/877 is resolved. 
# TO-DO: use this: await ops_test.run("juju", "remove-application", f"{APP_NAME}") + # TO-DO: use this: assert APP_NAME not in ops_test.model.applications subprocess.check_output( - f"juju remove-application -m {ops_test.model_name} {APP_NAME}", - shell=True, - stderr=subprocess.STDOUT, - ) + f"juju remove-application -m {ops_test.model_name} {APP_NAME}", + shell=True, + stderr=subprocess.STDOUT, + ) assert_deleted(lightkube_client, Pod, "seldon-controller-manager-0", ops_test.model_name) # verify that all resources that were deployed are removed @@ -521,8 +557,7 @@ def test_remove_with_resources_present(ops_test: OpsTest): ) assert not list(crd_list) - # verify that ConfigMap is removed - # TO-DO: test all ConfigMaps with label app.juju.is/created-by=seldon-controller-manager + # verify that all ConfigMaps are removed try: _ = lightkube_client.get( ConfigMap, @@ -534,6 +569,17 @@ def test_remove_with_resources_present(ops_test: OpsTest): # other error than Not Found assert False + try: + _ = lightkube_client.get( + ConfigMap, + name="a33bd623.machinelearning.seldon.io", + namespace=ops_test.model_name, + ) + except ApiError as error: + if error.status.code != 404: + # other error than Not Found + assert False + # verify that all related Services are removed svc_list = lightkube_client.list( Service,