feat: integration tests bumping resources
#133

Summary of changes:
- Updated integration tests to pass predictor server names so the expected
  image name is retrieved from the seldon-config ConfigMap (see the sketch below).
- Added a test for removal of the workload-deployed ConfigMap.
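
For context, the new server_name parameter keys into the predictor_servers entry of the seldon-config ConfigMap. A minimal sketch of that lookup is below; the image name and version shown are illustrative only.

# Illustrative shape of the "predictor_servers" entry in the seldon-config
# ConfigMap (the test obtains it via json.loads from the ConfigMap data);
# the image name and version below are examples only.
predictor_servers = {
    "SKLEARN_SERVER": {
        "protocols": {
            "seldon": {
                "image": "seldonio/sklearnserver",
                "defaultImageVersion": "1.15.0",
            },
        },
    },
}

# The test resolves the expected classifier image for a given server and
# protocol and substitutes it into the expected response data:
entry = predictor_servers["SKLEARN_SERVER"]["protocols"]["seldon"]
expected_classifier = f"{entry['image']}:{entry['defaultImageVersion']}"
# -> "seldonio/sklearnserver:1.15.0"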
Ivan Chvets committed Jun 14, 2023
1 parent 0bfbe30 commit cf666d6
Showing 1 changed file with 60 additions and 14 deletions.
74 changes: 60 additions & 14 deletions tests/integration/test_charm.py
@@ -4,6 +4,7 @@

"""Integration tests for Seldon Core Operator/Charm."""

import json
import logging
import subprocess
from pathlib import Path
@@ -13,7 +14,7 @@
import requests
import tenacity
import yaml
from lightkube import ApiError, Client
from lightkube import ApiError, Client, codecs
from lightkube.generic_resource import create_namespaced_resource
from lightkube.resources.apiextensions_v1 import CustomResourceDefinition
from lightkube.resources.apps_v1 import Deployment
@@ -110,6 +111,7 @@ def assert_available(client, resource_class, resource_name, namespace):

assert state == "Available", f"Waited too long for {resource_class_kind}/{resource_name}!"


@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=2, min=1, max=10),
stop=tenacity.stop_after_attempt(60),
@@ -313,13 +315,15 @@ async def test_seldon_deployment(ops_test: OpsTest):


@pytest.mark.parametrize(
# server_name - name of predictor server (should match configmap)
# server_config - server configuration file
# url - model prediction URL
# req_data - data to put into request
# resp_data - data expected in response
"server_config, url, req_data, resp_data",
"server_name, server_config, url, req_data, resp_data",
[
(
"SKLEARN_SERVER",
"sklearn.yaml",
"api/v1.0/predictions",
{"data": {"ndarray": [[1, 2, 3, 4]]}},
@@ -328,11 +332,12 @@ async def test_seldon_deployment(ops_test: OpsTest):
"names": ["t:0", "t:1", "t:2"],
"ndarray": [[0.0006985194531162835, 0.00366803903943666, 0.995633441507447]],
},
# TO-DO: this might need to be adjusted when testing with rocks, i.e. read it from configmap
"meta": {"requestPath": {"classifier": "seldonio/sklearnserver:1.15.0"}},
# classifier will be replaced according to configmap
"meta": {"requestPath": {"classifier": "IMAGE:VERSION"}},
},
),
(
"SKLEARN_SERVER",
"sklearn-v2.yaml",
"v2/models/classifier/infer",
{
@@ -362,6 +367,7 @@ async def test_seldon_deployment(ops_test: OpsTest):
},
),
(
"XGBOOST_SERVER",
"xgboost.yaml",
"api/v1.0/predictions",
{"data": {"ndarray": [[1.0, 2.0, 5.0, 6.0]]}},
@@ -370,10 +376,11 @@ async def test_seldon_deployment(ops_test: OpsTest):
"names": [],
"ndarray": [2.0],
},
"meta": {"requestPath": {"classifier": "seldonio/xgboostserver:1.15.0"}},
"meta": {"requestPath": {"classifier": "IMAGE:VERSION"}},
},
),
(
"XGBOOST_SERVER",
"xgboost-v2.yaml",
"v2/models/iris/infer",
{
@@ -403,6 +410,7 @@ async def test_seldon_deployment(ops_test: OpsTest):
},
),
(
"MLFLOW_SERVER",
"mlflowserver.yaml",
"api/v1.0/predictions",
{"data": {"ndarray": [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1]]}},
@@ -411,11 +419,12 @@ async def test_seldon_deployment(ops_test: OpsTest):
"names": [],
"ndarray": [5.275558760255382],
},
"meta": {"requestPath": {"classifier": "seldonio/mlflowserver:1.15.0"}},
"meta": {"requestPath": {"classifier": "IMAGE:VERSION"}},
},
),
# Disable test for mlflowserver V2 due to failure in model in test container
# (
# "MLFLOW_SERVER",
# "mlflowserver-v2.yaml",
# "v2/models/iris/infer",
# {
@@ -447,7 +456,9 @@ async def test_seldon_deployment(ops_test: OpsTest):
],
)
@pytest.mark.asyncio
async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, req_data, resp_data):
async def test_seldon_predictor_server(
ops_test: OpsTest, server_name, server_config, url, req_data, resp_data
):
"""Test Seldon predictor server.
Workload deploys Seldon predictor servers defined in ConfigMap.
@@ -461,20 +472,26 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re
this_ns.metadata.labels.update({"serving.kubeflow.org/inferenceservice": "enabled"})
client.patch(res=Namespace, name=this_ns.metadata.name, obj=this_ns)

# retrieve predictor server information and create Seldon Deployment
with open(f"examples/{server_config}") as f:
deploy_yaml = yaml.safe_load(f.read())
ml_model = deploy_yaml["metadata"]["name"]
predictor = deploy_yaml["spec"]["predictors"][0]["name"]
protocol = "seldon" # default protocol
if "protocol" in deploy_yaml["spec"]:
protocol = deploy_yaml["spec"]["protocol"]
sdep = SELDON_DEPLOYMENT(deploy_yaml)
client.create(sdep, namespace=namespace)

assert_available(client, SELDON_DEPLOYMENT, ml_model, namespace)

# obtain prediction service endpoint
service_name = f"{ml_model}-{predictor}-classifier"
service = client.get(Service, name=service_name, namespace=namespace)
service_ip = service.spec.clusterIP
service_port = next(p for p in service.spec.ports if p.name == "http").port

# post prediction request
response = requests.post(f"http://{service_ip}:{service_port}/{url}", json=req_data)
response.raise_for_status()
response = response.json()
@@ -483,8 +500,25 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re
if "id" in response.keys():
response["id"] = None

# for 'seldon' protocol update test data with correct predictor server image
if protocol == "seldon":
# retrieve predictor server image from configmap to implicitly verify that it matches
# deployed predictor server image
configmap = client.get(
ConfigMap,
name="seldon-config",
namespace=ops_test.model_name,
)
configmap_yaml = yaml.safe_load(codecs.dump_all_yaml([configmap]))
servers = json.loads(configmap_yaml["data"]["predictor_servers"])
server_image = servers[server_name]["protocols"][protocol]["image"]
server_version = servers[server_name]["protocols"][protocol]["defaultImageVersion"]
resp_data["meta"]["requestPath"]["classifier"] = f"{server_image}:{server_version}"

# verify prediction response
assert sorted(response.items()) == sorted(resp_data.items())

# remove Seldon Deployment
client.delete(SELDON_DEPLOYMENT, name=ml_model, namespace=namespace, grace_period=0)
assert_deleted(client, SELDON_DEPLOYMENT, ml_model, namespace)

@@ -496,20 +530,22 @@ async def test_seldon_predictor_server(ops_test: OpsTest, server_config, url, re


@pytest.mark.abort_on_fail
def test_remove_with_resources_present(ops_test: OpsTest):
async def test_remove_with_resources_present(ops_test: OpsTest):
"""Test remove with all resources deployed.
Verify that all deployed resources that need to be removed are removed.
"""
lightkube_client = Client()

# remove deployed charm and verify that it is removed
# verify if needed when https://github.com/juju/python-libjuju/issues/877 is resolved.
# TO-DO: use this: await ops_test.run("juju", "remove-application", f"{APP_NAME}")
# TO-DO: use this: assert APP_NAME in ops_test.model.applications
subprocess.check_output(
f"juju remove-application -m {ops_test.model_name} {APP_NAME}",
shell=True,
stderr=subprocess.STDOUT,
)
f"juju remove-application -m {ops_test.model_name} {APP_NAME}",
shell=True,
stderr=subprocess.STDOUT,
)
assert_deleted(lightkube_client, Pod, "seldon-controller-manager-0", ops_test.model_name)

# verify that all resources that were deployed are removed
@@ -521,8 +557,7 @@ def test_remove_with_resources_present(ops_test: OpsTest):
)
assert not list(crd_list)

# verify that ConfigMap is removed
# TO-DO: test all ConfigMaps with label app.juju.is/created-by=seldon-controller-manager
# verify that all ConfigMaps are removed
try:
_ = lightkube_client.get(
ConfigMap,
@@ -534,6 +569,17 @@ def test_remove_with_resources_present(ops_test: OpsTest):
# other error than Not Found
assert False

try:
_ = lightkube_client.get(
ConfigMap,
name="a33bd623.machinelearning.seldon.io",
namespace=ops_test.model_name,
)
except ApiError as error:
if error.status.code != 404:
# other error than Not Found
assert False

# verify that all related Services are removed
svc_list = lightkube_client.list(
Service,
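
For reference, SELDON_DEPLOYMENT used in the tests above is a lightkube generic resource whose definition is not part of this diff; a minimal sketch, assuming the standard Seldon Core v1 API group, would look like the following.

from lightkube.generic_resource import create_namespaced_resource

# Sketch only: the actual definition lives elsewhere in test_charm.py.
SELDON_DEPLOYMENT = create_namespaced_resource(
    group="machinelearning.seldon.io",
    version="v1",
    kind="SeldonDeployment",
    plural="seldondeployments",
)

# Used in the tests as, e.g.:
#   sdep = SELDON_DEPLOYMENT(deploy_yaml)
#   client.create(sdep, namespace=namespace)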
