diff --git a/tests/interop/__init__.py b/tests/interop/__init__.py
new file mode 100644
index 00000000..890362ce
--- /dev/null
+++ b/tests/interop/__init__.py
@@ -0,0 +1,2 @@
+__version__ = "0.1.0"
+__loggername__ = "css_logger"
diff --git a/tests/interop/conftest.py b/tests/interop/conftest.py
new file mode 100644
index 00000000..2631a087
--- /dev/null
+++ b/tests/interop/conftest.py
@@ -0,0 +1,51 @@
+import os
+
+import pytest
+from kubernetes import config
+from kubernetes.client import Configuration
+from openshift.dynamic import DynamicClient
+
+from . import __loggername__
+from .css_logger import CSS_Logger
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--kubeconfig",
+        action="store",
+        default=None,
+        help="The full path to the kubeconfig file to be used",
+    )
+
+
+@pytest.fixture(scope="session")
+def get_kubeconfig(request):
+    if request.config.getoption("--kubeconfig"):
+        k8config = request.config.getoption("--kubeconfig")
+    elif "KUBECONFIG" in os.environ.keys() and os.environ["KUBECONFIG"]:
+        k8config = os.environ["KUBECONFIG"]
+    else:
+        raise ValueError(
+            "A kubeconfig file was not provided. Please provide one either "
+            "via the --kubeconfig command option or by setting a KUBECONFIG "
+            "environment variable"
+        )
+    return k8config
+
+
+@pytest.fixture(scope="session")
+def kube_config(get_kubeconfig):
+    kc = Configuration()
+    config.load_kube_config(config_file=get_kubeconfig, client_configuration=kc)
+    return kc
+
+
+@pytest.fixture(scope="session")
+def openshift_dyn_client(get_kubeconfig):
+    return DynamicClient(client=config.new_client_from_config(get_kubeconfig))
+
+
+@pytest.fixture(scope="session", autouse=True)
+def setup_logger():
+    logger = CSS_Logger(__loggername__)
+    return logger
diff --git a/tests/interop/crd.py b/tests/interop/crd.py
new file mode 100644
index 00000000..8a433c5c
--- /dev/null
+++ b/tests/interop/crd.py
@@ -0,0 +1,55 @@
+from ocp_resources.resource import NamespacedResource, Resource
+
+
+class ArgoCD(NamespacedResource):
+    """
+    OpenShift ArgoCD / GitOps object.
+    """
+
+    api_group = "argoproj.io"
+    api_version = NamespacedResource.ApiVersion.V1ALPHA1
+    kind = "Application"
+
+    @property
+    def health(self):
+        """
+        Check the health of the argocd application
+        :return: boolean
+        """
+
+        if (
+            self.instance.status.operationState.phase == "Succeeded"
+            and self.instance.status.health.status == "Healthy"
+        ):
+            return True
+        return False
+
+
+class ManagedCluster(Resource):
+    """
+    OpenShift Managed Cluster object.
+    """
+
+    api_version = "cluster.open-cluster-management.io/v1"
+
+    @property
+    def self_registered(self):
+        """
+        Check if the managed cluster is self registered into ACM running on the hub site,
+        based on the HubAcceptedManagedCluster, ManagedClusterConditionAvailable and
+        ManagedClusterJoined conditions reported in the cluster status.
+        :return: Tuple of a boolean (cluster joined) and a dict of condition statuses
+        """
+        is_joined = False
+        status = dict()
+
+        for condition in self.instance.status.conditions:
+            if condition["type"] == "HubAcceptedManagedCluster":
+                status["HubAcceptedManagedCluster"] = condition["status"]
+            elif condition["type"] == "ManagedClusterConditionAvailable":
+                status["ManagedClusterConditionAvailable"] = condition["status"]
+            elif condition["type"] == "ManagedClusterJoined":
+                is_joined = True
+                status["ManagedClusterJoined"] = condition["status"]
+
+        return is_joined, status
diff --git a/tests/interop/css_logger.py b/tests/interop/css_logger.py
new file mode 100644
index 00000000..41f7afb5
--- /dev/null
+++ b/tests/interop/css_logger.py
@@ -0,0 +1,52 @@
+import logging
+import os
+from datetime import datetime
+from logging.handlers import RotatingFileHandler
+
+LOG_DIR = os.path.join(os.environ["WORKSPACE"], ".teflo/.results/test_execution_logs")
+if not os.path.exists(LOG_DIR):
+    os.makedirs(LOG_DIR)
+
+
+class CSS_Logger(object):
+    _logger = None
+
+    def __new__(cls, *args, **kwargs):
+        if cls._logger is None:
+            # Configure the shared logger only once; later calls reuse it.
+            # The first positional argument is used as the logger name.
+            cls._logger = logging.getLogger(args[0])
+            cls._logger.setLevel(logging.DEBUG)
+
+            pytest_current_test = os.environ.get("PYTEST_CURRENT_TEST")
+            split_test_name = pytest_current_test.split("::")[1]
+            short_test_name = split_test_name.split(" ")[0]
+
+            datestring = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+            filename = "{}_{}.log".format(short_test_name, datestring)
+            filepath = os.path.join(LOG_DIR, filename)
+
+            # Create a file handler for DEBUG level and above
+            file_handler = RotatingFileHandler(
+                filepath, maxBytes=1024 * 1024 * 1024, backupCount=20
+            )
+
+            # Create a logging format
+            log_formatter = logging.Formatter(
+                "%(asctime)s "
+                "[%(levelname)s] "
+                "%(module)s:%(lineno)d "
+                "%(message)s"
+            )
+            file_handler.setFormatter(log_formatter)
+
+            # Create a stream handler for INFO level and above
+            stream_handler = logging.StreamHandler()
+            stream_handler.setLevel(logging.INFO)
+            stream_handler.setFormatter(log_formatter)
+
+            # Add the handlers to the logger
+            cls._logger.addHandler(file_handler)
+            cls._logger.addHandler(stream_handler)
+
+        return cls._logger
diff --git a/tests/interop/edge_util.py b/tests/interop/edge_util.py
new file mode 100644
index 00000000..7df5c8a8
--- /dev/null
+++ b/tests/interop/edge_util.py
@@ -0,0 +1,146 @@
+import base64
+import fileinput
+import logging
+import os
+import subprocess
+
+import requests
+import yaml
+from ocp_resources.secret import Secret
+from requests import HTTPError, RequestException
+from urllib3.exceptions import InsecureRequestWarning, ProtocolError
+
+from . import __loggername__
+
+logger = logging.getLogger(__loggername__)
+
+
+def load_yaml_file(file_path):
+    """
+    Load and parse the yaml file
+    :param file_path: (str) file path
+    :return: (dict) yaml_config_obj in the form of a Python dict
+    """
+    yaml_config_obj = None
+    with open(file_path, "r") as yfh:
+        try:
+            yaml_config_obj = yaml.load(yfh, Loader=yaml.FullLoader)
+        except Exception as ex:
+            raise yaml.YAMLError("YAML Syntax Error:\n %s" % ex)
+    logger.info("Yaml Config : %s", yaml_config_obj)
+    return yaml_config_obj
+
+
+def find_number_of_edge_sites(dir_path):
+    """
+    Find the edge (managed cluster) site folders
+    :param dir_path: (str) dir path where the edge site manifests reside
+    :return: (list) site_names
+    """
+    site_names = list()
+    list_of_dirs = os.listdir(path=dir_path)
+
+    for site_dir in list_of_dirs:
+        if "staging" in site_dir:
+            site_names.append(site_dir)
+
+    return site_names
+
+
+def get_long_live_bearer_token(
+    dyn_client, namespace="default", sub_string="default-token"
+):
+    """
+    Get a bearer token from secrets to authorize against the openshift cluster
+    :param dyn_client: (DynamicClient) openshift dynamic client
+    :param sub_string: (str) substring of the secret name, since openshift appends a random suffix to every secret name
+    :param namespace: (string) name of the namespace where the secret exists
+    :return: (string) secret token for the specified secret
+    """
+    filtered_secrets = []
+    try:
+        for secret in Secret.get(dyn_client=dyn_client, namespace=namespace):
+            if sub_string in secret.instance.metadata.name:
+                filtered_secrets.append(secret.instance.data.token)
+    except StopIteration:
+        logger.exception(
+            "Specified substring %s doesn't exist in namespace %s",
+            sub_string,
+            namespace,
+        )
+    except ProtocolError as e:
+        # See https://github.com/kubernetes-client/python/issues/1225
+        logger.info(
+            "Skip %s... because kubelet disconnect client after default 10m...", e
+        )
+
+    # All secret tokens in openshift are base64 encoded.
+    # Decode base64 string into byte and convert byte to str
+    if len(filtered_secrets) > 0:
+        bearer_token = base64.b64decode(filtered_secrets[-1]).decode()
+        return bearer_token
+    else:
+        return None
+
+
+def get_site_response(site_url, bearer_token):
+    """
+    Query the given site API end point using the bearer token
+    :param site_url: (str) Site API end point
+    :param bearer_token: (str) bearer token
+    :return: (Response) site_response
+    """
+    site_response = None
+    headers = {"Authorization": "Bearer " + bearer_token}
+
+    try:
+        # Suppress only the single warning from urllib3 needed.
+        requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
+        site_response = requests.get(site_url, headers=headers, verify=False)
+    except (ConnectionError, HTTPError, RequestException) as e:
+        logger.exception(
+            "Failed to connect %s due to refused connection or unsuccessful status code %s",
+            site_url,
+            e,
+        )
+    logger.debug("Site Response %s: ", site_response)
+
+    return site_response
+
+
+def execute_shell_command_local(cmd):
+    """
+    Execute a shell command in a subprocess and wait until it has completed.
+    :param cmd: Command to execute.
+ """ + proc = subprocess.Popen( + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + (out, error) = proc.communicate() + exit_code = proc.wait() + return exit_code, out, error + + +def modify_file_content(file_name): + with open(file_name, "r") as frb: + logger.debug(f"Current content : {frb.readlines()}") + + with fileinput.FileInput(file_name, inplace=True, backup=".bak") as file: + for line in file: + print( + line.replace( + 'SENSOR_TEMPERATURE_ENABLED: "false"', + 'SENSOR_TEMPERATURE_ENABLED: "true"', + ), + end="", + ) + + with open(file_name, "r") as fra: + contents = fra.readlines() + logger.debug(f"Modified content : {contents}") + + return contents diff --git a/tests/interop/test_subscription_status_edge.py b/tests/interop/test_subscription_status_edge.py new file mode 100644 index 00000000..59168cb8 --- /dev/null +++ b/tests/interop/test_subscription_status_edge.py @@ -0,0 +1,96 @@ +import logging + +import pytest +from ocp_resources.cluster_version import ClusterVersion +from ocp_resources.subscription import Subscription + +from . import __loggername__ + +logger = logging.getLogger(__loggername__) + + +@pytest.mark.subscription_status_edge +def test_subscription_status_edge(openshift_dyn_client): + # These are the operator subscriptions and their associated namespaces + expected_subs = { + "openshift-gitops-operator": ["openshift-operators"], + "amq-streams": ["openshift-operators"], + "crunchy-postgres-operator": ["openshift-operators"], + } + + operator_versions = [] + missing_subs = [] + unhealthy_subs = [] + missing_installplans = [] + upgrades_pending = [] + + for key in expected_subs.keys(): + for val in expected_subs[key]: + try: + subs = Subscription.get( + dyn_client=openshift_dyn_client, name=key, namespace=val + ) + sub = next(subs) + except StopIteration: + missing_subs.append(f"{key} in {val} namespace") + continue + + logger.info( + f"State for {sub.instance.metadata.name}: {sub.instance.status.state}" + ) + if sub.instance.status.state == "UpgradePending": + upgrades_pending.append( + f"{sub.instance.metadata.name} in {sub.instance.metadata.namespace} namespace" + ) + + logger.info( + f"CatalogSourcesUnhealthy: {sub.instance.status.conditions[0].status}" + ) + if sub.instance.status.conditions[0].status != "False": + logger.info(f"Subscription {sub.instance.metadata.name} is unhealthy") + unhealthy_subs.append(sub.instance.metadata.name) + else: + operator_versions.append( + f"installedCSV: {sub.instance.status.installedCSV}" + ) + + logger.info(f"installPlanRef: {sub.instance.status.installPlanRef}") + if not sub.instance.status.installPlanRef: + logger.info( + f"No install plan found for subscription {sub.instance.metadata.name} " + f"in {sub.instance.metadata.namespace} namespace" + ) + missing_installplans.append( + f"{sub.instance.metadata.name} in {sub.instance.metadata.namespace} namespace" + ) + + logger.info("") + + if missing_subs: + logger.error(f"FAIL: The following subscriptions are missing: {missing_subs}") + if unhealthy_subs: + logger.error( + f"FAIL: The following subscriptions are unhealthy: {unhealthy_subs}" + ) + if missing_installplans: + logger.error( + f"FAIL: The install plan for the following subscriptions is missing: {missing_installplans}" + ) + if upgrades_pending: + logger.error( + f"FAIL: The following subscriptions are in UpgradePending state: {upgrades_pending}" + ) + + for line in operator_versions: + logger.info(line) + + versions = 
ClusterVersion.get(dyn_client=openshift_dyn_client) + version = next(versions) + logger.info(f"Openshift version:\n{version.instance.status.history}") + + if missing_subs or unhealthy_subs or missing_installplans or upgrades_pending: + err_msg = "Subscription status check failed" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info("PASS: Subscription status check passed") diff --git a/tests/interop/test_subscription_status_hub.py b/tests/interop/test_subscription_status_hub.py new file mode 100644 index 00000000..fb689682 --- /dev/null +++ b/tests/interop/test_subscription_status_hub.py @@ -0,0 +1,165 @@ +import difflib +import logging +import os +import re +import subprocess + +import pytest +from ocp_resources.cluster_version import ClusterVersion +from ocp_resources.subscription import Subscription +from openshift.dynamic.exceptions import NotFoundError + +from . import __loggername__ + +logger = logging.getLogger(__loggername__) + + +@pytest.mark.subscription_status_hub +def test_subscription_status_hub(openshift_dyn_client): + # These are the operator subscriptions and their associated namespaces + expected_subs = { + "openshift-gitops-operator": ["openshift-operators"], + "advanced-cluster-management": ["open-cluster-management"], + "multicluster-engine": ["multicluster-engine"], + "openshift-pipelines-operator-rh": ["openshift-operators"], + "amq-streams": ["openshift-operators"], + "crunchy-postgres-operator": ["openshift-operators"], + } + + operator_versions = [] + missing_subs = [] + unhealthy_subs = [] + missing_installplans = [] + upgrades_pending = [] + + for key in expected_subs.keys(): + for val in expected_subs[key]: + try: + subs = Subscription.get( + dyn_client=openshift_dyn_client, name=key, namespace=val + ) + sub = next(subs) + except NotFoundError: + missing_subs.append(f"{key} in {val} namespace") + continue + + logger.info( + f"State for {sub.instance.metadata.name}: {sub.instance.status.state}" + ) + if sub.instance.status.state == "UpgradePending": + upgrades_pending.append( + f"{sub.instance.metadata.name} in {sub.instance.metadata.namespace} namespace" + ) + + logger.info( + f"CatalogSourcesUnhealthy: {sub.instance.status.conditions[0].status}" + ) + if sub.instance.status.conditions[0].status != "False": + logger.info(f"Subscription {sub.instance.metadata.name} is unhealthy") + unhealthy_subs.append( + f"{sub.instance.metadata.name} in {sub.instance.metadata.namespace} namespace" + ) + else: + operator_versions.append( + f"installedCSV: {sub.instance.status.installedCSV}" + ) + + logger.info(f"installPlanRef: {sub.instance.status.installPlanRef}") + if not sub.instance.status.installPlanRef: + logger.info( + f"No install plan found for subscription {sub.instance.metadata.name} " + f"in {sub.instance.metadata.namespace} namespace" + ) + missing_installplans.append( + f"{sub.instance.metadata.name} in {sub.instance.metadata.namespace} namespace" + ) + + logger.info("") + + if missing_subs: + logger.error(f"FAIL: The following subscriptions are missing: {missing_subs}") + if unhealthy_subs: + logger.error( + f"FAIL: The following subscriptions are unhealthy: {unhealthy_subs}" + ) + if missing_installplans: + logger.error( + f"FAIL: The install plan for the following subscriptions is missing: {missing_installplans}" + ) + if upgrades_pending: + logger.error( + f"FAIL: The following subscriptions are in UpgradePending state: {upgrades_pending}" + ) + + versions = ClusterVersion.get(dyn_client=openshift_dyn_client) + version = 
next(versions)
+    logger.info(f"Openshift version:\n{version.instance.status.history}")
+    shortversion = re.sub(r"(\.[0-9]+$)", "", os.getenv("OPENSHIFT_VER"))
+
+    currentfile = os.getcwd() + "/operators_hub_current"
+    sourceFile = open(currentfile, "w")
+    for line in operator_versions:
+        logger.info(line)
+        print(line, file=sourceFile)
+    sourceFile.close()
+
+    logger.info("Clone operator-versions repo")
+    try:
+        operator_versions_repo = (
+            "git@gitlab.cee.redhat.com:mpqe/mps/vp/operator-versions.git"
+        )
+        clone = subprocess.run(
+            ["git", "clone", operator_versions_repo], capture_output=True, text=True
+        )
+        logger.info(clone.stdout)
+        logger.info(clone.stderr)
+    except Exception:
+        pass
+
+    previouspath = os.getcwd() + f"/operator-versions/mcgitops_hub_{shortversion}"
+    previousfile = f"mcgitops_hub_{shortversion}"
+
+    logger.info("Ensure previous file exists")
+    checkpath = os.path.exists(previouspath)
+    logger.info(checkpath)
+
+    if checkpath is True:
+        logger.info("Diff current operator list with previous file")
+        diff = opdiff(open(previouspath).readlines(), open(currentfile).readlines())
+        diffstring = "".join(diff)
+        logger.info(diffstring)
+
+        logger.info("Write diff to file")
+        sourceFile = open("operator_diffs_hub.log", "w")
+        print(diffstring, file=sourceFile)
+        sourceFile.close()
+    else:
+        logger.info("Skipping operator diff - previous file not found")
+
+    if missing_subs or unhealthy_subs or missing_installplans or upgrades_pending:
+        err_msg = "Subscription status check failed"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        # Only push the new operator list if the test passed
+        # and we are not testing a pre-release operator
+        if checkpath is True and not os.environ.get("INDEX_IMAGE"):
+            os.remove(previouspath)
+            os.rename(currentfile, previouspath)
+
+            cwd = os.getcwd() + "/operator-versions"
+            logger.info(f"CWD: {cwd}")
+
+            logger.info("Push new operator list")
+            subprocess.run(["git", "add", previousfile], cwd=cwd)
+            subprocess.run(
+                ["git", "commit", "-m", "Update operator versions list"],
+                cwd=cwd,
+            )
+            subprocess.run(["git", "push"], cwd=cwd)
+
+        logger.info("PASS: Subscription status check passed")
+
+
+def opdiff(*args):
+    return filter(lambda x: not x.startswith(" "), difflib.ndiff(*args))
diff --git a/tests/interop/test_validate_edge_site_components.py b/tests/interop/test_validate_edge_site_components.py
new file mode 100644
index 00000000..be5840c8
--- /dev/null
+++ b/tests/interop/test_validate_edge_site_components.py
@@ -0,0 +1,240 @@
+import logging
+import os
+import subprocess
+
+import pytest
+from ocp_resources.namespace import Namespace
+from ocp_resources.pod import Pod
+from ocp_resources.route import Route
+from openshift.dynamic.exceptions import NotFoundError
+
+from . import __loggername__
+from .crd import ArgoCD
+from .edge_util import get_long_live_bearer_token, get_site_response
+
+logger = logging.getLogger(__loggername__)
+
+oc = os.environ["HOME"] + "/oc_client/oc"
+
+"""
+Validate the following retail components on the edge site (line server):
+
+1) Application health (applications deployed through argocd)
+"""
+
+
+@pytest.mark.test_validate_edge_site_components
+def test_validate_edge_site_components():
+    logger.info("Checking Openshift version on edge site")
+    version_out = subprocess.run([oc, "version"], capture_output=True)
+    version_out = version_out.stdout.decode("utf-8")
+    logger.info(f"Openshift version:\n{version_out}")
+
+
+@pytest.mark.validate_edge_site_reachable
+def test_validate_edge_site_reachable(kube_config, openshift_dyn_client):
+    logger.info("Check if edge site API end point is reachable")
+    edge_api_url = kube_config.host
+    if not edge_api_url:
+        err_msg = "Edge site url is missing in kubeconfig file"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.info(f"EDGE api url : {edge_api_url}")
+
+    bearer_token = get_long_live_bearer_token(dyn_client=openshift_dyn_client)
+    if not bearer_token:
+        assert False, "Bearer token is missing for edge site"
+
+    edge_api_response = get_site_response(
+        site_url=edge_api_url, bearer_token=bearer_token
+    )
+
+    if edge_api_response.status_code != 200:
+        err_msg = "Edge site is not reachable. Please check the deployment."
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.info("PASS: Edge site is reachable")
+
+
+@pytest.mark.check_pod_status_edge
+def test_check_pod_status(openshift_dyn_client):
+    logger.info("Checking pod status")
+
+    err_msg = []
+    failed_pods = []
+    missing_pods = []
+    missing_projects = []
+    projects = [
+        "openshift-operators",
+        "open-cluster-management-agent",
+        "open-cluster-management-agent-addon",
+        "openshift-gitops",
+    ]
+
+    for project in projects:
+        # Check for missing project
+        try:
+            namespaces = Namespace.get(dyn_client=openshift_dyn_client, name=project)
+            next(namespaces)
+        except NotFoundError:
+            missing_projects.append(project)
+            continue
+        # Check for absence of pods in project
+        try:
+            pods = Pod.get(dyn_client=openshift_dyn_client, namespace=project)
+            next(pods)
+        except StopIteration:
+            missing_pods.append(project)
+            continue
+
+    for project in projects:
+        pods = Pod.get(dyn_client=openshift_dyn_client, namespace=project)
+        logger.info(f"Checking pods in namespace '{project}'")
+        for pod in pods:
+            for container in pod.instance.status.containerStatuses:
+                logger.info(
+                    f"{pod.instance.metadata.name} : {container.name} :"
+                    f" {container.state}"
+                )
+                if container.state.terminated:
+                    if container.state.terminated.reason != "Completed":
+                        logger.info(
+                            f"Pod {pod.instance.metadata.name} in"
+                            f" {pod.instance.metadata.namespace} namespace is"
+                            " FAILED:"
+                        )
+                        failed_pods.append(pod.instance.metadata.name)
+                        logger.info(describe_pod(project, pod.instance.metadata.name))
+                        logger.info(
+                            get_log_output(
+                                project,
+                                pod.instance.metadata.name,
+                                container.name,
+                            )
+                        )
+                elif not container.state.running:
+                    logger.info(
+                        f"Pod {pod.instance.metadata.name} in"
+                        f" {pod.instance.metadata.namespace} namespace is"
+                        " FAILED:"
+                    )
+                    failed_pods.append(pod.instance.metadata.name)
+                    logger.info(describe_pod(project, pod.instance.metadata.name))
+                    logger.info(
+                        get_log_output(
+                            project, pod.instance.metadata.name, container.name
+                        )
+                    )
+
+    if missing_projects:
+        err_msg.append(f"The following namespaces are missing: {missing_projects}")
+
+    if missing_pods:
+        err_msg.append(
+            f"The following namespaces have no pods deployed: {missing_pods}"
+        )
+
+    if failed_pods:
+        err_msg.append(f"The following pods are failed: {failed_pods}")
+
+    if err_msg:
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.info("PASS: Pod status check succeeded.")
+
+
+@pytest.mark.validate_argocd_reachable_edge_site
+def test_validate_argocd_reachable_edge_site(openshift_dyn_client):
+    namespace = "openshift-gitops"
+
+    try:
+        for route in Route.get(
+            dyn_client=openshift_dyn_client,
+            namespace=namespace,
+            name="openshift-gitops-server",
+        ):
+            pass
+    except StopIteration:
+        err_msg = f"Argocd url/route is missing in {namespace} namespace"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+
+    argocd_route_url = route.instance.spec.host
+
+    logger.info("Check if argocd route/url on edge site is reachable")
+    if not argocd_route_url:
+        err_msg = f"Argocd url/route is missing in {namespace} namespace"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        final_argocd_url = f"https://{argocd_route_url}"
+        logger.info(f"Argocd route/url : {final_argocd_url}")
+
+    bearer_token = get_long_live_bearer_token(
+        dyn_client=openshift_dyn_client,
+        namespace=namespace,
+        sub_string="openshift-gitops-argocd-server-token",
+    )
+    if not bearer_token:
+        err_msg = f"Bearer token is missing for argocd-server in {namespace} namespace"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.debug(f"Argocd bearer token : {bearer_token}")
+
+    argocd_route_response = get_site_response(
+        site_url=final_argocd_url, bearer_token=bearer_token
+    )
+
+    logger.info(f"Argocd route response : {argocd_route_response}")
+
+    if argocd_route_response.status_code != 200:
+        err_msg = "Argocd is not reachable. Please check the deployment."
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.info("PASS: Argocd is reachable")
+
+
+@pytest.mark.validate_argocd_applications_health_edge_site
+def test_validate_argocd_applications_health_edge_site(openshift_dyn_client):
+    namespace = "openshift-gitops"
+
+    argocd_apps_status = dict()
+    logger.info("Get all applications deployed by argocd on edge site")
+
+    for app in ArgoCD.get(dyn_client=openshift_dyn_client, namespace=namespace):
+        app_name = app.instance.metadata.name
+        app_health = app.health
+        argocd_apps_status[app_name] = app_health
+        logger.info(f"Health status of {app_name} is: {app_health}")
+
+    if False in (argocd_apps_status.values()):
+        err_msg = f"Some or all applications deployed on edge site are Degraded/Unhealthy: {argocd_apps_status}"
+        logger.error(f"FAIL: {err_msg}")
+        assert False, err_msg
+    else:
+        logger.info("PASS: All applications deployed on edge site are healthy.")
+
+
+def describe_pod(project, pod):
+    cmd_out = subprocess.run(
+        [oc, "describe", "pod", "-n", project, pod], capture_output=True
+    )
+    if cmd_out.stdout:
+        return cmd_out.stdout.decode("utf-8")
+    else:
+        assert False, cmd_out.stderr
+
+
+def get_log_output(project, pod, container):
+    cmd_out = subprocess.run(
+        [oc, "logs", "-n", project, pod, "-c", container], capture_output=True
+    )
+    if cmd_out.stdout:
+        return cmd_out.stdout.decode("utf-8")
+    else:
+        assert False, cmd_out.stderr
diff --git a/tests/interop/test_validate_hub_site_components.py b/tests/interop/test_validate_hub_site_components.py
new file mode 100644
index 00000000..a59e43a7
--- /dev/null
+++ b/tests/interop/test_validate_hub_site_components.py
@@ -0,0 +1,278 @@
+import logging
+import os
+import subprocess
+
+import pytest
+from ocp_resources.namespace import Namespace
+from ocp_resources.pod import Pod
+from ocp_resources.route import Route
+from ocp_resources.storage_class import StorageClass
+from openshift.dynamic.exceptions import NotFoundError
+
+from . 
import __loggername__ +from .crd import ArgoCD, ManagedCluster +from .edge_util import get_long_live_bearer_token, get_site_response + +logger = logging.getLogger(__loggername__) + +oc = os.environ["HOME"] + "/oc_client/oc" + +""" +Validate following retail components on hub site (central server): + +1) applications health (Applications deployed through argocd) +""" + + +@pytest.mark.test_validate_hub_site_components +def test_validate_hub_site_components(openshift_dyn_client): + logger.info("Checking Openshift version on hub site") + version_out = subprocess.run([oc, "version"], capture_output=True) + version_out = version_out.stdout.decode("utf-8") + logger.info(f"Openshift version:\n{version_out}") + + logger.info("Dump PVC and storageclass info") + pvcs_out = subprocess.run([oc, "get", "pvc", "-A"], capture_output=True) + pvcs_out = pvcs_out.stdout.decode("utf-8") + logger.info(f"PVCs:\n{pvcs_out}") + + for sc in StorageClass.get(dyn_client=openshift_dyn_client): + logger.info(sc.instance) + + +@pytest.mark.validate_hub_site_reachable +def test_validate_hub_site_reachable(kube_config, openshift_dyn_client): + logger.info("Check if hub site API end point is reachable") + hub_api_url = kube_config.host + if not hub_api_url: + err_msg = "Hub site url is missing in kubeconfig file" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info(f"HUB api url : {hub_api_url}") + + bearer_token = get_long_live_bearer_token(dyn_client=openshift_dyn_client) + if not bearer_token: + assert False, "Bearer token is missing for hub site" + + hub_api_response = get_site_response( + site_url=hub_api_url, bearer_token=bearer_token + ) + + if hub_api_response.status_code != 200: + err_msg = "Hub site is not reachable. Please check the deployment." 
+ logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info("PASS: Hub site is reachable") + + +@pytest.mark.check_pod_status_hub +def test_check_pod_status(openshift_dyn_client): + logger.info("Checking pod status") + + err_msg = [] + failed_pods = [] + missing_pods = [] + missing_projects = [] + projects = [ + "openshift-operators", + "open-cluster-management", + "open-cluster-management-hub", + "openshift-gitops", + "vault", + ] + + for project in projects: + # Check for missing project + try: + namespaces = Namespace.get(dyn_client=openshift_dyn_client, name=project) + next(namespaces) + except NotFoundError: + missing_projects.append(project) + continue + # Check for absence of pods in project + try: + pods = Pod.get(dyn_client=openshift_dyn_client, namespace=project) + next(pods) + except StopIteration: + missing_pods.append(project) + continue + + for project in projects: + pods = Pod.get(dyn_client=openshift_dyn_client, namespace=project) + logger.info(f"Checking pods in namespace '{project}'") + for pod in pods: + for container in pod.instance.status.containerStatuses: + logger.info( + f"{pod.instance.metadata.name} : {container.name} :" + f" {container.state}" + ) + if container.state.terminated: + if container.state.terminated.reason != "Completed": + logger.info( + f"Pod {pod.instance.metadata.name} in" + f" {pod.instance.metadata.namespace} namespace is" + " FAILED:" + ) + failed_pods.append(pod.instance.metadata.name) + logger.info(describe_pod(project, pod.instance.metadata.name)) + logger.info( + get_log_output( + project, + pod.instance.metadata.name, + container.name, + ) + ) + elif not container.state.running: + logger.info( + f"Pod {pod.instance.metadata.name} in" + f" {pod.instance.metadata.namespace} namespace is" + " FAILED:" + ) + failed_pods.append(pod.instance.metadata.name) + logger.info(describe_pod(project, pod.instance.metadata.name)) + logger.info( + get_log_output( + project, pod.instance.metadata.name, container.name + ) + ) + + if missing_projects: + err_msg.append(f"The following namespaces are missing: {missing_projects}") + + if missing_pods: + err_msg.append( + f"The following namespaces have no pods deployed: {missing_pods}" + ) + + if failed_pods: + err_msg.append(f"The following pods are failed: {failed_pods}") + + if err_msg: + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info("PASS: Pod status check succeeded.") + + +@pytest.mark.validate_acm_self_registration_managed_clusters +def test_validate_acm_self_registration_managed_clusters(openshift_dyn_client): + logger.info("Check ACM self registration for edge site") + site_name = ( + os.environ["EDGE_CLUSTER_PREFIX"] + + "-" + + os.environ["INFRA_PROVIDER"] + + "-" + + os.environ["MPTS_TEST_RUN_ID"] + ) + clusters = ManagedCluster.get(dyn_client=openshift_dyn_client, name=site_name) + cluster = next(clusters) + is_managed_cluster_joined, managed_cluster_status = cluster.self_registered + + logger.info(f"Cluster Managed : {is_managed_cluster_joined}") + logger.info(f"Managed Cluster Status : {managed_cluster_status}") + + if not is_managed_cluster_joined: + err_msg = f"{site_name} is not self registered" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info(f"PASS: {site_name} is self registered") + + +@pytest.mark.validate_argocd_reachable_hub_site +def test_validate_argocd_reachable_hub_site(openshift_dyn_client): + namespace = "openshift-gitops" + logger.info("Check if argocd route/url on hub site is reachable") + try: + 
for route in Route.get( + dyn_client=openshift_dyn_client, + namespace=namespace, + name="openshift-gitops-server", + ): + argocd_route_url = route.instance.spec.host + except StopIteration: + err_msg = "Argocd url/route is missing in open-cluster-management namespace" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + + final_argocd_url = f"{'http://'}{argocd_route_url}" + logger.info(f"ACM route/url : {final_argocd_url}") + + bearer_token = get_long_live_bearer_token( + dyn_client=openshift_dyn_client, + namespace=namespace, + sub_string="openshift-gitops-argocd-server-token", + ) + if not bearer_token: + err_msg = ( + "Bearer token is missing for argocd-server in openshift-gitops namespace" + ) + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.debug(f"Argocd bearer token : {bearer_token}") + + argocd_route_response = get_site_response( + site_url=final_argocd_url, bearer_token=bearer_token + ) + + logger.info(f"Argocd route response : {argocd_route_response}") + + if argocd_route_response.status_code != 200: + err_msg = "Argocd is not reachable. Please check the deployment" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + else: + logger.info("PASS: Argocd is reachable") + + +@pytest.mark.validate_argocd_applications_health_hub_site +def test_validate_argocd_applications_health_hub_site(openshift_dyn_client): + unhealthy_apps = [] + logger.info("Get all applications deployed by argocd on hub site") + projects = ["openshift-gitops", "retail-hub"] + for project in projects: + for app in ArgoCD.get(dyn_client=openshift_dyn_client, namespace=project): + app_name = app.instance.metadata.name + app_health = app.instance.status.health.status + app_sync = app.instance.status.sync.status + + logger.info(f"Status for {app_name} : {app_health} : {app_sync}") + + if "Healthy" != app_health or "Synced" != app_sync: + logger.info(f"Dumping failed resources for app: {app_name}") + unhealthy_apps.append(app_name) + for res in app.instance.status.resources: + if ( + res.health and res.health.status != "Healthy" + ) or res.status != "Synced": + logger.info(f"\n{res}") + + if unhealthy_apps: + err_msg = "Some or all applications deployed on hub site are unhealthy" + logger.error(f"FAIL: {err_msg}:\n{unhealthy_apps}") + assert False, err_msg + else: + logger.info("PASS: All applications deployed on hub site are healthy.") + + +def describe_pod(project, pod): + cmd_out = subprocess.run( + [oc, "describe", "pod", "-n", project, pod], capture_output=True + ) + if cmd_out.stdout: + return cmd_out.stdout.decode("utf-8") + else: + assert False, cmd_out.stderr + + +def get_log_output(project, pod, container): + cmd_out = subprocess.run( + [oc, "logs", "-n", project, pod, "-c", container], capture_output=True + ) + if cmd_out.stdout: + return cmd_out.stdout.decode("utf-8") + else: + assert False, cmd_out.stderr diff --git a/tests/interop/test_validate_pipelineruns.py b/tests/interop/test_validate_pipelineruns.py new file mode 100644 index 00000000..bfa314c8 --- /dev/null +++ b/tests/interop/test_validate_pipelineruns.py @@ -0,0 +1,197 @@ +import logging +import os +import re +import subprocess +import time + +import pytest +from ocp_resources.pipeline import Pipeline +from ocp_resources.pipelineruns import PipelineRun +from ocp_resources.task_run import TaskRun + +from . 
import __loggername__ + +logger = logging.getLogger(__loggername__) + +oc = os.environ["HOME"] + "/oc_client/oc" + + +@pytest.mark.test_validate_pipelineruns +def test_validate_pipelineruns(openshift_dyn_client): + project = "quarkuscoffeeshop-cicd" + + expected_pipelines = [ + "build-and-push-quarkuscoffeeshop-barista", + "build-and-push-quarkuscoffeeshop-counter", + "build-and-push-quarkuscoffeeshop-customerloyalty", + "build-and-push-quarkuscoffeeshop-customermocker", + "build-and-push-quarkuscoffeeshop-inventory", + "build-and-push-quarkuscoffeeshop-kitchen", + "build-and-push-quarkuscoffeeshop-web", + ] + + expected_pipelineruns = [ + "quarkuscoffeeshop-barista", + "quarkuscoffeeshop-counter", + "quarkuscoffeeshop-customerloyalty", + "quarkuscoffeeshop-customermocker", + "quarkuscoffeeshop-inventory", + "quarkuscoffeeshop-kitchen", + "quarkuscoffeeshop-web", + ] + + found_pipelines = [] + found_pipelineruns = [] + passed_pipelineruns = [] + failed_pipelineruns = [] + + logger.info("Checking Openshift pipelines") + + # FAIL here if no pipelines are found + try: + pipelines = Pipeline.get(dyn_client=openshift_dyn_client, namespace=project) + next(pipelines) + except StopIteration: + err_msg = "No pipelines were found" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + + for pipeline in Pipeline.get(dyn_client=openshift_dyn_client, namespace=project): + for expected_pipeline in expected_pipelines: + match = expected_pipeline + "$" + if re.match(match, pipeline.instance.metadata.name): + if pipeline.instance.metadata.name not in found_pipelines: + logger.info(f"found pipeline: {pipeline.instance.metadata.name}") + found_pipelines.append(pipeline.instance.metadata.name) + break + + if len(expected_pipelines) == len(found_pipelines): + logger.info("Found all expected pipelines") + else: + err_msg = "Some or all pipelines are missing" + logger.error( + f"FAIL: {err_msg}:\nExpected: {expected_pipelines}\nFound: {found_pipelines}" + ) + assert False, err_msg + + logger.info("Checking Openshift pipeline runs") + timeout = time.time() + 3600 + + # FAIL here if no pipelineruns are found + try: + pipelineruns = PipelineRun.get( + dyn_client=openshift_dyn_client, namespace=project + ) + next(pipelineruns) + except StopIteration: + err_msg = "No pipeline runs were found" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + + while time.time() < timeout: + for pipelinerun in PipelineRun.get( + dyn_client=openshift_dyn_client, namespace=project + ): + for expected_pipelinerun in expected_pipelineruns: + if re.search(expected_pipelinerun, pipelinerun.instance.metadata.name): + if pipelinerun.instance.metadata.name not in found_pipelineruns: + logger.info( + f"found pipelinerun: {pipelinerun.instance.metadata.name}" + ) + found_pipelineruns.append(pipelinerun.instance.metadata.name) + break + + if len(expected_pipelineruns) == len(found_pipelineruns): + break + else: + time.sleep(60) + continue + + if len(expected_pipelineruns) == len(found_pipelineruns): + logger.info("Found all expected pipeline runs") + else: + err_msg = "Some pipeline runs are missing" + logger.error( + f"FAIL: {err_msg}:\nExpected: {expected_pipelineruns}\nFound: {found_pipelineruns}" + ) + assert False, err_msg + + logger.info("Checking Openshift pipeline run status") + timeout = time.time() + 3600 + + while time.time() < timeout: + for pipelinerun in PipelineRun.get( + dyn_client=openshift_dyn_client, namespace=project + ): + if pipelinerun.instance.status.conditions[0].reason == "Succeeded": + if 
pipelinerun.instance.metadata.name not in passed_pipelineruns: + logger.info( + f"Pipeline run succeeded: {pipelinerun.instance.metadata.name}" + ) + passed_pipelineruns.append(pipelinerun.instance.metadata.name) + elif pipelinerun.instance.status.conditions[0].reason == "Running": + logger.info( + f"Pipeline {pipelinerun.instance.metadata.name} is still running" + ) + else: + reason = pipelinerun.instance.status.conditions[0].reason + logger.info( + f"Pipeline run FAILED: {pipelinerun.instance.metadata.name} Reason: {reason}" + ) + if pipelinerun.instance.metadata.name not in failed_pipelineruns: + failed_pipelineruns.append(pipelinerun.instance.metadata.name) + + logger.info(f"Failed pipelineruns: {failed_pipelineruns}") + logger.info(f"Passed pipelineruns: {passed_pipelineruns}") + + if (len(failed_pipelineruns) + len(passed_pipelineruns)) == len( + expected_pipelines + ): + break + else: + time.sleep(60) + continue + + if ((len(failed_pipelineruns)) > 0) or ( + len(passed_pipelineruns) < len(expected_pipelineruns) + ): + logger.info("Checking Openshift task runs") + + # FAIL here if no task runs are found + try: + taskruns = TaskRun.get(dyn_client=openshift_dyn_client, namespace=project) + next(taskruns) + except StopIteration: + err_msg = "No task runs were found" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + + for taskrun in TaskRun.get(dyn_client=openshift_dyn_client, namespace=project): + if taskrun.instance.status.conditions[0].status == "False": + reason = taskrun.instance.status.conditions[0].reason + logger.info( + f"Task FAILED: {taskrun.instance.metadata.name} Reason: {reason}" + ) + + message = taskrun.instance.status.conditions[0].message + logger.info(f"message: {message}") + + try: + cmdstring = re.search("for logs run: kubectl(.*)$", message).group( + 1 + ) + cmd = str(oc + cmdstring) + logger.info(f"CMD: {cmd}") + cmd_out = subprocess.run(cmd, shell=True, capture_output=True) + + logger.info(cmd_out.stdout.decode("utf-8")) + logger.info(cmd_out.stderr.decode("utf-8")) + except AttributeError: + logger.error("No logs to collect") + + err_msg = "Some or all tasks have failed" + logger.error(f"FAIL: {err_msg}") + assert False, err_msg + + else: + logger.info("PASS: Pipeline verification succeeded.")