add dapr components crawler to test-crawler (#205)
* add components crawler

Signed-off-by: MregXN <[email protected]>

* fix missing handling of TestMain failures

Signed-off-by: MregXN <[email protected]>

* fix typo

Signed-off-by: MregXN <[email protected]>

---------

Signed-off-by: MregXN <[email protected]>
MregXN authored Oct 13, 2023
1 parent 3af2ac7 commit a3251d4
Showing 7 changed files with 240 additions and 37 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/dapr-e2e-tests-crawler.yml
@@ -17,7 +17,6 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN}}
OUTPUT_TARGET: "log.txt"
steps:
- name: Checkout code
uses: actions/checkout@v3
@@ -33,9 +32,14 @@ jobs:
- name: Run Script
run: python3 test-crawler/__init__.py

- name: Compress logs
if: always()
run: |
tar -cvf test-crawler/result.tar test-crawler/tests.txt test-crawler/components.txt
- name: Upload results
if: always()
uses: actions/upload-artifact@master
with:
name: "result"
path: ${{ env.OUTPUT_TARGET }}
path: "test-crawler/result.tar"
17 changes: 3 additions & 14 deletions test-crawler/__init__.py
@@ -3,24 +3,13 @@
# Licensed under the MIT License.
# ------------------------------------------------------------

from global_settings import REPO, WORKFLOW_NAME, ACCESS_TOKEN, OUTPUT_TARGET
from global_settings import REPO, WORKFLOW_NAME, ACCESS_TOKEN
from workflow_scan import WorkFlowScaner


if __name__ == "__main__":
workflow_scaner = WorkFlowScaner(REPO, WORKFLOW_NAME, ACCESS_TOKEN)
print(
f"Dapr E2E Tests Crawler start. \nREPO : {REPO} WORKFLOW_NAME : {WORKFLOW_NAME}"
)
print(f"Dapr E2E Tests Crawler start. \nREPO : {REPO} WORKFLOW_NAME : {WORKFLOW_NAME}")
workflow_scaner.scan_workflow()

pass_rate_string = f"\nPass rate of {WORKFLOW_NAME} is " + "{:.2%}\n".format(
workflow_scaner.get_pass_rate()
)
print(pass_rate_string)

with open(OUTPUT_TARGET, "w") as file:
file.write(pass_rate_string + "\n")

print("\nFailure Workflow crawling start:")
print("Failure Workflow crawling start:")
workflow_scaner.list_failure_case()
71 changes: 71 additions & 0 deletions test-crawler/components_crawler.py
@@ -0,0 +1,71 @@
# ------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------------------------------

import base64
import yaml
import requests
from global_settings import GITHUB_API_PARAMETER


class ComponentsCrawler:
    def __init__(self, repo, access_token):
        self.repo = repo
        self.access_token = access_token
        self.headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
            "Authorization": f"token {access_token}",
        }
        self.app_components_dict = {}

    def scan_components(self):
        print("\nStarting to scan components.\n")
        url = f"https://api.github.com/repos/{self.repo}/contents/tests/config/"
        response = requests.get(url, headers=self.headers, params=GITHUB_API_PARAMETER)
        content = response.json()
        for file in content:
            print("\nscanning " + file["name"] + "...")
            try:
                file_url = url + file["name"]
                response = requests.get(
                    file_url, headers=self.headers, params=GITHUB_API_PARAMETER
                )
                content = response.json()["content"]
                yaml_string = base64.b64decode(content).decode("utf-8")

                yaml_split_string = yaml_string.split("---")
                for s in yaml_split_string:
                    if len(s):
                        data = yaml.safe_load(s)

                        if (data is not None) and ("kind" in data):
                            if data["kind"] != "Component":
                                print("not a Component, skip")
                                continue

                            components_name = data["spec"]["type"]
                            if "scopes" in data:
                                for name in data["scopes"]:
                                    if name not in self.app_components_dict:
                                        self.app_components_dict[name] = []
                                    self.app_components_dict[name].append(
                                        components_name
                                    )
                                    print("app " + name + " is added.")
                            else:
                                print(
                                    "No scope is specified for component "
                                    + components_name
                                    + ", skip"
                                )
                        else:
                            print("not a Kubernetes API YAML, skip.")
            except:
                print("Failed to parse " + file["name"] + ", skip.")

        for key in self.app_components_dict:
            self.app_components_dict[key] = set(self.app_components_dict[key])

        return self.app_components_dict
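
For context, a minimal driver sketch for the class above, under the assumption that the actual wiring lives in a file not rendered in this view: ComponentsCrawler walks tests/config/ in the target repo and returns a dict mapping each app scope to the set of component types it uses. Only ComponentsCrawler, REPO, and ACCESS_TOKEN come from the changeset; the loop and variable names below are illustrative.

# Illustrative driver only -- the real wiring is in a file not shown here.
from components_crawler import ComponentsCrawler
from global_settings import REPO, ACCESS_TOKEN

components_crawler = ComponentsCrawler(REPO, ACCESS_TOKEN)
app_components = components_crawler.scan_components()  # {app scope: set of component types}
for app, components in sorted(app_components.items()):
    print(app, sorted(components))
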
76 changes: 63 additions & 13 deletions test-crawler/failure_log_crawler.py
@@ -9,7 +9,7 @@
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO
from global_settings import OUTPUT_TARGET
from global_settings import TESTS_OUTPUT_TARGET, COMPONENTS_OUTPUT_TARGET


class TestCaseInfo:
@@ -50,14 +50,17 @@ def __init__(self, repo, access_token):
}
self.fail_testcase_dict = {}
self.fail_testcase_dict_sorted_list = []
self.workflow_with_no_artifact = []

def crawl(self, failure_id, workflow_len):
def crawl_failure_workflow(self, failure_id, workflow_len):
failure_id_len = len(failure_id)
for index, id in enumerate(failure_id):
print(
f"({index+1}/{failure_id_len}) crawling failure workflow... workflow id is "
f"({index+1}/{failure_id_len}) crawling failure workflow "
+ str(id)
+ "..."
)

url = (
f"https://api.github.com/repos/{self.repo}/actions/runs/{id}/artifacts"
)
@@ -67,12 +70,17 @@ def crawl(self, failure_id, workflow_len):
except:
print("JSON decode error occured when get artifacts.")
continue
for artifact in artifacts:
if (
artifact["name"] == "linux_amd64_e2e.json"
or artifact["name"] == "windows_amd64_e2e.json"
):
self.parse_artifact(artifact, id)

if len(artifacts) == 0:
print("workflow " + str(id) + " does not upload any artifacts, skip.")
self.workflow_with_no_artifact.append(id)
else:
for artifact in artifacts:
if (
artifact["name"] == "linux_amd64_e2e.json"
or artifact["name"] == "windows_amd64_e2e.json"
):
self.parse_artifact(artifact, id)

for v in self.fail_testcase_dict.values():
v.get_fail_rate(workflow_len)
@@ -96,11 +104,13 @@ def parse_artifact(self, artifact, id):
sys.stderr.write(f"Error occurred when parsing {artifact_name}, skipped")
return
tree = ET.parse(extracted_file)

root = tree.getroot()

failures_count = 0
for testsuite in root:
failures = int(testsuite.attrib["failures"])
if failures:
failures_count += failures
fail_testcases = testsuite.findall("testcase")
for fail_testcase in fail_testcases[:failures]:
os = artifact["name"].split("_")[0]
@@ -115,9 +125,23 @@ def parse_artifact(self, artifact, id):
latest_url = f"https://github.com/{self.repo}/actions/runs/{id}"
self.fail_testcase_dict[name].set_latest_url(latest_url)

# the single reported failure occurred in TestMain rather than in an individual testcase
if root.attrib["failures"] == "1" and failures_count == 0:
os = artifact["name"].split("_")[0]
name = "TestMain"

if name in self.fail_testcase_dict:
self.fail_testcase_dict[name].update_os(os)
self.fail_testcase_dict[name].increase_fail_times()
else:
testcase_info = TestCaseInfo(name, os)
self.fail_testcase_dict[name] = testcase_info

latest_url = f"https://github.com/{self.repo}/actions/runs/{id}"
self.fail_testcase_dict[name].set_latest_url(latest_url)

def list_failure_testcase(self):
print("\nFailed Test Cases:")
with open(OUTPUT_TARGET, "a") as file:
with open(TESTS_OUTPUT_TARGET, "a") as file:
for case in self.fail_testcase_dict_sorted_list:
fali_rate_string = (
"Fail Rate: "
@@ -133,5 +157,31 @@ def list_failure_testcase(self):
+ str(case.latest_url)
+ "\n"
)
print(fali_rate_string)
file.write(fali_rate_string + "\n")

if len(self.workflow_with_no_artifact):
file.write("\nWorkflows without any artifact: \n")
for idx, id in enumerate(self.workflow_with_no_artifact):
file.write(f"{idx}: https://github.com/{self.repo}/actions/runs/{id}\n")

print("tests result output completed")

def list_failure_components(self, components_tests_dict):
with open(COMPONENTS_OUTPUT_TARGET, "w") as file:
for component, tests in components_tests_dict.items():
file.write(component + ":\n")
for test in tests:
if test in self.fail_testcase_dict:
pass_rate = 1 - self.fail_testcase_dict[test].fail_rate
else:
pass_rate = 1
file.write(
"Pass Rate: "
+ "{:.2%}".format(float(pass_rate))
+ " "
+ "Test Case: "
+ test
+ "\n"
)
file.write("\n")
print("components result output completed.")
7 changes: 5 additions & 2 deletions test-crawler/global_settings.py
@@ -18,5 +18,8 @@
# Parameters brought when accessing github API
GITHUB_API_PARAMETER = {"per_page": "100"}

# Target to output logs
OUTPUT_TARGET = "log.txt"
# Target to output crawl result of tests
TESTS_OUTPUT_TARGET = "tests.txt"

# Target to output crawl result of components
COMPONENTS_OUTPUT_TARGET = "components.txt"
3 changes: 2 additions & 1 deletion test-crawler/requirements.pip
@@ -1 +1,2 @@
requests
requests
pyyaml
