add dapr components crawler to test-crawler (#205)
* add components crawler

Signed-off-by: MregXN <[email protected]>

* fix missing handling of TestMain failures

Signed-off-by: MregXN <[email protected]>

* fix typo

Signed-off-by: MregXN <[email protected]>

---------

Signed-off-by: MregXN <[email protected]>
MregXN authored Oct 13, 2023
1 parent 3af2ac7 commit a3251d4
Showing 7 changed files with 240 additions and 37 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/dapr-e2e-tests-crawler.yml
@@ -17,7 +17,6 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN}}
OUTPUT_TARGET: "log.txt"
steps:
- name: Checkout code
uses: actions/checkout@v3
@@ -33,9 +32,14 @@ jobs:
- name: Run Script
run: python3 test-crawler/__init__.py

- name: Compress logs
if: always()
run: |
tar -cvf test-crawler/result.tar test-crawler/tests.txt test-crawler/components.txt
- name: Upload results
if: always()
uses: actions/upload-artifact@master
with:
name: "result"
path: ${{ env.OUTPUT_TARGET }}
path: "test-crawler/result.tar"
17 changes: 3 additions & 14 deletions test-crawler/__init__.py
@@ -3,24 +3,13 @@
# Licensed under the MIT License.
# ------------------------------------------------------------

from global_settings import REPO, WORKFLOW_NAME, ACCESS_TOKEN, OUTPUT_TARGET
from global_settings import REPO, WORKFLOW_NAME, ACCESS_TOKEN
from workflow_scan import WorkFlowScaner


if __name__ == "__main__":
workflow_scaner = WorkFlowScaner(REPO, WORKFLOW_NAME, ACCESS_TOKEN)
print(
f"Dapr E2E Tests Crawler start. \nREPO : {REPO} WORKFLOW_NAME : {WORKFLOW_NAME}"
)
print(f"Dapr E2E Tests Crawler start. \nREPO : {REPO} WORKFLOW_NAME : {WORKFLOW_NAME}")
workflow_scaner.scan_workflow()

pass_rate_string = f"\nPass rate of {WORKFLOW_NAME} is " + "{:.2%}\n".format(
workflow_scaner.get_pass_rate()
)
print(pass_rate_string)

with open(OUTPUT_TARGET, "w") as file:
file.write(pass_rate_string + "\n")

print("\nFailure Workflow crawling start:")
print("Failure Workflow crawling start:")
workflow_scaner.list_failure_case()
71 changes: 71 additions & 0 deletions test-crawler/components_crawler.py
@@ -0,0 +1,71 @@
# ------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------------------------------

import base64
import yaml
import requests
from global_settings import GITHUB_API_PARAMETER


class ComponentsCrawler:
    def __init__(self, repo, access_token):
        self.repo = repo
        self.access_token = access_token
        self.headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
            "Authorization": f"token {access_token}",
        }
        self.app_components_dict = {}

    def scan_components(self):
        print("\nStarting to scan components.\n")
        url = f"https://api.github.com/repos/{self.repo}/contents/tests/config/"
        response = requests.get(url, headers=self.headers, params=GITHUB_API_PARAMETER)
        content = response.json()
        for file in content:
            print("\nscanning " + file["name"] + "...")
            try:
                file_url = url + file["name"]
                response = requests.get(
                    file_url, headers=self.headers, params=GITHUB_API_PARAMETER
                )
                content = response.json()["content"]
                yaml_string = base64.b64decode(content).decode("utf-8")

                yaml_split_string = yaml_string.split("---")
                for s in yaml_split_string:
                    if len(s):
                        data = yaml.safe_load(s)

                        if (data is not None) and ("kind" in data):
                            if data["kind"] != "Component":
                                print("not a Component, skip")
                                continue

                            components_name = data["spec"]["type"]
                            if "scopes" in data:
                                for name in data["scopes"]:
                                    if name not in self.app_components_dict:
                                        self.app_components_dict[name] = []
                                    self.app_components_dict[name].append(
                                        components_name
                                    )
                                    print("app " + name + " is added.")
                            else:
                                print(
                                    "No scope is specified for component "
                                    + components_name
                                    + ", skip"
                                )
                        else:
                            print("not a Kubernetes API YAML, skip.")
            except:
                print("Failed to parse " + file["name"] + ", skip.")

        for key in self.app_components_dict:
            self.app_components_dict[key] = set(self.app_components_dict[key])

        return self.app_components_dict
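
For context, a minimal driver sketch for the class above, under the assumption that the actual wiring lives in a file not rendered in this view: ComponentsCrawler walks tests/config/ in the target repo and returns a dict mapping each app scope to the set of component types it uses. Only ComponentsCrawler, REPO, and ACCESS_TOKEN come from the changeset; the loop and variable names below are illustrative.

# Illustrative driver only -- the real wiring is in a file not shown here.
from components_crawler import ComponentsCrawler
from global_settings import REPO, ACCESS_TOKEN

components_crawler = ComponentsCrawler(REPO, ACCESS_TOKEN)
app_components = components_crawler.scan_components()  # {app scope: set of component types}
for app, components in sorted(app_components.items()):
    print(app, sorted(components))
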
76 changes: 63 additions & 13 deletions test-crawler/failure_log_crawler.py
@@ -9,7 +9,7 @@
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO
from global_settings import OUTPUT_TARGET
from global_settings import TESTS_OUTPUT_TARGET, COMPONENTS_OUTPUT_TARGET


class TestCaseInfo:
@@ -50,14 +50,17 @@ def __init__(self, repo, access_token):
}
self.fail_testcase_dict = {}
self.fail_testcase_dict_sorted_list = []
self.workflow_with_no_artifact = []

def crawl(self, failure_id, workflow_len):
def crawl_failure_workflow(self, failure_id, workflow_len):
failure_id_len = len(failure_id)
for index, id in enumerate(failure_id):
print(
f"({index+1}/{failure_id_len}) crawling failure workflow... workflow id is "
f"({index+1}/{failure_id_len}) crawling failure workflow "
+ str(id)
+ "..."
)

url = (
f"https://api.github.com/repos/{self.repo}/actions/runs/{id}/artifacts"
)
@@ -67,12 +70,17 @@ def crawl(self, failure_id, workflow_len):
except:
print("JSON decode error occured when get artifacts.")
continue
for artifact in artifacts:
if (
artifact["name"] == "linux_amd64_e2e.json"
or artifact["name"] == "windows_amd64_e2e.json"
):
self.parse_artifact(artifact, id)

if len(artifacts) == 0:
print("workflow " + str(id) + " does not upload any artifacts, skip.")
self.workflow_with_no_artifact.append(id)
else:
for artifact in artifacts:
if (
artifact["name"] == "linux_amd64_e2e.json"
or artifact["name"] == "windows_amd64_e2e.json"
):
self.parse_artifact(artifact, id)

for v in self.fail_testcase_dict.values():
v.get_fail_rate(workflow_len)
@@ -96,11 +104,13 @@ def parse_artifact(self, artifact, id):
sys.stderr.write(f"Error occurred when parsing {artifact_name}, skipped")
return
tree = ET.parse(extracted_file)

root = tree.getroot()

failures_count = 0
for testsuite in root:
failures = int(testsuite.attrib["failures"])
if failures:
failures_count += failures
fail_testcases = testsuite.findall("testcase")
for fail_testcase in fail_testcases[:failures]:
os = artifact["name"].split("_")[0]
@@ -115,9 +125,23 @@ def parse_artifact(self, artifact, id):
latest_url = f"https://github.com/{self.repo}/actions/runs/{id}"
self.fail_testcase_dict[name].set_latest_url(latest_url)

# the single reported failure occurred in TestMain rather than in an individual testcase
if root.attrib["failures"] == "1" and failures_count == 0:
os = artifact["name"].split("_")[0]
name = "TestMain"

if name in self.fail_testcase_dict:
self.fail_testcase_dict[name].update_os(os)
self.fail_testcase_dict[name].increase_fail_times()
else:
testcase_info = TestCaseInfo(name, os)
self.fail_testcase_dict[name] = testcase_info

latest_url = f"https://github.com/{self.repo}/actions/runs/{id}"
self.fail_testcase_dict[name].set_latest_url(latest_url)

def list_failure_testcase(self):
print("\nFailed Test Cases:")
with open(OUTPUT_TARGET, "a") as file:
with open(TESTS_OUTPUT_TARGET, "a") as file:
for case in self.fail_testcase_dict_sorted_list:
fali_rate_string = (
"Fail Rate: "
@@ -133,5 +157,31 @@ def list_failure_testcase(self):
+ str(case.latest_url)
+ "\n"
)
print(fali_rate_string)
file.write(fali_rate_string + "\n")

if len(self.workflow_with_no_artifact):
file.write("\nWorkflows without any artifact: \n")
for idx, id in enumerate(self.workflow_with_no_artifact):
file.write(f"{idx}: https://github.com/{self.repo}/actions/runs/{id}\n")

print("tests result output completed")

def list_failure_components(self, components_tests_dict):
with open(COMPONENTS_OUTPUT_TARGET, "w") as file:
for component, tests in components_tests_dict.items():
file.write(component + ":\n")
for test in tests:
if test in self.fail_testcase_dict:
pass_rate = 1 - self.fail_testcase_dict[test].fail_rate
else:
pass_rate = 1
file.write(
"Pass Rate: "
+ "{:.2%}".format(float(pass_rate))
+ " "
+ "Test Case: "
+ test
+ "\n"
)
file.write("\n")
print("components result output completed.")
7 changes: 5 additions & 2 deletions test-crawler/global_settings.py
@@ -18,5 +18,8 @@
# Parameters brought when accessing github API
GITHUB_API_PARAMETER = {"per_page": "100"}

# Target to output logs
OUTPUT_TARGET = "log.txt"
# Target to output crawl result of tests
TESTS_OUTPUT_TARGET = "tests.txt"

# Target to output crawl result of components
COMPONENTS_OUTPUT_TARGET = "components.txt"
3 changes: 2 additions & 1 deletion test-crawler/requirements.pip
@@ -1 +1,2 @@
requests
requests
pyyaml
